ia64/xen-unstable

changeset 9940:44e5abbf333b

Linux: upgrade to 2.6.16.13.

Signed-off-by: Christian Limpach <Christian.Limpach@cl.cam.ac.uk>
author cl349@firebug.cl.cam.ac.uk
date Thu May 04 17:38:25 2006 +0100 (2006-05-04)
parents d36ac8bf715e
children ad5b833122a8
files buildconfigs/mk.linux-2.6-xen linux-2.6-xen-sparse/arch/i386/kernel/vm86.c linux-2.6-xen-sparse/arch/x86_64/ia32/Makefile linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S linux-2.6-xen-sparse/arch/x86_64/kernel/process-xen.c linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c linux-2.6-xen-sparse/drivers/char/tty_io.c linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-2level.h linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-3level.h linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable.h linux-2.6-xen-sparse/include/linux/mm.h linux-2.6-xen-sparse/mm/page_alloc.c linux-2.6-xen-sparse/net/core/dev.c
line diff
     1.1 --- a/buildconfigs/mk.linux-2.6-xen	Thu May 04 16:49:58 2006 +0100
     1.2 +++ b/buildconfigs/mk.linux-2.6-xen	Thu May 04 17:38:25 2006 +0100
     1.3 @@ -1,5 +1,5 @@
     1.4  LINUX_SERIES = 2.6
     1.5 -LINUX_VER    = 2.6.16
     1.6 +LINUX_VER    = 2.6.16.13
     1.7  
     1.8  EXTRAVERSION ?= xen
     1.9  
     2.1 --- a/linux-2.6-xen-sparse/arch/i386/kernel/vm86.c	Thu May 04 16:49:58 2006 +0100
     2.2 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/vm86.c	Thu May 04 17:38:25 2006 +0100
     2.3 @@ -43,6 +43,7 @@
     2.4  #include <linux/smp_lock.h>
     2.5  #include <linux/highmem.h>
     2.6  #include <linux/ptrace.h>
     2.7 +#include <linux/audit.h>
     2.8  
     2.9  #include <asm/uaccess.h>
    2.10  #include <asm/io.h>
    2.11 @@ -258,6 +259,7 @@ static void do_sys_vm86(struct kernel_vm
    2.12  #ifndef CONFIG_X86_NO_TSS
    2.13  	struct tss_struct *tss;
    2.14  #endif
    2.15 +	long eax;
    2.16  /*
    2.17   * make sure the vm86() system call doesn't try to do anything silly
    2.18   */
    2.19 @@ -313,13 +315,19 @@ static void do_sys_vm86(struct kernel_vm
    2.20  	tsk->thread.screen_bitmap = info->screen_bitmap;
    2.21  	if (info->flags & VM86_SCREEN_BITMAP)
    2.22  		mark_screen_rdonly(tsk->mm);
    2.23 +	__asm__ __volatile__("xorl %eax,%eax; movl %eax,%fs; movl %eax,%gs\n\t");
    2.24 +	__asm__ __volatile__("movl %%eax, %0\n" :"=r"(eax));
    2.25 +
    2.26 +	/* Call audit_syscall_exit() here since we do not exit via the normal paths. */
    2.27 +	if (unlikely(current->audit_context))
    2.28 +		audit_syscall_exit(current, AUDITSC_RESULT(eax), eax);
    2.29 +
    2.30  	__asm__ __volatile__(
    2.31 -		"xorl %%eax,%%eax; movl %%eax,%%fs; movl %%eax,%%gs\n\t"
    2.32  		"movl %0,%%esp\n\t"
    2.33  		"movl %1,%%ebp\n\t"
    2.34  		"jmp resume_userspace"
    2.35  		: /* no outputs */
    2.36 -		:"r" (&info->regs), "r" (task_thread_info(tsk)) : "ax");
    2.37 +		:"r" (&info->regs), "r" (task_thread_info(tsk)));
    2.38  	/* we never return here */
    2.39  }
    2.40  
     3.1 --- a/linux-2.6-xen-sparse/arch/x86_64/ia32/Makefile	Thu May 04 16:49:58 2006 +0100
     3.2 +++ b/linux-2.6-xen-sparse/arch/x86_64/ia32/Makefile	Thu May 04 17:38:25 2006 +0100
     3.3 @@ -28,11 +28,11 @@ quiet_cmd_syscall = SYSCALL $@
     3.4  $(obj)/vsyscall-%.so: $(src)/vsyscall.lds $(obj)/vsyscall-%.o FORCE
     3.5  	$(call if_changed,syscall)
     3.6  
     3.7 -AFLAGS_vsyscall-sysenter.o = -m32 -Iarch/i386/kernel
     3.8 -AFLAGS_vsyscall-syscall.o = -m32 -Iarch/i386/kernel
     3.9 +AFLAGS_vsyscall-sysenter.o = -m32 -Wa,-32 -Iarch/i386/kernel
    3.10 +AFLAGS_vsyscall-syscall.o = -m32 -Wa,-32 -Iarch/i386/kernel
    3.11  
    3.12  ifdef CONFIG_XEN
    3.13 -AFLAGS_vsyscall-int80.o = -m32 -Iarch/i386/kernel
    3.14 +AFLAGS_vsyscall-int80.o = -m32 -Wa,-32 -Iarch/i386/kernel
    3.15  CFLAGS_syscall32-xen.o += -DUSE_INT80
    3.16  AFLAGS_syscall32_syscall-xen.o += -DUSE_INT80
    3.17  
     4.1 --- a/linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S	Thu May 04 16:49:58 2006 +0100
     4.2 +++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S	Thu May 04 17:38:25 2006 +0100
     4.3 @@ -221,6 +221,10 @@ rff_trace:
     4.4   *
     4.5   * XXX	if we had a free scratch register we could save the RSP into the stack frame
     4.6   *      and report it properly in ps. Unfortunately we haven't.
     4.7 + *
     4.8 + * When the user can change the frames, always force IRET, because
     4.9 + * it deals with non-canonical addresses better. SYSRET has trouble
    4.10 + * with them due to bugs in both AMD and Intel CPUs.
    4.11   */ 			 		
    4.12  
    4.13  ENTRY(system_call)
    4.14 @@ -289,7 +293,10 @@ sysret_signal:
    4.15  	xorl %esi,%esi # oldset -> arg2
    4.16  	call ptregscall_common
    4.17  1:	movl $_TIF_NEED_RESCHED,%edi
    4.18 -	jmp sysret_check
    4.19 +	/* Use IRET because user could have changed frame. This
    4.20 +	   works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
    4.21 +	cli
    4.22 +	jmp int_with_check
    4.23  	
    4.24  badsys:
    4.25  	movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
    4.26 @@ -315,7 +322,8 @@ 1:	SAVE_REST
    4.27  	call syscall_trace_leave
    4.28  	RESTORE_TOP_OF_STACK %rbx
    4.29  	RESTORE_REST
    4.30 -	jmp ret_from_sys_call
    4.31 +	/* Use IRET because user could have changed frame */
    4.32 +	jmp int_ret_from_sys_call
    4.33  	CFI_ENDPROC
    4.34  		
    4.35  /* 
    4.36 @@ -449,25 +457,9 @@ ENTRY(stub_execve)
    4.37  	CFI_ADJUST_CFA_OFFSET -8
    4.38  	CFI_REGISTER rip, r11
    4.39  	SAVE_REST
    4.40 -	movq %r11, %r15
    4.41 -	CFI_REGISTER rip, r15
    4.42  	FIXUP_TOP_OF_STACK %r11
    4.43  	call sys_execve
    4.44 -	GET_THREAD_INFO(%rcx)
    4.45 -	bt $TIF_IA32,threadinfo_flags(%rcx)
    4.46 -	CFI_REMEMBER_STATE
    4.47 -	jc exec_32bit
    4.48  	RESTORE_TOP_OF_STACK %r11
    4.49 -	movq %r15, %r11
    4.50 -	CFI_REGISTER rip, r11
    4.51 -	RESTORE_REST
    4.52 -	pushq %r11
    4.53 -	CFI_ADJUST_CFA_OFFSET 8
    4.54 -	CFI_REL_OFFSET rip, 0
    4.55 -	ret
    4.56 -
    4.57 -exec_32bit:
    4.58 -	CFI_RESTORE_STATE
    4.59  	movq %rax,RAX(%rsp)
    4.60  	RESTORE_REST
    4.61  	jmp int_ret_from_sys_call
     5.1 --- a/linux-2.6-xen-sparse/arch/x86_64/kernel/process-xen.c	Thu May 04 16:49:58 2006 +0100
     5.2 +++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/process-xen.c	Thu May 04 17:38:25 2006 +0100
     5.3 @@ -484,6 +484,10 @@ static inline void __save_init_fpu( stru
     5.4  	 * This is basically '__unlazy_fpu', except that we queue a
     5.5  	 * multicall to indicate FPU task switch, rather than
     5.6  	 * synchronously trapping to Xen.
     5.7 +	 * This must be here to ensure both math_state_restore() and
     5.8 +	 * kernel_fpu_begin() work consistently.
     5.9 +	 * The AMD workaround requires it to be after DS reload, or
    5.10 +	 * after DS has been cleared, which we do in __prepare_arch_switch.
    5.11  	 */
    5.12  	if (prev_p->thread_info->status & TS_USEDFPU) {
    5.13  		__save_init_fpu(prev_p); /* _not_ save_init_fpu() */
     6.1 --- a/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c	Thu May 04 16:49:58 2006 +0100
     6.2 +++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c	Thu May 04 17:38:25 2006 +0100
     6.3 @@ -1157,6 +1157,10 @@ static int __init init_amd(struct cpuinf
     6.4  	if (c->x86 == 15 && ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58))
     6.5  		set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability);
     6.6  
     6.7 +	/* Enable workaround for FXSAVE leak */
     6.8 +	if (c->x86 >= 6)
     6.9 +		set_bit(X86_FEATURE_FXSAVE_LEAK, &c->x86_capability);
    6.10 +
    6.11  	r = get_model_name(c);
    6.12  	if (!r) { 
    6.13  		switch (c->x86) { 
     7.1 --- a/linux-2.6-xen-sparse/drivers/char/tty_io.c	Thu May 04 16:49:58 2006 +0100
     7.2 +++ b/linux-2.6-xen-sparse/drivers/char/tty_io.c	Thu May 04 17:38:25 2006 +0100
     7.3 @@ -2708,7 +2708,11 @@ static void __do_SAK(void *arg)
     7.4  		}
     7.5  		task_lock(p);
     7.6  		if (p->files) {
     7.7 -			rcu_read_lock();
     7.8 +			/*
     7.9 +			 * We don't take a ref to the file, so we must
    7.10 +			 * hold ->file_lock instead.
    7.11 +			 */
    7.12 +			spin_lock(&p->files->file_lock);
    7.13  			fdt = files_fdtable(p->files);
    7.14  			for (i=0; i < fdt->max_fds; i++) {
    7.15  				filp = fcheck_files(p->files, i);
    7.16 @@ -2723,7 +2727,7 @@ static void __do_SAK(void *arg)
    7.17  					break;
    7.18  				}
    7.19  			}
    7.20 -			rcu_read_unlock();
    7.21 +			spin_unlock(&p->files->file_lock);
    7.22  		}
    7.23  		task_unlock(p);
    7.24  	} while_each_task_pid(session, PIDTYPE_SID, p);
     8.1 --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-2level.h	Thu May 04 16:49:58 2006 +0100
     8.2 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-2level.h	Thu May 04 17:38:25 2006 +0100
     8.3 @@ -33,6 +33,9 @@
     8.4  
     8.5  #define set_pmd(pmdptr, pmdval) xen_l2_entry_update((pmdptr), (pmdval))
     8.6  
     8.7 +#define pte_clear(mm,addr,xp)	do { set_pte_at(mm, addr, xp, __pte(0)); } while (0)
     8.8 +#define pmd_clear(xp)	do { set_pmd(xp, __pmd(0)); } while (0)
     8.9 +
    8.10  #define ptep_get_and_clear(mm,addr,xp)	__pte_ma(xchg(&(xp)->pte_low, 0))
    8.11  #define pte_same(a, b)		((a).pte_low == (b).pte_low)
    8.12  #define pte_mfn(_pte) ((_pte).pte_low >> PAGE_SHIFT)
     9.1 --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-3level.h	Thu May 04 16:49:58 2006 +0100
     9.2 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-3level.h	Thu May 04 17:38:25 2006 +0100
     9.3 @@ -107,6 +107,26 @@ static inline void pud_clear (pud_t * pu
     9.4  #define pmd_offset(pud, address) ((pmd_t *) pud_page(*(pud)) + \
     9.5  			pmd_index(address))
     9.6  
     9.7 +/*
     9.8 + * For PTEs and PDEs, we must clear the P-bit first when clearing a page table
     9.9 + * entry, so clear the bottom half first and enforce ordering with a write
    9.10 + * barrier (smp_wmb()).
    9.11 + */
    9.12 +static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
    9.13 +{
    9.14 +	ptep->pte_low = 0;
    9.15 +	smp_wmb();
    9.16 +	ptep->pte_high = 0;
    9.17 +}
    9.18 +
    9.19 +static inline void pmd_clear(pmd_t *pmd)
    9.20 +{
    9.21 +	u32 *tmp = (u32 *)pmd;
    9.22 +	*tmp = 0;
    9.23 +	smp_wmb();
    9.24 +	*(tmp + 1) = 0;
    9.25 +}
    9.26 +
    9.27  static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
    9.28  {
    9.29  	pte_t res;
    10.1 --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable.h	Thu May 04 16:49:58 2006 +0100
    10.2 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable.h	Thu May 04 17:38:25 2006 +0100
    10.3 @@ -205,14 +205,12 @@ extern unsigned long long __PAGE_KERNEL,
    10.4  extern unsigned long pg0[];
    10.5  
    10.6  #define pte_present(x)	((x).pte_low & (_PAGE_PRESENT | _PAGE_PROTNONE))
    10.7 -#define pte_clear(mm,addr,xp)	do { set_pte_at(mm, addr, xp, __pte(0)); } while (0)
    10.8  
    10.9  /* To avoid harmful races, pmd_none(x) should check only the lower when PAE */
   10.10  #define pmd_none(x)	(!(unsigned long)pmd_val(x))
   10.11  /* pmd_present doesn't just test the _PAGE_PRESENT bit since wr.p.t.
   10.12     can temporarily clear it. */
   10.13  #define pmd_present(x)	(pmd_val(x))
   10.14 -#define pmd_clear(xp)	do { set_pmd(xp, __pmd(0)); } while (0)
   10.15  #define pmd_bad(x)	((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER & ~_PAGE_PRESENT)) != (_KERNPG_TABLE & ~_PAGE_PRESENT))
   10.16  
   10.17  
   10.18 @@ -272,16 +270,7 @@ static inline pte_t ptep_get_and_clear_f
   10.19  	pte_t pte;
   10.20  	if (full) {
   10.21  		pte = *ptep;
   10.22 -#ifdef CONFIG_X86_PAE
   10.23 -		/* Cannot do this in a single step, as the compiler may
   10.24 -		   issue the two stores in either order, but the hypervisor
   10.25 -		   must not see the high part before the low one. */
   10.26 -		ptep->pte_low = 0;
   10.27 -		barrier();
   10.28 -		ptep->pte_high = 0;
   10.29 -#else
   10.30 -		*ptep = __pte(0);
   10.31 -#endif
   10.32 +		pte_clear(mm, addr, ptep);
   10.33  	} else {
   10.34  		pte = ptep_get_and_clear(mm, addr, ptep);
   10.35  	}
    11.1 --- a/linux-2.6-xen-sparse/include/linux/mm.h	Thu May 04 16:49:58 2006 +0100
    11.2 +++ b/linux-2.6-xen-sparse/include/linux/mm.h	Thu May 04 17:38:25 2006 +0100
    11.3 @@ -232,10 +232,9 @@ struct page {
    11.4  		unsigned long private;		/* Mapping-private opaque data:
    11.5  					 	 * usually used for buffer_heads
    11.6  						 * if PagePrivate set; used for
    11.7 -						 * swp_entry_t if PageSwapCache.
    11.8 -						 * When page is free, this
    11.9 +						 * swp_entry_t if PageSwapCache;
   11.10  						 * indicates order in the buddy
   11.11 -						 * system.
   11.12 +						 * system if PG_buddy is set.
   11.13  						 */
   11.14  		struct address_space *mapping;	/* If low bit clear, points to
   11.15  						 * inode address_space, or NULL.
    12.1 --- a/linux-2.6-xen-sparse/mm/page_alloc.c	Thu May 04 16:49:58 2006 +0100
    12.2 +++ b/linux-2.6-xen-sparse/mm/page_alloc.c	Thu May 04 17:38:25 2006 +0100
    12.3 @@ -153,7 +153,8 @@ static void bad_page(struct page *page)
    12.4  			1 << PG_reclaim |
    12.5  			1 << PG_slab    |
    12.6  			1 << PG_swapcache |
    12.7 -			1 << PG_writeback );
    12.8 +			1 << PG_writeback |
    12.9 +			1 << PG_buddy );
   12.10  	set_page_count(page, 0);
   12.11  	reset_page_mapcount(page);
   12.12  	page->mapping = NULL;
   12.13 @@ -224,12 +225,12 @@ static inline unsigned long page_order(s
   12.14  
   12.15  static inline void set_page_order(struct page *page, int order) {
   12.16  	set_page_private(page, order);
   12.17 -	__SetPagePrivate(page);
   12.18 +	__SetPageBuddy(page);
   12.19  }
   12.20  
   12.21  static inline void rmv_page_order(struct page *page)
   12.22  {
   12.23 -	__ClearPagePrivate(page);
   12.24 +	__ClearPageBuddy(page);
   12.25  	set_page_private(page, 0);
   12.26  }
   12.27  
   12.28 @@ -268,11 +269,13 @@ static inline unsigned long
   12.29   * This function checks whether a page is free && is the buddy
   12.30   * we can do coalesce a page and its buddy if
   12.31   * (a) the buddy is not in a hole &&
   12.32 - * (b) the buddy is free &&
   12.33 - * (c) the buddy is on the buddy system &&
   12.34 - * (d) a page and its buddy have the same order.
   12.35 - * for recording page's order, we use page_private(page) and PG_private.
   12.36 + * (b) the buddy is in the buddy system &&
   12.37 + * (c) a page and its buddy have the same order.
   12.38   *
   12.39 + * For recording whether a page is in the buddy system, we use PG_buddy.
   12.40 + * Setting, clearing, and testing PG_buddy is serialized by zone->lock.
   12.41 + *
   12.42 + * For recording page's order, we use page_private(page).
   12.43   */
   12.44  static inline int page_is_buddy(struct page *page, int order)
   12.45  {
   12.46 @@ -281,10 +284,10 @@ static inline int page_is_buddy(struct p
   12.47  		return 0;
   12.48  #endif
   12.49  
   12.50 -       if (PagePrivate(page)           &&
   12.51 -           (page_order(page) == order) &&
   12.52 -            page_count(page) == 0)
   12.53 +	if (PageBuddy(page) && page_order(page) == order) {
   12.54 +		BUG_ON(page_count(page) != 0);
   12.55                 return 1;
   12.56 +	}
   12.57         return 0;
   12.58  }
   12.59  
   12.60 @@ -301,7 +304,7 @@ static inline int page_is_buddy(struct p
   12.61   * as necessary, plus some accounting needed to play nicely with other
   12.62   * parts of the VM system.
   12.63   * At each level, we keep a list of pages, which are heads of continuous
   12.64 - * free pages of length of (1 << order) and marked with PG_Private.Page's
   12.65 + * free pages of length of (1 << order) and marked with PG_buddy. Page's
   12.66   * order is recorded in page_private(page) field.
   12.67   * So when we are allocating or freeing one, we can derive the state of the
   12.68   * other.  That is, if we allocate a small block, and both were   
   12.69 @@ -364,7 +367,8 @@ static inline int free_pages_check(struc
   12.70  			1 << PG_slab	|
   12.71  			1 << PG_swapcache |
   12.72  			1 << PG_writeback |
   12.73 -			1 << PG_reserved ))))
   12.74 +			1 << PG_reserved |
   12.75 +			1 << PG_buddy ))))
   12.76  		bad_page(page);
   12.77  	if (PageDirty(page))
   12.78  		__ClearPageDirty(page);
   12.79 @@ -523,7 +527,8 @@ static int prep_new_page(struct page *pa
   12.80  			1 << PG_slab    |
   12.81  			1 << PG_swapcache |
   12.82  			1 << PG_writeback |
   12.83 -			1 << PG_reserved ))))
   12.84 +			1 << PG_reserved |
   12.85 +			1 << PG_buddy ))))
   12.86  		bad_page(page);
   12.87  
   12.88  	/*
    13.1 --- a/linux-2.6-xen-sparse/net/core/dev.c	Thu May 04 16:49:58 2006 +0100
    13.2 +++ b/linux-2.6-xen-sparse/net/core/dev.c	Thu May 04 17:38:25 2006 +0100
    13.3 @@ -2994,11 +2994,11 @@ void netdev_run_todo(void)
    13.4  
    13.5  		switch(dev->reg_state) {
    13.6  		case NETREG_REGISTERING:
    13.7 +			dev->reg_state = NETREG_REGISTERED;
    13.8  			err = netdev_register_sysfs(dev);
    13.9  			if (err)
   13.10  				printk(KERN_ERR "%s: failed sysfs registration (%d)\n",
   13.11  				       dev->name, err);
   13.12 -			dev->reg_state = NETREG_REGISTERED;
   13.13  			break;
   13.14  
   13.15  		case NETREG_UNREGISTERING: