ia64/xen-unstable

changeset 16811:420f98121491

minios: support COW for a zero page
This makes it possible to support sparse data.
Signed-off-by: Samuel Thibault <samuel.thibault@eu.citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Jan 18 16:20:13 2008 +0000 (2008-01-18)
parents ec10c9a2d976
children 1e6455d608bd
files extras/mini-os/arch/x86/mm.c extras/mini-os/arch/x86/traps.c extras/mini-os/include/ia64/arch_mm.h extras/mini-os/include/types.h extras/mini-os/include/x86/arch_mm.h extras/mini-os/include/x86/traps.h
line diff
     1.1 --- a/extras/mini-os/arch/x86/mm.c	Fri Jan 18 16:09:05 2008 +0000
     1.2 +++ b/extras/mini-os/arch/x86/mm.c	Fri Jan 18 16:20:13 2008 +0000
     1.3 @@ -50,6 +50,7 @@
     1.4  #endif
     1.5  
     1.6  unsigned long *phys_to_machine_mapping;
     1.7 +unsigned long mfn_zero;
     1.8  extern char stack[];
     1.9  extern void page_walk(unsigned long virt_addr);
    1.10  
    1.11 @@ -492,10 +493,13 @@ void *map_frames_ex(unsigned long *f, un
    1.12  static void clear_bootstrap(void)
    1.13  {
    1.14      struct xen_memory_reservation reservation;
    1.15 -    xen_pfn_t mfns[] = { virt_to_mfn(0), virt_to_mfn(&shared_info) };
    1.16 +    xen_pfn_t mfns[] = { virt_to_mfn(&shared_info) };
    1.17      int n = sizeof(mfns)/sizeof(*mfns);
    1.18      pte_t nullpte = { };
    1.19  
    1.20 +    /* Use page 0 as the CoW zero page */
    1.21 +    memset(NULL, 0, PAGE_SIZE);
    1.22 +    mfn_zero = pfn_to_mfn(0);
    1.23      if (HYPERVISOR_update_va_mapping(0, nullpte, UVMF_INVLPG))
    1.24  	printk("Unable to unmap page 0\n");
    1.25  
     2.1 --- a/extras/mini-os/arch/x86/traps.c	Fri Jan 18 16:09:05 2008 +0000
     2.2 +++ b/extras/mini-os/arch/x86/traps.c	Fri Jan 18 16:20:13 2008 +0000
     2.3 @@ -118,6 +118,46 @@ void page_walk(unsigned long virt_addres
     2.4  
     2.5  }
     2.6  
     2.7 +static int handle_cow(unsigned long addr) {
     2.8 +        pgentry_t *tab = (pgentry_t *)start_info.pt_base, page;
     2.9 +	unsigned long new_page;
    2.10 +	int rc;
    2.11 +
    2.12 +#if defined(__x86_64__)
    2.13 +        page = tab[l4_table_offset(addr)];
    2.14 +	if (!(page & _PAGE_PRESENT))
    2.15 +	    return 0;
    2.16 +        tab = pte_to_virt(page);
    2.17 +#endif
    2.18 +#if defined(__x86_64__) || defined(CONFIG_X86_PAE)
    2.19 +        page = tab[l3_table_offset(addr)];
    2.20 +	if (!(page & _PAGE_PRESENT))
    2.21 +	    return 0;
    2.22 +        tab = pte_to_virt(page);
    2.23 +#endif
    2.24 +        page = tab[l2_table_offset(addr)];
    2.25 +	if (!(page & _PAGE_PRESENT))
    2.26 +	    return 0;
    2.27 +        tab = pte_to_virt(page);
    2.28 +        
    2.29 +        page = tab[l1_table_offset(addr)];
    2.30 +	if (!(page & _PAGE_PRESENT))
    2.31 +	    return 0;
    2.32 +	/* Only support CoW for the zero page.  */
    2.33 +	if (PHYS_PFN(page) != mfn_zero)
    2.34 +	    return 0;
    2.35 +
    2.36 +	new_page = alloc_pages(0);
    2.37 +	memset((void*) new_page, 0, PAGE_SIZE);
    2.38 +
    2.39 +	rc = HYPERVISOR_update_va_mapping(addr & PAGE_MASK, __pte(virt_to_mach(new_page) | L1_PROT), UVMF_INVLPG);
    2.40 +	if (!rc)
    2.41 +		return 1;
    2.42 +
    2.43 +	printk("Map zero page to %lx failed: %d.\n", addr, rc);
    2.44 +	return 0;
    2.45 +}
    2.46 +
    2.47  #define read_cr2() \
    2.48          (HYPERVISOR_shared_info->vcpu_info[smp_processor_id()].arch.cr2)
    2.49  
    2.50 @@ -126,6 +166,10 @@ static int handling_pg_fault = 0;
    2.51  void do_page_fault(struct pt_regs *regs, unsigned long error_code)
    2.52  {
    2.53      unsigned long addr = read_cr2();
    2.54 +
    2.55 +    if ((error_code & TRAP_PF_WRITE) && handle_cow(addr))
    2.56 +	return;
    2.57 +
    2.58      /* If we are already handling a page fault, and got another one
    2.59         that means we faulted in pagetable walk. Continuing here would cause
    2.60         a recursive fault */       
     3.1 --- a/extras/mini-os/include/ia64/arch_mm.h	Fri Jan 18 16:09:05 2008 +0000
     3.2 +++ b/extras/mini-os/include/ia64/arch_mm.h	Fri Jan 18 16:20:13 2008 +0000
     3.3 @@ -37,5 +37,7 @@
     3.4  #define STACK_SIZE              (PAGE_SIZE * (1 << STACK_SIZE_PAGE_ORDER))
     3.5  
     3.6  #define map_frames(f, n) map_frames_ex(f, n, 1, 0, 1, DOMID_SELF, 0, 0)
     3.7 +/* TODO */
     3.8 +#define map_zero(n, a) map_frames_ex(NULL, n, 0, 0, a, DOMID_SELF, 0, 0)
     3.9  
    3.10  #endif /* __ARCH_MM_H__ */
     4.1 --- a/extras/mini-os/include/types.h	Fri Jan 18 16:09:05 2008 +0000
     4.2 +++ b/extras/mini-os/include/types.h	Fri Jan 18 16:20:13 2008 +0000
     4.3 @@ -57,6 +57,13 @@ typedef unsigned long       uintptr_t;
     4.4  typedef struct { unsigned long pte; } pte_t;
     4.5  #endif /* __i386__ || __x86_64__ */
     4.6  
     4.7 +#if !defined(CONFIG_X86_PAE)
     4.8 +#define __pte(x) ((pte_t) { (x) } )
     4.9 +#else
    4.10 +#define __pte(x) ({ unsigned long long _x = (x);        \
    4.11 +    ((pte_t) {(unsigned long)(_x), (unsigned long)(_x>>32)}); })
    4.12 +#endif
    4.13 +
    4.14  typedef  u8 uint8_t;
    4.15  typedef  s8 int8_t;
    4.16  typedef u16 uint16_t;
     5.1 --- a/extras/mini-os/include/x86/arch_mm.h	Fri Jan 18 16:09:05 2008 +0000
     5.2 +++ b/extras/mini-os/include/x86/arch_mm.h	Fri Jan 18 16:20:13 2008 +0000
     5.3 @@ -144,12 +144,14 @@ typedef unsigned long pgentry_t;
     5.4  
     5.5  #if defined(__i386__)
     5.6  #define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
     5.7 +#define L1_PROT_RO (_PAGE_PRESENT|_PAGE_ACCESSED)
     5.8  #define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY |_PAGE_USER)
     5.9  #if defined(CONFIG_X86_PAE)
    5.10  #define L3_PROT (_PAGE_PRESENT)
    5.11  #endif /* CONFIG_X86_PAE */
    5.12  #elif defined(__x86_64__)
    5.13  #define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
    5.14 +#define L1_PROT_RO (_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_USER)
    5.15  #define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
    5.16  #define L3_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
    5.17  #define L4_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
    5.18 @@ -190,6 +192,7 @@ typedef unsigned long maddr_t;
    5.19  
    5.20  extern unsigned long *phys_to_machine_mapping;
    5.21  extern char _text, _etext, _erodata, _edata, _end;
    5.22 +extern unsigned long mfn_zero;
    5.23  #define pfn_to_mfn(_pfn) (phys_to_machine_mapping[(_pfn)])
    5.24  static __inline__ maddr_t phys_to_machine(paddr_t phys)
    5.25  {
    5.26 @@ -224,5 +227,6 @@ static __inline__ paddr_t machine_to_phy
    5.27  #define pte_to_virt(_pte)          to_virt(mfn_to_pfn(pte_to_mfn(_pte)) << PAGE_SHIFT)
    5.28  
    5.29  #define map_frames(f, n) map_frames_ex(f, n, 1, 0, 1, DOMID_SELF, 0, L1_PROT)
    5.30 +#define map_zero(n, a) map_frames_ex(&mfn_zero, n, 0, 0, a, DOMID_SELF, 0, L1_PROT_RO)
    5.31  
    5.32  #endif /* _ARCH_MM_H_ */
     6.1 --- a/extras/mini-os/include/x86/traps.h	Fri Jan 18 16:09:05 2008 +0000
     6.2 +++ b/extras/mini-os/include/x86/traps.h	Fri Jan 18 16:20:13 2008 +0000
     6.3 @@ -70,4 +70,8 @@ struct pt_regs {
     6.4  
     6.5  void dump_regs(struct pt_regs *regs);
     6.6  
     6.7 +#define TRAP_PF_PROT   0x1
     6.8 +#define TRAP_PF_WRITE  0x2
     6.9 +#define TRAP_PF_USER   0x4
    6.10 +
    6.11  #endif /* _TRAPS_H_ */