ia64/xen-unstable
changeset 1359:8d56cd44e887
bitkeeper revision 1.896 (40a0e9e8M0uaTwE5LBe9sIhr2vdX7Q)
Live migration initial checkin.
--- a/tools/examples/xc_dom_control.py Tue May 11 14:31:55 2004 +0000
+++ b/tools/examples/xc_dom_control.py Tue May 11 14:57:44 2004 +0000
@@ -139,10 +139,12 @@ elif cmd == 'suspend':
     xc.domain_stop( dom=dom )

     while not xc.domain_getinfo( first_dom=dom, max_doms=1 )[0]['stopped']:
-        time.sleep(0.1);
+        print "Sleep..."
+        time.sleep(0.001);

     rc = xc.linux_save( dom=dom, state_file=file, progress=1)
     if rc == 0 : xc.domain_destroy( dom=dom, force=1 )
+    else: xc.domain_start( dom=dom )  # sensible for production use

 elif cmd == 'cpu_bvtslice':
     if len(sys.argv) < 3:
--- a/tools/xc/lib/xc.h Tue May 11 14:31:55 2004 +0000
+++ b/tools/xc/lib/xc.h Tue May 11 14:57:44 2004 +0000
@@ -57,7 +57,10 @@ int xc_domain_getinfo(int xc_handle,

 int xc_shadow_control(int xc_handle,
                       u64 domid,
-                      unsigned int sop);
+                      unsigned int sop,
+                      unsigned long *dirty_bitmap,
+                      unsigned long pages);
+

 #define XCFLAGS_VERBOSE 1
 #define XCFLAGS_LIVE 2
@@ -247,11 +250,6 @@ int xc_readconsolering(int xc_handle,
 int xc_physinfo(int xc_handle,
                 xc_physinfo_t *info);

-
-int xc_shadow_control(int xc_handle,
-                      u64 domid,
-                      unsigned int sop);
-
 int xc_domain_setname(int xc_handle,
                       u64 domid,
                       char *name);
--- a/tools/xc/lib/xc_domain.c Tue May 11 14:31:55 2004 +0000
+++ b/tools/xc/lib/xc_domain.c Tue May 11 14:57:44 2004 +0000
@@ -109,13 +109,24 @@ int xc_domain_getinfo(int xc_handle,

 int xc_shadow_control(int xc_handle,
                       u64 domid,
-                      unsigned int sop)
+                      unsigned int sop,
+                      unsigned long *dirty_bitmap,
+                      unsigned long pages)
 {
+    int rc;
     dom0_op_t op;
     op.cmd = DOM0_SHADOW_CONTROL;
     op.u.shadow_control.domain = (domid_t)domid;
     op.u.shadow_control.op = sop;
-    return do_dom0_op(xc_handle, &op);
+    op.u.shadow_control.dirty_bitmap = dirty_bitmap;
+    op.u.shadow_control.pages = pages;
+
+    rc = do_dom0_op(xc_handle, &op);
+
+    if ( rc == 0 )
+        return op.u.shadow_control.pages;
+    else
+        return rc;
 }

 int xc_domain_setname(int xc_handle,
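For orientation: with this change xc_shadow_control() returns the page count reported back by Xen rather than a bare status, so callers can size and validate dirty-bitmap transfers. A minimal usage sketch (handle/buffer setup assumed; per the dom0_ops.h comment in this changeset the bitmap should be mlock()ed, as xc_linux_save.c does for its to_send array):

    /* Sketch: fetch-and-reset one round of the dirty log for 'domid'.
       nr_pfns is the guest's page count; returns 0 iff a full bitmap
       was copied out. */
    int fetch_dirty_round(int xc_handle, u64 domid, unsigned long nr_pfns,
                          unsigned long *bitmap /* mlock()ed, nr_pfns bits */)
    {
        int rc = xc_shadow_control(xc_handle, domid,
                                   DOM0_SHADOW_CONTROL_OP_CLEAN,
                                   bitmap, nr_pfns);
        if ( rc < 0 )
            return rc;                       /* dom0 op failed           */
        return (rc == nr_pfns) ? 0 : -1;     /* short/over-long bitmap   */
    }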
--- a/tools/xc/lib/xc_linux_build.c Tue May 11 14:31:55 2004 +0000
+++ b/tools/xc/lib/xc_linux_build.c Tue May 11 14:57:44 2004 +0000
@@ -284,7 +284,7 @@ static int setup_guestos(int xc_handle,

     /* shared_info page starts its life empty. */
     shared_info = map_pfn_writeable(pm_handle, shared_info_frame);
-    memset(shared_info, 0, PAGE_SIZE);
+    memset(shared_info, 0, sizeof(shared_info_t));
     /* Mask all upcalls... */
     for ( i = 0; i < MAX_VIRT_CPUS; i++ )
         shared_info->vcpu_data[i].evtchn_upcall_mask = 1;
--- a/tools/xc/lib/xc_linux_restore.c Tue May 11 14:31:55 2004 +0000
+++ b/tools/xc/lib/xc_linux_restore.c Tue May 11 14:57:44 2004 +0000
@@ -230,9 +230,16 @@ int xc_linux_restore(int xc_handle,
         goto out;
     }

-    //printf("batch=%d\n",j);
+    printf("batch %d\n",j);

-    if(j==0) break;  // our work here is done
+    if (j == 0)
+        break;  // our work here is done
+
+    if( j > MAX_BATCH_SIZE )
+    {
+        ERROR("Max batch size exceeded. Giving up.");
+        goto out;
+    }

     if ( (*readerfn)(readerst, region_pfn_type, j*sizeof(unsigned long)) )
     {
@@ -242,6 +249,9 @@ int xc_linux_restore(int xc_handle,

     for(i=0;i<j;i++)
     {
+        if ((region_pfn_type[i]>>29) == 7)
+            continue;
+
         pfn = region_pfn_type[i] & ~PGT_type_mask;
         mfn = pfn_to_mfn_table[pfn];

@@ -261,9 +271,15 @@ int xc_linux_restore(int xc_handle,
         unsigned long *ppage;

         pfn = region_pfn_type[i] & ~PGT_type_mask;
+
+//if(n>=nr_pfns || ((region_pfn_type[i] & PGT_type_mask) == L2TAB) ) printf("pfn=%08lx mfn=%x\n",region_pfn_type[i],pfn_to_mfn_table[pfn]);
+

 //if(pfn_type[i])printf("^pfn=%d %08lx\n",pfn,pfn_type[i]);

+        if ((region_pfn_type[i]>>29) == 7)
+            continue;
+
         if (pfn>nr_pfns)
         {
             ERROR("pfn out of range");
@@ -304,7 +320,7 @@ int xc_linux_restore(int xc_handle,

             if ( xpfn >= nr_pfns )
             {
-                ERROR("Frame number in type %d page table is out of range. i=%d k=%d pfn=%d nr_pfns=%d",region_pfn_type[i]>>29,i,k,xpfn,nr_pfns);
+                ERROR("Frame number in type %d page table is out of range. i=%d k=%d pfn=0x%x nr_pfns=%d",region_pfn_type[i]>>29,i,k,xpfn,nr_pfns);
                 goto out;
             }
 #if 0
@@ -355,17 +371,19 @@ int xc_linux_restore(int xc_handle,
         default:
             ERROR("Bogus page type %x page table is out of range. i=%d nr_pfns=%d",region_pfn_type[i],i,nr_pfns);
             goto out;
-        }
+
+        } // end of page type switch statement

         if ( add_mmu_update(xc_handle, mmu,
                             (mfn<<PAGE_SHIFT) | MMU_MACHPHYS_UPDATE, pfn) )
             goto out;

-    }
+    } // end of 'batch' for loop

     n+=j; // crude stats

 }
+printf("RECEIVED ALL PAGES\n");

 mfn_mapper_close( region_mapper );

@@ -381,7 +399,10 @@ int xc_linux_restore(int xc_handle,
                          (pfn_to_mfn_table[i]<<PAGE_SHIFT) |
                          MMU_EXTENDED_COMMAND,
                          MMUEXT_PIN_L1_TABLE) )
+    {
+        printf("ERR pin L1 pfn=%lx mfn=%lx\n");
         goto out;
+    }
 }
 else if ( pfn_type[i] == L2TAB )
 {
@@ -389,7 +410,10 @@ int xc_linux_restore(int xc_handle,
                          (pfn_to_mfn_table[i]<<PAGE_SHIFT) |
                          MMU_EXTENDED_COMMAND,
                          MMUEXT_PIN_L2_TABLE) )
+    {
+        printf("ERR pin L2 pfn=%lx mfn=%lx\n");
         goto out;
+    }
 }
 }

@@ -421,6 +445,8 @@ int xc_linux_restore(int xc_handle,
     p_srec->resume_info.flags = 0;
     unmap_pfn(pm_handle, p_srec);

+printf("new shared info is %lx\n", shared_info_frame);
+
     /* Uncanonicalise each GDT frame number. */
     if ( ctxt.gdt_ents > 8192 )
     {
@@ -451,7 +477,7 @@ int xc_linux_restore(int xc_handle,

     /* Copy saved contents of shared-info page. No checking needed. */
     ppage = map_pfn_writeable(pm_handle, shared_info_frame);
-    memcpy(ppage, shared_info, PAGE_SIZE);
+    memcpy(ppage, shared_info, sizeof(shared_info_t));
     unmap_pfn(pm_handle, ppage);

@@ -528,7 +554,9 @@ int xc_linux_restore(int xc_handle,
     op.u.builddomain.ctxt = &ctxt;
     rc = do_dom0_op(xc_handle, &op);

+printf("NORMAL EXIT RESTORE\n");
 out:
+printf("EXIT RESTORE\n");
     if ( mmu != NULL )
         free(mmu);
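For orientation, the save-image layout this restore loop consumes, reconstructed from the reader calls above and the writer calls in xc_linux_save.c below (sizes are those used in the code; a batch count of zero terminates the page stream, and the execution context and shared-info page now trail the memory image so they can be captured after the final live pass):

    /* "LinuxGuestRecord"            16 bytes
       name                          sizeof(name)
       nr_pfns                       unsigned long
       pfn-to-mfn frame list         PAGE_SIZE
       repeated { batch count        int            (0 == end of stream)
                  pfn_type[count]    unsigned long each
                  page contents      PAGE_SIZE per page written }
       ctxt                          sizeof(ctxt)
       shared_info page              PAGE_SIZE                          */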
--- a/tools/xc/lib/xc_linux_save.c Tue May 11 14:31:55 2004 +0000
+++ b/tools/xc/lib/xc_linux_save.c Tue May 11 14:57:44 2004 +0000
@@ -22,11 +22,17 @@
 /*
  * Returns TRUE if the given machine frame number has a unique mapping
  * in the guest's pseudophysical map.
+ * 0x80000000-3 mark the shared_info, and blk/net rings
  */
 #define MFN_IS_IN_PSEUDOPHYS_MAP(_mfn) \
-    (((_mfn) < (1024*1024)) && \
-     (live_pfn_to_mfn_table[live_mfn_to_pfn_table[_mfn]] == (_mfn)))
-
+    (((_mfn) < (1024*1024)) && \
+     ( ( (live_mfn_to_pfn_table[_mfn] < nr_pfns) && \
+         (live_pfn_to_mfn_table[live_mfn_to_pfn_table[_mfn]] == (_mfn)) ) || \
+\
+       (live_mfn_to_pfn_table[_mfn] >= 0x80000000 && \
+        live_mfn_to_pfn_table[_mfn] <= 0x80000003 ) || \
+       live_pfn_to_mfn_table[live_mfn_to_pfn_table[_mfn]] == 0x80000004 ) )
+
 /* Returns TRUE if MFN is successfully converted to a PFN. */
 #define translate_mfn_to_pfn(_pmfn) \
 ({ \
@@ -40,6 +46,14 @@
 })


+/* test_bit */
+inline int test_bit ( int nr, volatile void * addr)
+{
+    return ( ((unsigned long*)addr)[nr/(sizeof(unsigned long)*8)] >>
+             (nr % (sizeof(unsigned long)*8) ) ) & 1;
+}
+
+
 int xc_linux_save(int xc_handle,
                   u64 domid,
                   unsigned int flags,
@@ -47,14 +61,11 @@ int xc_linux_save(int xc_handle,
                   void *writerst )
 {
     dom0_op_t op;
-    int rc = 1, i, j, k, n;
+    int rc = 1, i, j, k, n, last_iter, iter = 0;
     unsigned long mfn;
-    unsigned int prev_pc, this_pc;
     int verbose = flags & XCFLAGS_VERBOSE;
-    //int live = flags & XCFLAGS_LIVE;
-
-    /* state of the new MFN mapper */
-    mfn_mapper_t *mapper_handle1, *mapper_handle2;
+    int live = 1; //flags & XCFLAGS_LIVE; // XXXXXXXXXXXXXXXXXXX
+    int sent_last_iter, sent_this_iter, max_iters;

     /* Remember if we stopped the guest, so we can restart it on exit. */
     int we_stopped_it = 0;
@@ -90,8 +101,13 @@ int xc_linux_save(int xc_handle,
     unsigned char *region_base;

     /* A temporary mapping, and a copy, of the guest's suspend record. */
-    suspend_record_t *p_srec, srec;
+    suspend_record_t *p_srec;

+    /* number of pages we're dealing with */
+    unsigned long nr_pfns;
+
+    /* bitmap of pages left to send */
+    unsigned long *to_send;

     if ( mlock(&ctxt, sizeof(ctxt) ) )
     {
@@ -129,21 +145,24 @@ int xc_linux_save(int xc_handle,
             goto out;
         }

-        sleep(1);
+        usleep(1000); // 1ms
+        printf("Sleep for 1ms\n");
     }

+#if 1
     /* A cheesy test to see whether the domain contains valid state. */
     if ( ctxt.pt_base == 0 )
     {
         ERROR("Domain is not in a valid Linux guest OS state");
         goto out;
     }
+#endif


     /* Map the suspend-record MFN to pin it. The page must be owned by
        domid for this to succeed. */
     p_srec = mfn_mapper_map_single(xc_handle, domid,
-                                   sizeof(srec), PROT_READ,
+                                   sizeof(*p_srec), PROT_READ,
                                    ctxt.cpu_ctxt.esi );

     if (!p_srec)
@@ -152,10 +171,10 @@ int xc_linux_save(int xc_handle,
         goto out;
     }

-    memcpy( &srec, p_srec, sizeof(srec) );
+    nr_pfns = p_srec->nr_pfns;

     /* cheesy sanity check */
-    if ( srec.nr_pfns > 1024*1024 )
+    if ( nr_pfns > 1024*1024 )
     {
         ERROR("Invalid state record -- pfn count out of range");
         goto out;
@@ -165,55 +184,13 @@ int xc_linux_save(int xc_handle,
     live_pfn_to_mfn_frame_list =
         mfn_mapper_map_single(xc_handle, domid,
                               PAGE_SIZE, PROT_READ,
-                              srec.pfn_to_mfn_frame_list );
+                              p_srec->pfn_to_mfn_frame_list );

     if (!live_pfn_to_mfn_frame_list)
     {
         ERROR("Couldn't map pfn_to_mfn_frame_list");
         goto out;
     }
-
-
-    if ( (mapper_handle1 = mfn_mapper_init(xc_handle, domid,
-                                           1024*1024, PROT_READ ))
-         == NULL )
-        goto out;
-
-    for ( i = 0; i < (srec.nr_pfns+1023)/1024; i++ )
-    {
-        /* Grab a copy of the pfn-to-mfn table frame list.
-           This has the effect of preventing the page from being freed and
-           given to another domain. (though the domain is stopped anyway...) */
-        mfn_mapper_queue_entry( mapper_handle1, i<<PAGE_SHIFT,
-                                live_pfn_to_mfn_frame_list[i],
-                                PAGE_SIZE );
-    }
-
-    if ( mfn_mapper_flush_queue(mapper_handle1) )
-    {
-        ERROR("Couldn't map pfn_to_mfn table");
-        goto out;
-    }
-
-    live_pfn_to_mfn_table = mfn_mapper_base( mapper_handle1 );
-
-
-
-    /* We want zeroed memory so use calloc rather than malloc. */
-    pfn_type = calloc(BATCH_SIZE, sizeof(unsigned long));
-
-    if ( (pfn_type == NULL) )
-    {
-        errno = ENOMEM;
-        goto out;
-    }
-
-    if ( mlock( pfn_type, BATCH_SIZE * sizeof(unsigned long) ) )
-    {
-        ERROR("Unable to mlock");
-        goto out;
-    }
-

     /* Track the mfn_to_pfn table down from the domains PT */
     {
@@ -233,49 +210,106 @@ int xc_linux_save(int xc_handle,
                                   mfn_to_pfn_table_start_mfn );
     }

+    /* Map all the frames of the pfn->mfn table. For migrate to succeed,
+       the guest must not change which frames are used for this purpose.
+       (its not clear why it would want to change them, and we'll be OK
+       from a safety POV anyhow. */
+
+    live_pfn_to_mfn_table = mfn_mapper_map_batch( xc_handle, domid,
+                                                  PROT_READ,
+                                                  live_pfn_to_mfn_frame_list,
+                                                  (nr_pfns+1023)/1024 );
+    if( !live_pfn_to_mfn_table )
+    {
+        PERROR("Couldn't map pfn_to_mfn table");
+        goto out;
+    }
+
+    for(i=0;i<(nr_pfns+1023)/1024 ;i++)
+        printf("LF: %d %x\n",i,live_pfn_to_mfn_frame_list[i]);
+
+
+    /* At this point, we can start the domain again if we're doign a
+       live suspend */
+
+    if( live )
+    {
+#if 1
+        if ( xc_shadow_control( xc_handle, domid,
+                                DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY,
+                                NULL, 0 ) < 0 )
+        {
+            ERROR("Couldn't enable shadow mode");
+            goto out;
+        }
+#endif
+        if ( xc_domain_start( xc_handle, domid ) < 0 )
+        {
+            ERROR("Couldn't restart domain");
+            goto out;
+        }
+//exit(-1);
+        last_iter = 0;
+        sent_last_iter = 1<<20; // 4GB's worth of pages
+        max_iters = 8; // limit us to 9 time round loop
+    }
+    else
+        last_iter = 1;
+
+
+    /* Setup to_send bitmap */
+    {
+        int sz = (nr_pfns/8) + 8; // includes slop at end of array
+
+        to_send = malloc( sz );
+
+        if (!to_send)
+        {
+            ERROR("Couldn't allocate to_send array");
+            goto out;
+        }
+        memset( to_send, 0xff, sz );
+
+        if ( mlock( to_send, sz ) )
+        {
+            PERROR("Unable to mlock to_send");
+            return 1;
+        }
+    }
+
+
+    /* We want zeroed memory so use calloc rather than malloc. */
+    pfn_type = calloc(BATCH_SIZE, sizeof(unsigned long));
+
+    if ( (pfn_type == NULL) )
+    {
+        errno = ENOMEM;
+        goto out;
+    }
+
+    if ( mlock( pfn_type, BATCH_SIZE * sizeof(unsigned long) ) )
+    {
+        ERROR("Unable to mlock");
+        goto out;
+    }
+

     /*
      * Quick belt and braces sanity check.
      */

-    for ( i = 0; i < srec.nr_pfns; i++ )
+    for ( i = 0; i < nr_pfns; i++ )
     {
         mfn = live_pfn_to_mfn_table[i];

-        if( live_mfn_to_pfn_table[mfn] != i )
-            printf("i=%d mfn=%d live_mfn_to_pfn_table=%d\n",
+        if( (live_mfn_to_pfn_table[mfn] != i) && (mfn != 0x80000004) )
+            printf("i=0x%x mfn=%x live_mfn_to_pfn_table=%x\n",
                    i,mfn,live_mfn_to_pfn_table[mfn]);
     }

-
-    /* Canonicalise the suspend-record frame number. */
-    if ( !translate_mfn_to_pfn(&ctxt.cpu_ctxt.esi) )
-    {
-        ERROR("State record is not in range of pseudophys map");
-        goto out;
-    }
-
-    /* Canonicalise each GDT frame number. */
-    for ( i = 0; i < ctxt.gdt_ents; i += 512 )
-    {
-        if ( !translate_mfn_to_pfn(&ctxt.gdt_frames[i]) )
-        {
-            ERROR("GDT frame is not in range of pseudophys map");
-            goto out;
-        }
-    }
-
-    /* Canonicalise the page table base pointer. */
-    if ( !MFN_IS_IN_PSEUDOPHYS_MAP(ctxt.pt_base >> PAGE_SHIFT) )
-    {
-        ERROR("PT base is not in range of pseudophys map");
-        goto out;
-    }
-    ctxt.pt_base = live_mfn_to_pfn_table[ctxt.pt_base >> PAGE_SHIFT] << PAGE_SHIFT;
-
     /* Canonicalise the pfn-to-mfn table frame-number list. */
     memcpy( pfn_to_mfn_frame_list, live_pfn_to_mfn_frame_list, PAGE_SIZE );
-    for ( i = 0; i < srec.nr_pfns; i += 1024 )
+    for ( i = 0; i < nr_pfns; i += 1024 )
     {
         if ( !translate_mfn_to_pfn(&pfn_to_mfn_frame_list[i/1024]) )
         {
@@ -284,7 +318,7 @@ int xc_linux_save(int xc_handle,
         }
     }

-    /* Start writing out the saved-domain record. */
+    /* Map the shared info frame */
     live_shinfo = mfn_mapper_map_single(xc_handle, domid,
                                         PAGE_SIZE, PROT_READ,
                                         shared_info_frame);
@@ -295,164 +329,290 @@ int xc_linux_save(int xc_handle,
         goto out;
     }

+    /* Start writing out the saved-domain record. */
+
     if ( (*writerfn)(writerst, "LinuxGuestRecord", 16) ||
          (*writerfn)(writerst, name, sizeof(name)) ||
-         (*writerfn)(writerst, &srec.nr_pfns, sizeof(unsigned long)) ||
-         (*writerfn)(writerst, &ctxt, sizeof(ctxt)) ||
-         (*writerfn)(writerst, live_shinfo, PAGE_SIZE) ||
+         (*writerfn)(writerst, &nr_pfns, sizeof(unsigned long)) ||
          (*writerfn)(writerst, pfn_to_mfn_frame_list, PAGE_SIZE) )
     {
         ERROR("Error when writing to state file (1)");
         goto out;
     }
-    munmap(live_shinfo, PAGE_SIZE);
-
-    verbose_printf("Saving memory pages: 0%%");
-
-    if ( (mapper_handle2 = mfn_mapper_init(xc_handle, domid,
-                                           BATCH_SIZE*4096, PROT_READ ))
-         == NULL )
-        goto out;
-
-    region_base = mfn_mapper_base( mapper_handle2 );

     /* Now write out each data page, canonicalising page tables as we go... */
-    prev_pc = 0;
-    for ( n = 0; n < srec.nr_pfns; )
+
+    while(1)
     {
-        this_pc = (n * 100) / srec.nr_pfns;
-        if ( (this_pc - prev_pc) >= 5 )
-        {
-            verbose_printf("\b\b\b\b%3d%%", this_pc);
-            prev_pc = this_pc;
-        }
+        unsigned int prev_pc, batch, sent_this_iter;
+
+        iter++;
+
+        sent_this_iter = 0;
+        prev_pc = 0;
+        verbose_printf("Saving memory pages: iter %d   0%%", iter);

-        for( j = 0, i = n; j < BATCH_SIZE && i < srec.nr_pfns ; j++, i++ )
-        {
-            pfn_type[j] = live_pfn_to_mfn_table[i];
-        }
+        n=0;
+        while( n < nr_pfns )
+        {
+            unsigned int this_pc = (n * 100) / nr_pfns;
+            if ( (this_pc - prev_pc) >= 5 )
+            {
+                verbose_printf("\b\b\b\b%3d%%", this_pc);
+                prev_pc = this_pc;
+            }


-        for( j = 0, i = n; j < BATCH_SIZE && i < srec.nr_pfns ; j++, i++ )
-        {
-            /* queue up mappings for all of the pages in this batch */
+            /* load pfn_type[] with the mfn of all the pages we're doing in
+               this batch. */
+
+            for( batch = 0; batch < BATCH_SIZE && n < nr_pfns ; n++ )
+            {
+                if ( !test_bit(n, to_send ) ) continue;
+
+                pfn_type[batch] = live_pfn_to_mfn_table[n];
+
+                if( pfn_type[batch] == 0x80000004 )
+                {
+                    //printf("Skip netbuf pfn %lx. mfn %lx\n",n,pfn_type[batch]);
+                    continue;
+                }

-//printf("region n=%d j=%d i=%d mfn=%d\n",n,j,i,live_pfn_to_mfn_table[i]);
-            mfn_mapper_queue_entry( mapper_handle2, j<<PAGE_SHIFT,
-                                    live_pfn_to_mfn_table[i],
-                                    PAGE_SIZE );
-        }
+//if(iter>1) printf("pfn=%x mfn=%x\n",n,pfn_type[batch]);
+
+                batch++;
+            }
+
+            for( j = 0; j < batch; j++ )
+            {
+
+                if( (pfn_type[j] &0xfffff) == 0x0000004 )
+                {
+                    printf("XXXXXXXXSkip netbuf entry %d mfn %lx\n",j,pfn_type[j]);
+                }
+
+
+            }
+
+
+            printf("batch %d:%d (n=%d)\n",iter,batch,n);

-        if( mfn_mapper_flush_queue(mapper_handle2) )
-        {
-            ERROR("Couldn't map page region");
-            goto out;
-        }
-
-        if ( get_pfn_type_batch(xc_handle, domid, j, pfn_type) )
-        {
-            ERROR("get_pfn_type_batch failed");
-            goto out;
-        }
-
-        for( j = 0, i = n; j < BATCH_SIZE && i < srec.nr_pfns ; j++, i++ )
-        {
-            if((pfn_type[j]>>29) == 7)
+            if(batch == 0) goto skip; // vanishingly unlikely...
+
+            if ( (region_base = mfn_mapper_map_batch( xc_handle, domid,
+                                                      PROT_READ,
+                                                      pfn_type,
+                                                      batch )) == 0)
+            {
+                PERROR("map batch failed");
+                goto out;
+            }
+
+            if ( get_pfn_type_batch(xc_handle, domid, batch, pfn_type) )
             {
-                ERROR("bogus page");
+                ERROR("get_pfn_type_batch failed");
+                goto out;
+            }
+
+            for( j = 0; j < batch; j++ )
+            {
+                if((pfn_type[j]>>29) == 7)
+                {
+                    //printf("type fail: page %i mfn %08lx\n",j,pfn_type[j]);
+                    continue;
+                }
+//if((pfn_type[j] & PGT_type_mask) == L2TAB) printf("L2 pfn=%08lx mfn=%lx\n",pfn_type[j],live_mfn_to_pfn_table[pfn_type[j]&~PGT_type_mask]);
+
+                /* canonicalise mfn->pfn */
+                pfn_type[j] = (pfn_type[j] & PGT_type_mask) |
+                    live_mfn_to_pfn_table[pfn_type[j]&~PGT_type_mask];
+            }
+
+
+            if ( (*writerfn)(writerst, &batch, sizeof(int) ) )
+            {
+                ERROR("Error when writing to state file (2)");
                 goto out;
             }

-            /* canonicalise mfn->pfn */
-            pfn_type[j] = (pfn_type[j] & PGT_type_mask) |
-                live_mfn_to_pfn_table[pfn_type[j]&~PGT_type_mask];
+            if ( (*writerfn)(writerst, pfn_type, sizeof(unsigned long)*j ) )
+            {
+                ERROR("Error when writing to state file (3)");
+                goto out;
+            }

-/*          if(pfn_type[j]>>29)
-              printf("i=%d type=%d\n",i,pfn_type[i]); */
+            /* entering this loop, pfn_type is now in pfns (Not mfns) */
+            for( j = 0; j < batch; j++ )
+            {
+                /* write out pages in batch */
+
+                if((pfn_type[j]>>29) == 7)
+                {
+                    //printf("SKIP BOGUS page %i mfn %08lx\n",j,pfn_type[j]);
+                    continue;
+                }
+
+                if ( ((pfn_type[j] & PGT_type_mask) == L1TAB) ||
+                     ((pfn_type[j] & PGT_type_mask) == L2TAB) )
+                {
+
+                    memcpy(page, region_base + (PAGE_SIZE*j), PAGE_SIZE);
+
+                    for ( k = 0;
+                          k < (((pfn_type[j] & PGT_type_mask) == L2TAB) ?
+                               (HYPERVISOR_VIRT_START >> L2_PAGETABLE_SHIFT) : 1024);
+                          k++ )
+                    {
+                        unsigned long pfn;
+
+                        if ( !(page[k] & _PAGE_PRESENT) ) continue;
+                        mfn = page[k] >> PAGE_SHIFT;
+                        pfn = live_mfn_to_pfn_table[mfn];
+
+                        if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) )
+                        {
+                            printf("FNI %d : [%08lx,%d] pte=%08lx, mfn=%08lx, pfn=%08lx [mfn]=%08lx\n",
+                                   j, pfn_type[j], k,
+                                   page[k], mfn, live_mfn_to_pfn_table[mfn],
+                                   (live_mfn_to_pfn_table[mfn]<nr_pfns)?
+                                   live_pfn_to_mfn_table[live_mfn_to_pfn_table[mfn]]: 0xdeadbeef);
+                            pfn = 0; // be suspicious
+
+//                          ERROR("Frame number in pagetable page is invalid");
+//                          goto out;
+
+
+                        }
+                        page[k] &= PAGE_SIZE - 1;
+                        page[k] |= pfn << PAGE_SHIFT;
+
+                        /*
+                          printf("L%d i=%d pfn=%d mfn=%d k=%d pte=%08lx xpfn=%d\n",
+                          pfn_type[j]>>29,
+                          j,i,mfn,k,page[k],page[k]>>PAGE_SHIFT);
+                        */
+
+                    } /* end of page table rewrite for loop */
+
+                    if ( (*writerfn)(writerst, page, PAGE_SIZE) )
+                    {
+                        ERROR("Error when writing to state file (4)");
+                        goto out;
+                    }
+
+                } /* end of it's a PT page */
+                else
+                { /* normal page */
+                    if ( (*writerfn)(writerst, region_base + (PAGE_SIZE*j), PAGE_SIZE) )
+                    {
+                        ERROR("Error when writing to state file (5)");
+                        goto out;
+                    }
+                }
+            } /* end of the write out for this batch */
+
+            sent_this_iter += batch;
+
+        } /* end of this while loop for this iteration */
+
+        munmap(region_base, batch*PAGE_SIZE);
+
+    skip:
+
+        verbose_printf("\b\b\b\b100%% (%d pages)\n", sent_this_iter );
+
+        if ( last_iter )
+            break;
+
+        if ( live )
+        {
+            if ( sent_this_iter < (sent_last_iter * 0.95) && iter < max_iters )
+            {
+                // we seem to be doing OK, keep going
+            }
+            else
+            {
+                printf("Start last iteration\n");
+                last_iter = 1;
+
+                xc_domain_stop_sync( xc_handle, domid );
+
+            }
+
+            if ( xc_shadow_control( xc_handle, domid,
+                                    DOM0_SHADOW_CONTROL_OP_CLEAN,
+                                    to_send, nr_pfns ) != nr_pfns )
+            {
+                ERROR("Error flushing shadow PT");
+                goto out;
+            }
+
+#if 0
+            if(last_iter) memset(to_send, 0xff, (nr_pfns+7)/8 );
+#endif
+
+            sent_last_iter = sent_this_iter;
         }


-        if ( (*writerfn)(writerst, &j, sizeof(int) ) )
-        {
-            ERROR("Error when writing to state file (2)");
-            goto out;
-        }
-
-        if ( (*writerfn)(writerst, pfn_type, sizeof(unsigned long)*j ) )
-        {
-            ERROR("Error when writing to state file (3)");
-            goto out;
-        }
-
-
-        for( j = 0, i = n; j < BATCH_SIZE && i < srec.nr_pfns ; j++, i++ )
-        {
-            /* write out pages in batch */
-
-            if ( ((pfn_type[j] & PGT_type_mask) == L1TAB) ||
-                 ((pfn_type[j] & PGT_type_mask) == L2TAB) )
-            {
-
-                memcpy(page, region_base + (PAGE_SIZE*j), PAGE_SIZE);
-
-                for ( k = 0;
-                      k < (((pfn_type[j] & PGT_type_mask) == L2TAB) ?
-                           (HYPERVISOR_VIRT_START >> L2_PAGETABLE_SHIFT) : 1024);
-                      k++ )
-                {
-                    if ( !(page[k] & _PAGE_PRESENT) ) continue;
-                    mfn = page[k] >> PAGE_SHIFT;
    } /* end of while 1 */

-                    if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) )
-                    {
-                        ERROR("Frame number in pagetable page is invalid");
-                        goto out;
-                    }
-                    page[k] &= PAGE_SIZE - 1;
-                    page[k] |= live_mfn_to_pfn_table[mfn] << PAGE_SHIFT;
-
-                    /*
-                      printf("L%d i=%d pfn=%d mfn=%d k=%d pte=%08lx xpfn=%d\n",
-                      pfn_type[j]>>29,
-                      j,i,mfn,k,page[k],page[k]>>PAGE_SHIFT);
-                    */
-
-                }
-
-                if ( (*writerfn)(writerst, page, PAGE_SIZE) )
-                {
-                    ERROR("Error when writing to state file (4)");
-                    goto out;
-                }
-
-
-            }
-            else
-            {
-                if ( (*writerfn)(writerst, region_base + (PAGE_SIZE*j), PAGE_SIZE) )
-                {
-                    ERROR("Error when writing to state file (5)");
-                    goto out;
-                }
-            }
-        }
-
-        n+=j; /* i is the master loop counter */
-    }
-
-    verbose_printf("\b\b\b\b100%%\nMemory saved.\n");
+printf("All memory is saved\n");

     /* Success! */
     rc = 0;
-
+
     /* Zero terminate */
     if ( (*writerfn)(writerst, &rc, sizeof(int)) )
     {
         ERROR("Error when writing to state file (6)");
         goto out;
     }
-

+    /* Get the final execution context */
+    op.cmd = DOM0_GETDOMAININFO;
+    op.u.getdomaininfo.domain = (domid_t)domid;
+    op.u.getdomaininfo.ctxt = &ctxt;
+    if ( (do_dom0_op(xc_handle, &op) < 0) ||
+         ((u64)op.u.getdomaininfo.domain != domid) )
+    {
+        PERROR("Could not get info on domain");
+        goto out;
+    }
+printf("A\n");
+    /* Canonicalise the suspend-record frame number. */
+    if ( !translate_mfn_to_pfn(&ctxt.cpu_ctxt.esi) )
+    {
+        ERROR("State record is not in range of pseudophys map");
+        goto out;
+    }
+printf("B\n");
+    /* Canonicalise each GDT frame number. */
+    for ( i = 0; i < ctxt.gdt_ents; i += 512 )
+    {
+        if ( !translate_mfn_to_pfn(&ctxt.gdt_frames[i]) )
+        {
+            ERROR("GDT frame is not in range of pseudophys map");
            goto out;
+        }
+    }
+printf("C\n");
+    /* Canonicalise the page table base pointer. */
+    if ( !MFN_IS_IN_PSEUDOPHYS_MAP(ctxt.pt_base >> PAGE_SHIFT) )
+    {
+        ERROR("PT base is not in range of pseudophys map");
+        goto out;
+    }
+    ctxt.pt_base = live_mfn_to_pfn_table[ctxt.pt_base >> PAGE_SHIFT] << PAGE_SHIFT;
+printf("D\n");
+    if ( (*writerfn)(writerst, &ctxt, sizeof(ctxt)) ||
+         (*writerfn)(writerst, live_shinfo, PAGE_SIZE) )
+    {
+        ERROR("Error when writing to state file (1)");
+        goto out;
+    }
+    munmap(live_shinfo, PAGE_SIZE);
+printf("E\n");
 out:
     /* Restart the domain if we had to stop it to save its state. */
     if ( we_stopped_it )
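The policy buried in the loop above is classic pre-copy: keep resending dirtied pages while each round sends noticeably fewer than the last, then stop the domain for one final round. A condensed restatement (send_marked_pages() is a hypothetical stand-in for the batching/canonicalising machinery above; the constants are those in this checkin):

    sent_last_iter = 1<<20;                   /* treat round 1 as "everything" */
    for ( iter = 1; ; iter++ )
    {
        sent_this_iter = send_marked_pages(to_send);

        if ( last_iter )
            break;                            /* final pass ran with guest stopped */

        if ( !(sent_this_iter < sent_last_iter * 0.95 && iter < max_iters) )
        {
            last_iter = 1;                    /* progress stalled or budget spent */
            xc_domain_stop_sync(xc_handle, domid);
        }

        /* swap the freshly dirtied set into to_send, resetting Xen's log */
        if ( xc_shadow_control(xc_handle, domid, DOM0_SHADOW_CONTROL_OP_CLEAN,
                               to_send, nr_pfns) != nr_pfns )
            break;                            /* error */

        sent_last_iter = sent_this_iter;
    }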
--- a/tools/xc/lib/xc_private.c Tue May 11 14:31:55 2004 +0000
+++ b/tools/xc/lib/xc_private.c Tue May 11 14:57:44 2004 +0000
@@ -47,6 +47,31 @@ void unmap_pfn(int pm_handle, void *vadd

 /*******************/

+void * mfn_mapper_map_batch(int xc_handle, domid_t dom, int prot,
+                            unsigned long *arr, int num )
+{
+    privcmd_mmapbatch_t ioctlx;
+    void *addr;
+    addr = mmap( NULL, num*PAGE_SIZE, prot, MAP_SHARED, xc_handle, 0 );
+    if (addr)
+    {
+        ioctlx.num=num;
+        ioctlx.dom=dom;
+        ioctlx.addr=(unsigned long)addr;
+        ioctlx.arr=arr;
+        if ( ioctl( xc_handle, IOCTL_PRIVCMD_MMAPBATCH, &ioctlx ) <0 )
+        {
+            perror("XXXXXXXX");
+            munmap(addr, num*PAGE_SIZE);
+            return 0;
+        }
+    }
+    return addr;
+
+}
+
+/*******************/
+
 void * mfn_mapper_map_single(int xc_handle, domid_t dom,
                              int size, int prot,
                              unsigned long mfn )
@@ -64,7 +89,10 @@ void * mfn_mapper_map_single(int xc_hand
     entry.mfn=mfn;
     entry.npages=(size+PAGE_SIZE-1)>>PAGE_SHIFT;
     if ( ioctl( xc_handle, IOCTL_PRIVCMD_MMAP, &ioctlx ) <0 )
+    {
+        munmap(addr, size);
         return 0;
+    }
 }
 return addr;
 }
@@ -295,7 +323,7 @@ static int flush_mmu_updates(int xc_hand

     hypercall.op = __HYPERVISOR_mmu_update;
     hypercall.arg[0] = (unsigned long)mmu->updates;
-    hypercall.arg[1] = (unsigned long)mmu->idx;
+    hypercall.arg[1] = (unsigned long)&(mmu->idx);

     if ( mlock(mmu->updates, sizeof(mmu->updates)) != 0 )
     {
@@ -342,3 +370,47 @@ int finish_mmu_updates(int xc_handle, mm
 {
     return flush_mmu_updates(xc_handle, mmu);
 }
+
+
+/***********************************************************/
+
+/* this function is a hack until we get proper synchronous domain stop */
+
+int xc_domain_stop_sync( int xc_handle, domid_t domid )
+{
+    dom0_op_t op;
+
+    while (1)
+    {
+        op.cmd = DOM0_STOPDOMAIN;
+        op.u.stopdomain.domain = (domid_t)domid;
+        if ( do_dom0_op(xc_handle, &op) != 0 )
+        {
+            PERROR("Stopping target domain failed");
+            goto out;
+        }
+
+        usleep(1000); // 1ms
+        printf("Sleep for 1ms\n");
+
+        op.cmd = DOM0_GETDOMAININFO;
+        op.u.getdomaininfo.domain = (domid_t)domid;
+        op.u.getdomaininfo.ctxt = NULL;
+        if ( (do_dom0_op(xc_handle, &op) < 0) ||
+             ((u64)op.u.getdomaininfo.domain != domid) )
+        {
+            PERROR("Could not get info on domain");
+            goto out;
+        }
+
+        if ( op.u.getdomaininfo.state == DOMSTATE_STOPPED )
+        {
+            printf("Domain %lld stopped\n",domid);
+            return 0;
+        }
+
+    }
+
+out:
+    return -1;
+}
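A short usage sketch for the new batch mapper (handle and frame array assumed; this mirrors how xc_linux_save.c maps each batch, and the mapping is released with plain munmap()):

    unsigned long frames[16];      /* MFNs to map, e.g. from a frame list */
    unsigned char *base = mfn_mapper_map_batch(xc_handle, domid, PROT_READ,
                                               frames, 16);
    if ( base == NULL )
        return -1;                 /* privcmd ioctl failed */
    /* frame i is now readable at base + i*PAGE_SIZE */
    munmap(base, 16 * PAGE_SIZE);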
--- a/tools/xc/lib/xc_private.h Tue May 11 14:31:55 2004 +0000
+++ b/tools/xc/lib/xc_private.h Tue May 11 14:57:44 2004 +0000
@@ -232,6 +232,9 @@ typedef struct mfn_mapper {
 void * mfn_mapper_map_single(int xc_handle, domid_t dom, int size, int prot,
                              unsigned long mfn );

+void * mfn_mapper_map_batch(int xc_handle, domid_t dom, int prot,
+                            unsigned long *arr, int num );
+
 mfn_mapper_t * mfn_mapper_init(int xc_handle, domid_t dom, int size, int prot);

 void * mfn_mapper_base(mfn_mapper_t *t);
@@ -245,5 +248,6 @@ void * mfn_mapper_queue_entry(mfn_mapper

 /*********************/

+int xc_domain_stop_sync( int xc_handle, domid_t dom );

 #endif /* __XC_PRIVATE_H__ */
--- a/tools/xc/py/Xc.c Tue May 11 14:31:55 2004 +0000
+++ b/tools/xc/py/Xc.c Tue May 11 14:57:44 2004 +0000
@@ -190,16 +190,17 @@ static PyObject *pyxc_linux_save(PyObjec

     u64 dom;
     char *state_file;
-    int progress = 1;
+    int progress = 1, live = 0;
     unsigned int flags = 0;

-    static char *kwd_list[] = { "dom", "state_file", "progress", NULL };
+    static char *kwd_list[] = { "dom", "state_file", "progress", "live", NULL };

-    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "Ls|i", kwd_list,
-                                      &dom, &state_file, &progress) )
+    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "Ls|ii", kwd_list,
+                                      &dom, &state_file, &progress, &live) )
         return NULL;

     if (progress) flags |= XCFLAGS_VERBOSE;
+    if (live) flags |= XCFLAGS_LIVE;

     if (strncmp(state_file,"tcp:", strlen("tcp:")) == 0)
     {
@@ -1273,7 +1274,7 @@ static PyObject *pyxc_shadow_control(PyO
                                       &dom, &op) )
         return NULL;

-    if ( xc_shadow_control(xc->xc_handle, dom, op) != 0 )
+    if ( xc_shadow_control(xc->xc_handle, dom, op, NULL, 0) < 0 )
         return PyErr_SetFromErrno(xc_error);

     Py_INCREF(zero);
--- a/tools/xend/lib/utils.c Tue May 11 14:31:55 2004 +0000
+++ b/tools/xend/lib/utils.c Tue May 11 14:57:44 2004 +0000
@@ -723,6 +723,11 @@ static PyObject *xu_port_new(PyObject *s
         goto fail4;
     }

+    xup->interface->tx_resp_prod = 0;
+    xup->interface->rx_req_prod = 0;
+    xup->interface->tx_req_prod = 0;
+    xup->interface->rx_resp_prod = 0;
+
     xup->tx_req_cons = 0;
     xup->tx_resp_prod = 0;
     xup->rx_req_prod = 0;
--- a/xen/common/dom0_ops.c Tue May 11 14:31:55 2004 +0000
+++ b/xen/common/dom0_ops.c Tue May 11 14:57:44 2004 +0000
@@ -525,10 +525,10 @@ long do_dom0_op(dom0_op_t *u_dom0_op)
         p = find_domain_by_id( op->u.shadow_control.domain );
         if ( p )
         {
-            ret = shadow_mode_control(p, op->u.shadow_control.op );
+            ret = shadow_mode_control(p, &op->u.shadow_control );
             put_task_struct(p);
-        }
-
+            copy_to_user(u_dom0_op, op, sizeof(*op));
+        }
     }
     break;
--- a/xen/common/domain.c Tue May 11 14:31:55 2004 +0000
+++ b/xen/common/domain.c Tue May 11 14:57:44 2004 +0000
@@ -89,9 +89,15 @@ struct task_struct *do_createdomain(domi
     memset(p->shared_info, 0, PAGE_SIZE);
     SHARE_PFN_WITH_DOMAIN(virt_to_page(p->shared_info), p);

+    machine_to_phys_mapping[virt_to_phys(p->shared_info) >> PAGE_SHIFT] =
+        0x80000000UL;  // set m2p table to magic marker (helps debug)
+
     p->mm.perdomain_pt = (l1_pgentry_t *)get_free_page(GFP_KERNEL);
     memset(p->mm.perdomain_pt, 0, PAGE_SIZE);

+    machine_to_phys_mapping[virt_to_phys(p->mm.perdomain_pt) >> PAGE_SHIFT] =
+        0x0fffdeadUL;  // set m2p table to magic marker (helps debug)
+
     init_blkdev_info(p);

     /* Per-domain PCI-device list. */
@@ -486,6 +492,7 @@ void free_all_dom_mem(struct task_struct
 unsigned int alloc_new_dom_mem(struct task_struct *p, unsigned int kbytes)
 {
     unsigned int alloc_pfns, nr_pages;
+    struct pfn_info *page;

     nr_pages = (kbytes + ((PAGE_SIZE-1)>>10)) >> (PAGE_SHIFT - 10);
     p->max_pages = nr_pages; /* this can now be controlled independently */
@@ -493,13 +500,16 @@ unsigned int alloc_new_dom_mem(struct ta
     /* grow the allocation if necessary */
     for ( alloc_pfns = p->tot_pages; alloc_pfns < nr_pages; alloc_pfns++ )
     {
-        if ( unlikely(alloc_domain_page(p) == NULL) ||
+        if ( unlikely((page=alloc_domain_page(p)) == NULL) ||
              unlikely(free_pfns < (SLACK_DOMAIN_MEM_KILOBYTES >>
                                    (PAGE_SHIFT-10))) )
         {
             free_all_dom_mem(p);
             return -ENOMEM;
         }
+
+        /* initialise to machine_to_phys_mapping table to likely pfn */
+        machine_to_phys_mapping[page-frame_table] = alloc_pfns;
     }

     p->tot_pages = nr_pages;
--- a/xen/common/memory.c Tue May 11 14:31:55 2004 +0000
+++ b/xen/common/memory.c Tue May 11 14:57:44 2004 +0000
@@ -213,7 +213,12 @@ void __init init_frametable(unsigned lon
        belonging to the machine_to_phys_mapping to CPU0 idle task */

     mfn = virt_to_phys((void *)RDWR_MPT_VIRT_START)>>PAGE_SHIFT;
-//  for(i=0;i<nr_pages;i+=1024,mfn++)
+
+    /* initialise to a magic of 0x55555555 so easier to spot bugs later */
+    memset( machine_to_phys_mapping, 0x55, 4*1024*1024 );
+
+    /* The array is sized for a 4GB machine regardless of actuall mem size.
+       This costs 4MB -- may want to fix some day */
     for(i=0;i<1024*1024;i+=1024,mfn++)
     {
         frame_table[mfn].count_and_flags = 1 | PGC_allocated;
@@ -325,7 +330,7 @@ static int get_page_from_pagenr(unsigned

     if ( unlikely(!get_page(page, p)) )
     {
-        MEM_LOG("Could not get page ref for pfn %08lx\n", page_nr);
+        MEM_LOG("Could not get page ref for pfn %08lx", page_nr);
         return 0;
     }

@@ -944,8 +949,9 @@ static int do_extended_command(unsigned
 }


-int do_mmu_update(mmu_update_t *ureqs, int count)
+int do_mmu_update(mmu_update_t *ureqs, int * p_count)
 {
+    int count;
     mmu_update_t req;
     unsigned long va = 0, deferred_ops, pfn, prev_pfn = 0;
     struct pfn_info *page;
@@ -954,6 +960,11 @@ int do_mmu_update(mmu_update_t *ureqs, i
     unsigned long prev_spfn = 0;
     l1_pgentry_t *prev_spl1e = 0;

+    if ( unlikely( get_user(count, p_count) ) )
+    {
+        return -EFAULT;
+    }
+
     perfc_incrc(calls_to_mmu_update);
     perfc_addc(num_page_updates, count);

@@ -1110,6 +1121,9 @@ int do_mmu_update(mmu_update_t *ureqs, i
         percpu_info[cpu].gps = percpu_info[cpu].pts = NULL;
     }

+    if ( unlikely(rc) )
+        put_user( count, p_count );
+
     return rc;
 }
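do_mmu_update() now takes its count by pointer: it reads the count with get_user() and, on failure, writes it back with put_user() so the caller can see how far a batch got. The guest-side convention therefore becomes (as in the traps.c and privcmd hunks of this changeset):

    mmu_update_t u;
    int count = 1;                          /* in: number of requests */
    u.ptr = MMU_EXTENDED_COMMAND | (unsigned long)&default_ldt[0];
    u.val = MMUEXT_SET_LDT | (5 << MMUEXT_CMD_SHIFT);
    if ( HYPERVISOR_mmu_update(&u, &count) < 0 )
        /* out: 'count' was rewritten by Xen to report progress */;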
--- a/xen/common/network.c Tue May 11 14:31:55 2004 +0000
+++ b/xen/common/network.c Tue May 11 14:57:44 2004 +0000
@@ -111,6 +111,9 @@ net_vif_t *create_net_vif(domid_t dom)
     clear_page(new_ring);
     SHARE_PFN_WITH_DOMAIN(virt_to_page(new_ring), p);

+    machine_to_phys_mapping[virt_to_phys(new_ring)>>PAGE_SHIFT] =
+        0x80000001;  // magic value aids debugging
+
     /*
      * Fill in the new vif struct. Note that, while the vif's refcnt is
      * non-zero, we hold a reference to the task structure.
--- a/xen/common/shadow.c Tue May 11 14:31:55 2004 +0000
+++ b/xen/common/shadow.c Tue May 11 14:57:44 2004 +0000
@@ -123,6 +123,7 @@ static inline int shadow_page_op( struct
     }
     return work;
 }
+
 static void __scan_shadow_table( struct mm_struct *m, unsigned int op )
 {
     int j, work=0;
@@ -150,7 +151,7 @@ static void __scan_shadow_table( struct
         }
         shadow_audit(m,0);
     }
-    SH_LOG("Scan shadow table. Work=%d l1=%d l2=%d", work, perfc_value(shadow_l1_pages), perfc_value(shadow_l2_pages));
+    SH_VLOG("Scan shadow table. Work=%d l1=%d l2=%d", work, perfc_value(shadow_l1_pages), perfc_value(shadow_l2_pages));
 }


@@ -160,7 +161,6 @@ int shadow_mode_enable( struct task_stru
     struct shadow_status **fptr;
     int i;

-
     spin_lock_init(&m->shadow_lock);
     spin_lock(&m->shadow_lock);

@@ -217,7 +217,6 @@ int shadow_mode_enable( struct task_stru

     // call shadow_mk_pagetable
     shadow_mk_pagetable( m );
-
     return 0;

 nomem:
@@ -260,9 +259,12 @@ void shadow_mode_disable( struct task_st
         kfree( &m->shadow_ht[0] );
     }

-static void shadow_mode_table_op( struct task_struct *p, unsigned int op )
+static int shadow_mode_table_op( struct task_struct *p,
+                                 dom0_shadow_control_t *sc )
 {
+    unsigned int op = sc->op;
     struct mm_struct *m = &p->mm;
+    int rc = 0;

     // since Dom0 did the hypercall, we should be running with it's page
     // tables right now. Calling flush on yourself would be really
@@ -271,13 +273,13 @@ static void shadow_mode_table_op( struct
     if ( m == &current->mm )
     {
         printk("Don't try and flush your own page tables!\n");
-        return;
+        return -EINVAL;
     }


     spin_lock(&m->shadow_lock);

-    SH_LOG("shadow mode table op %08lx %08lx count %d",pagetable_val( m->pagetable),pagetable_val(m->shadow_table), m->shadow_page_count);
+    SH_VLOG("shadow mode table op %08lx %08lx count %d",pagetable_val( m->pagetable),pagetable_val(m->shadow_table), m->shadow_page_count);

     shadow_audit(m,1);

@@ -288,27 +290,60 @@ static void shadow_mode_table_op( struct
         break;

     case DOM0_SHADOW_CONTROL_OP_CLEAN:
-        __scan_shadow_table( m, op );
-        // we used to bzero dirty bitmap here, but now leave this to user space
-        // if we were double buffering we'd do the flip here
+    {
+        int i;
+
+        __scan_shadow_table( m, op );
+
+        if( p->tot_pages > sc->pages ||
+            !sc->dirty_bitmap || !p->mm.shadow_dirty_bitmap )
+        {
+            rc = -EINVAL;
+            goto out;
+        }
+
+        sc->pages = p->tot_pages;
+
+#define chunk (8*1024)  // do this in 1KB chunks for L1 cache
+
+        for(i=0;i<p->tot_pages;i+=chunk)
+        {
+            int bytes = (( ((p->tot_pages-i) > (chunk))?
+                           (chunk):(p->tot_pages-i) ) + 7) / 8;
+
+            copy_to_user( sc->dirty_bitmap + (i/(8*sizeof(unsigned long))),
+                          p->mm.shadow_dirty_bitmap +(i/(8*sizeof(unsigned long))),
+                          bytes );
+
+            memset( p->mm.shadow_dirty_bitmap +(i/(8*sizeof(unsigned long))),
+                    0, bytes);
+        }
+
         break;
+    }
     }

+
+out:
+
     spin_unlock(&m->shadow_lock);

-    SH_LOG("shadow mode table op : page count %d", m->shadow_page_count);
+    SH_VLOG("shadow mode table op : page count %d", m->shadow_page_count);

     shadow_audit(m,1);

     // call shadow_mk_pagetable
     shadow_mk_pagetable( m );

+    return rc;
 }


-int shadow_mode_control( struct task_struct *p, unsigned int op )
+int shadow_mode_control( struct task_struct *p, dom0_shadow_control_t *sc )
 {
     int we_paused = 0;
+    unsigned int cmd = sc->op;
+    int rc = 0;

     // don't call if already shadowed...

@@ -321,18 +356,23 @@ int shadow_mode_control( struct task_str
         we_paused = 1;
     }

-    if ( p->mm.shadow_mode && op == DOM0_SHADOW_CONTROL_OP_OFF )
+    if ( p->mm.shadow_mode && cmd == DOM0_SHADOW_CONTROL_OP_OFF )
     {
         shadow_mode_disable(p);
     }
-    else if ( op == DOM0_SHADOW_CONTROL_OP_ENABLE_TEST )
+    else if ( cmd == DOM0_SHADOW_CONTROL_OP_ENABLE_TEST )
     {
         if(p->mm.shadow_mode) shadow_mode_disable(p);
         shadow_mode_enable(p, SHM_test);
     }
-    else if ( p->mm.shadow_mode && op >= DOM0_SHADOW_CONTROL_OP_FLUSH && op<=DOM0_SHADOW_CONTROL_OP_CLEAN )
+    else if ( cmd == DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY )
     {
-        shadow_mode_table_op(p, op);
+        if(p->mm.shadow_mode) shadow_mode_disable(p);
+        shadow_mode_enable(p, SHM_logdirty);
+    }
+    else if ( p->mm.shadow_mode && cmd >= DOM0_SHADOW_CONTROL_OP_FLUSH && cmd<=DOM0_SHADOW_CONTROL_OP_CLEAN )
+    {
+        rc = shadow_mode_table_op(p, sc);
     }
     else
     {
@@ -341,7 +381,7 @@ int shadow_mode_control( struct task_str
     }

     if ( we_paused ) wake_up(p);
-    return 0;
+    return rc;
 }
--- a/xen/drivers/block/xen_block.c Tue May 11 14:31:55 2004 +0000
+++ b/xen/drivers/block/xen_block.c Tue May 11 14:57:44 2004 +0000
@@ -19,6 +19,7 @@
 #include <xen/interrupt.h>
 #include <xen/vbd.h>
 #include <xen/slab.h>
+#include <xen/shadow.h>

 /*
  * These are rather arbitrary. They are fairly large because adjacent requests
@@ -358,9 +359,18 @@ static void unlock_buffer(unsigned long
           pfn < ((buffer + size + PAGE_SIZE - 1) >> PAGE_SHIFT);
           pfn++ )
     {
+
+        /* Find the domain from the frame_table. Yuk... */
+        struct task_struct *p = frame_table[pfn].u.domain;
+
+        if( p->mm.shadow_mode == SHM_logdirty )
+            mark_dirty( &p->mm, pfn );
+
+
         if ( writeable_buffer )
             put_page_type(&frame_table[pfn]);
         put_page(&frame_table[pfn]);
+
     }
 }

@@ -597,6 +607,10 @@ void init_blkdev_info(struct task_struct
     p->blk_ring_base = (blk_ring_t *)get_free_page(GFP_KERNEL);
     clear_page(p->blk_ring_base);
     SHARE_PFN_WITH_DOMAIN(virt_to_page(p->blk_ring_base), p);
+
+    machine_to_phys_mapping[virt_to_phys(p->blk_ring_base)>>PAGE_SHIFT] =
+        0x80000002;  // magic value aids debugging
+
     p->blkdev_list.next = NULL;
     spin_lock_init(&p->vbd_lock);
 }
--- a/xen/include/asm-i386/processor.h Tue May 11 14:31:55 2004 +0000
+++ b/xen/include/asm-i386/processor.h Tue May 11 14:57:44 2004 +0000
@@ -449,7 +449,7 @@ struct mm_struct {
     struct shadow_status *shadow_ht;
     struct shadow_status *shadow_ht_free;
     struct shadow_status *shadow_ht_extras; /* extra allocation units */
-    unsigned int *shadow_dirty_bitmap;
+    unsigned long *shadow_dirty_bitmap;
     unsigned int shadow_dirty_bitmap_size;  /* in pages, bit per page */
     unsigned int shadow_page_count;
     unsigned int shadow_max_page_count;
--- a/xen/include/hypervisor-ifs/dom0_ops.h Tue May 11 14:31:55 2004 +0000
+++ b/xen/include/hypervisor-ifs/dom0_ops.h Tue May 11 14:57:44 2004 +0000
@@ -243,6 +243,9 @@ typedef struct dom0_shadow_control_st
     /* IN variables. */
     domid_t domain;
     int op;
+    unsigned long *dirty_bitmap;  // pointe to mlocked buffer
+    /* IN/OUT variables */
+    unsigned long pages;  // size of buffer, updated with actual size
 } dom0_shadow_control_t;

 #define DOM0_SETDOMAINNAME 26
--- a/xen/include/xen/mm.h Tue May 11 14:31:55 2004 +0000
+++ b/xen/include/xen/mm.h Tue May 11 14:57:44 2004 +0000
@@ -164,8 +164,8 @@ static inline int get_page(struct pfn_in
          unlikely(x & PGC_zombie) ||     /* Zombie? */
          unlikely(p != domain) )         /* Wrong owner? */
     {
-        DPRINTK("Error pfn %08lx: ed=%p(%lld), sd=%p(%lld), caf=%08x\n",
-                page_to_pfn(page), domain, (domain)?domain->domain:1234, p, (p)?p->domain:1234, x);
+        DPRINTK("Error pfn %08lx: ed=%p(%lld), sd=%p(%lld), caf=%08x, taf=%08x\n",
+                page_to_pfn(page), domain, (domain)?domain->domain:999, p, (p && !((x & PGC_count_mask) == 0))?p->domain:999, x, page->type_and_flags);
         return 0;
     }
     __asm__ __volatile__(
@@ -314,7 +314,7 @@ int check_descriptor(unsigned long a, un
 #define machine_to_phys_mapping ((unsigned long *)RDWR_MPT_VIRT_START)

 /* Part of the domain API. */
-int do_mmu_update(mmu_update_t *updates, int count);
+int do_mmu_update(mmu_update_t *updates, int *count);

 #define DEFAULT_GDT_ENTRIES ((LAST_RESERVED_GDT_ENTRY*8)+7)
 #define DEFAULT_GDT_ADDRESS ((unsigned long)gdt_table)
--- a/xen/include/xen/shadow.h Tue May 11 14:31:55 2004 +0000
+++ b/xen/include/xen/shadow.h Tue May 11 14:57:44 2004 +0000
@@ -23,7 +23,7 @@
 #define shadow_linear_pg_table ((l1_pgentry_t *)SH_LINEAR_PT_VIRT_START)
 #define shadow_linear_l2_table ((l2_pgentry_t *)(SH_LINEAR_PT_VIRT_START+(SH_LINEAR_PT_VIRT_START>>(L2_PAGETABLE_SHIFT-L1_PAGETABLE_SHIFT))))

-extern int shadow_mode_control( struct task_struct *p, unsigned int op );
+extern int shadow_mode_control( struct task_struct *p, dom0_shadow_control_t *sc );
 extern int shadow_fault( unsigned long va, long error_code );
 extern void shadow_l1_normal_pt_update( unsigned long pa, unsigned long gpte,
                                         unsigned long *prev_spfn_ptr,
@@ -50,7 +50,7 @@ struct shadow_status {

 #ifndef NDEBUG
 #define SH_LOG(_f, _a...) \
-  printk("DOM%llu: (file=shadow.c, line=%d) " _f "\n", \
+  printk("DOM%lld: (file=shadow.c, line=%d) " _f "\n", \
          current->domain , __LINE__ , ## _a )
 #else
 #define SH_LOG(_f, _a...)
@@ -58,7 +58,7 @@ struct shadow_status {

 #if SHADOW_DEBUG
 #define SH_VLOG(_f, _a...) \
-  printk("DOM%llu: (file=shadow.c, line=%d) " _f "\n", \
+  printk("DOM%lld: (file=shadow.c, line=%d) " _f "\n", \
         current->domain , __LINE__ , ## _a )
 #else
 #define SH_VLOG(_f, _a...)
@@ -66,19 +66,27 @@ struct shadow_status {

 #if 0
 #define SH_VVLOG(_f, _a...) \
-  printk("DOM%llu: (file=shadow.c, line=%d) " _f "\n", \
+  printk("DOM%lld: (file=shadow.c, line=%d) " _f "\n", \
         current->domain , __LINE__ , ## _a )
 #else
 #define SH_VVLOG(_f, _a...)
 #endif


-
 /************************************************************************/

 static inline void mark_dirty( struct mm_struct *m, unsigned int mfn )
 {
-    unsigned int pfn = machine_to_phys_mapping[mfn];
+    unsigned int pfn;
+
+    pfn = machine_to_phys_mapping[mfn];
+
+    /* We use values with the top bit set to mark MFNs that aren't
+       really part of the domain's psuedo-physical memory map e.g.
+       the shared info frame. Nothing to do here...
+    */
+    if ( unlikely(pfn & 0x80000000U) ) return;
+
     ASSERT(m->shadow_dirty_bitmap);
     if( likely(pfn<m->shadow_dirty_bitmap_size) )
     {
@@ -91,7 +99,14 @@ static inline void mark_dirty( struct mm
     }
     else
     {
-        SH_LOG("mark_dirty pfn out of range attempt!");
+        extern void show_traceX(void);
+        SH_LOG("mark_dirty OOR! mfn=%x pfn=%x max=%x (mm %p)",
+               mfn, pfn, m->shadow_dirty_bitmap_size, m );
+        SH_LOG("dom=%lld caf=%08x taf=%08x\n",
+               frame_table[mfn].u.domain->domain,
+               frame_table[mfn].count_and_flags,
+               frame_table[mfn].type_and_flags );
+        //show_traceX();
     }

 }
@@ -116,7 +131,7 @@ static inline void l1pte_write_fault( st
         spte = gpte;
         gpte |= _PAGE_DIRTY | _PAGE_ACCESSED;
         spte |= _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED;
-        mark_dirty( m, gpte >> PAGE_SHIFT );
+        mark_dirty( m, (gpte >> PAGE_SHIFT) );
         break;
     }

@@ -343,7 +358,7 @@ static inline unsigned long get_shadow_s

     if( m->shadow_mode == SHM_logdirty )
         mark_dirty( m, gpfn );
-
+
     spin_lock(&m->shadow_lock);
     res = __shadow_status( m, gpfn );
     if (!res) spin_unlock(&m->shadow_lock);
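mark_dirty()'s in-range path sets one bit per pfn in shadow_dirty_bitmap, which is why the field becomes unsigned long in processor.h (matching the test_bit() helper added to xc_linux_save.c). A sketch of the producer/consumer pairing, assuming the plain word-array bit layout:

    /* producer (Xen), per dirtied pfn: */
    bm[pfn / (8 * sizeof(unsigned long))] |=
        1UL << (pfn % (8 * sizeof(unsigned long)));

    /* consumer (xc_linux_save.c), per candidate pfn: */
    if ( test_bit(n, to_send) )
        /* page n was dirtied and needs (re)sending */;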
--- a/xen/net/dev.c Tue May 11 14:31:55 2004 +0000
+++ b/xen/net/dev.c Tue May 11 14:57:44 2004 +0000
@@ -547,6 +547,9 @@ void deliver_packet(struct sk_buff *skb,
         goto out;
     }

+    machine_to_phys_mapping[new_page - frame_table] =
+        machine_to_phys_mapping[old_page - frame_table];
+
     if ( p->mm.shadow_mode &&
          (spte_pfn=get_shadow_status(&p->mm, pte_page-frame_table)) )
     {
@@ -557,17 +560,15 @@ void deliver_packet(struct sk_buff *skb,
         *sptr = new_pte;
         unmap_domain_mem(sptr);

-        if( p->mm.shadow_mode == SHM_logdirty )
-            mark_dirty( &p->mm, new_page-frame_table );
-
         put_shadow_status(&p->mm);
     }
-
-    machine_to_phys_mapping[new_page - frame_table]
-        = machine_to_phys_mapping[old_page - frame_table];

     unmap_domain_mem(ptep);

+    /* if in shadow mode, mark the buffer as dirty */
+    if( p->mm.shadow_mode == SHM_logdirty )
+        mark_dirty( &p->mm, (new_page-frame_table) );
+
     /* Updates must happen before releasing the descriptor. */
     smp_wmb();

@@ -2143,8 +2144,6 @@ static void get_rx_bufs(net_vif_t *vif)
             put_page_and_type(pte_page);
             make_rx_response(vif, rx.id, 0, RING_STATUS_BAD_PAGE, 0);
             goto rx_unmap_and_continue;
-
-            /* XXX IAP should SHADOW_CONFIG do something here? */
         }

         /*
@@ -2156,9 +2155,11 @@ static void get_rx_bufs(net_vif_t *vif)
               0) !=
              (PGC_allocated | PGC_tlb_flush_on_type_change | 2)) )
         {
-            DPRINTK("Page held more than once %08x %s\n",
+            DPRINTK("Page held more than once mfn=%x %08x %s\n",
+                    buf_page-frame_table,
                     buf_page->count_and_flags,
                     (buf_page->u.domain)?buf_page->u.domain->name:"None");
+
             if ( !get_page_type(buf_page, PGT_writeable_page) )
                 put_page(buf_page);
             else if ( cmpxchg(ptep, pte & ~_PAGE_PRESENT, pte) !=
@@ -2264,6 +2265,13 @@ long flush_bufs_for_vif(net_vif_t *vif)

         put_page_and_type(&frame_table[rx->pte_ptr >> PAGE_SHIFT]);

+        /* if in shadow mode, mark the PTE as dirty */
+        if( p->mm.shadow_mode == SHM_logdirty )
+            mark_dirty( &p->mm, rx->pte_ptr>>PAGE_SHIFT );
+        /* assume the shadow page table is about to be blown away,
+           and that its not worth marking the buffer as dirty */
+
+
         make_rx_response(vif, rx->id, 0, RING_STATUS_DROPPED, 0);
     }
     vif->rx_cons = i;
--- a/xenolinux-2.4.26-sparse/arch/xen/drivers/block/block.c Tue May 11 14:31:55 2004 +0000
+++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/block/block.c Tue May 11 14:57:44 2004 +0000
@@ -527,8 +527,6 @@ static void reset_xlblk_interface(void)
 {
     block_io_op_t op;

-    nr_pending = 0;
-
     op.cmd = BLOCK_IO_OP_RESET;
     if ( HYPERVISOR_block_io_op(&op) != 0 )
         printk(KERN_ALERT "Possible blkdev trouble: couldn't reset ring\n");
@@ -549,6 +547,8 @@ int __init xlblk_init(void)
 {
     int error;

+    nr_pending = 0;
+
     reset_xlblk_interface();

     xlblk_response_irq = bind_virq_to_irq(VIRQ_BLKDEV);
--- a/xenolinux-2.4.26-sparse/arch/xen/drivers/dom0/core.c Tue May 11 14:31:55 2004 +0000
+++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/dom0/core.c Tue May 11 14:57:44 2004 +0000
@@ -103,12 +103,12 @@ static int privcmd_ioctl(struct inode *i
                 if (msg[j].va + (msg[j].npages<<PAGE_SHIFT) > vma->vm_end)
                     return -EINVAL;

-                if (rc = direct_remap_area_pages(vma->vm_mm,
+                if ( (rc = direct_remap_area_pages(vma->vm_mm,
                                                  msg[j].va&PAGE_MASK,
                                                  msg[j].mfn<<PAGE_SHIFT,
                                                  msg[j].npages<<PAGE_SHIFT,
                                                  vma->vm_page_prot,
-                                                 mmapcmd.dom))
+                                                 mmapcmd.dom)) <0)
                     return rc;
             }
         }
@@ -116,6 +116,91 @@ static int privcmd_ioctl(struct inode *i
     }
     break;

+    case IOCTL_PRIVCMD_MMAPBATCH:
+    {
+#define MAX_DIRECTMAP_MMU_QUEUE 130
+        mmu_update_t u[MAX_DIRECTMAP_MMU_QUEUE], *w, *v;
+        privcmd_mmapbatch_t m;
+        struct vm_area_struct *vma = NULL;
+        unsigned long *p, addr;
+        unsigned long mfn;
+        int i;
+
+        if ( copy_from_user(&m, (void *)data, sizeof(m)) )
+        { ret = -EFAULT; goto batch_err; }
+
+        vma = find_vma( current->mm, m.addr );
+
+        if (!vma)
+        { ret = -EINVAL; goto batch_err; }
+
+        if (m.addr > PAGE_OFFSET)
+        { ret = -EFAULT; goto batch_err; }
+
+        if (m.addr + (m.num<<PAGE_SHIFT) > vma->vm_end)
+        { ret = -EFAULT; goto batch_err; }
+
+        // everything fits inside the vma
+
+//printk("direct_r_a_p sx=%ld address=%lx macaddr=%lx dom=%lld\n",size,address,machine_addr,domid);
+//      memset( u, 0, sizeof(mmu_update_t)*MAX_DIRECTMAP_MMU_QUEUE );// XXX
+
+
+        if ( m.dom != 0 )
+        {
+            u[0].val = (unsigned long)(m.dom<<16) & ~0xFFFFUL;
+            u[0].ptr = (unsigned long)(m.dom<< 0) & ~0xFFFFUL;
+            u[1].val = (unsigned long)(m.dom>>16) & ~0xFFFFUL;
+            u[1].ptr = (unsigned long)(m.dom>>32) & ~0xFFFFUL;
+            u[0].ptr |= MMU_EXTENDED_COMMAND;
+            u[0].val |= MMUEXT_SET_SUBJECTDOM_L;
+            u[1].ptr |= MMU_EXTENDED_COMMAND;
+            u[1].val |= MMUEXT_SET_SUBJECTDOM_H;
+            v = w = &u[2];
+        }
+        else
+        {
+            v = w = &u[0];
+        }
+
+        p = m.arr;
+        addr = m.addr;
+//printk("BATCH: arr=%p addr=%lx num=%d u=%p,w=%p\n",p,addr,m.num,u,w);
+        for (i=0; i<m.num; i++, addr+=PAGE_SIZE, p++)
+        {
+            unsigned int count;
+            if ( get_user(mfn, p) ) return -EFAULT;
+
+            v->val = (mfn << PAGE_SHIFT) | pgprot_val(vma->vm_page_prot) |
+                _PAGE_IO;
+
+            __direct_remap_area_pages( vma->vm_mm,
+                                       addr,
+                                       PAGE_SIZE,
+                                       v);
+            v++;
+            count = v-u;
+//printk("Q i=%d mfn=%x co=%d v=%p : %lx %lx\n",i,mfn,count,v, w->val,w->ptr);
+
+            if ( HYPERVISOR_mmu_update(u, &count) < 0 )
+            {
+                //printk("Fail %d->%d mfn=%lx\n",v-u,count, w->val);
+                put_user( 0xe0000000 | mfn, p );
+            }
+            v=w;
+        }
+        ret = 0;
+        break;
+
+    batch_err:
+        printk("batch_err ret=%d vma=%p addr=%lx num=%d arr=%lx %lx-%lx\n",
+               ret, vma, m.addr, m.num, m.arr, vma->vm_start, vma->vm_end);
+        break;
+    }
+    break;
+
+
+
     default:
         ret = -EINVAL;
         break;
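Note the per-page failure convention in the MMAPBATCH handler: a frame that cannot be mapped has its slot in the caller's array overwritten with 0xe0000000|mfn. A userspace caller can scan for that after the ioctl returns (sketch; arr/num are the fields passed in privcmd_mmapbatch_t):

    int i;
    for ( i = 0; i < num; i++ )
        if ( (arr[i] & 0xe0000000) == 0xe0000000 )
            fprintf(stderr, "frame %d (mfn %lx) failed to map\n",
                    i, arr[i] & ~0xe0000000);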
24.1 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/network/network.c Tue May 11 14:31:55 2004 +0000 24.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/network/network.c Tue May 11 14:57:44 2004 +0000 24.3 @@ -248,6 +248,8 @@ static void network_alloc_rx_buffers(str 24.4 np->net_ring->rx_ring[MASK_NET_RX_IDX(i)].req.addr = 24.5 virt_to_machine(get_ppte(skb->head)); 24.6 24.7 + /* Shadow optimisation: disown this page from p->m map */ 24.8 + phys_to_machine_mapping[virt_to_phys(skb->head)>>PAGE_SHIFT] = 0x80000004; 24.9 np->rx_bufs_to_notify++; 24.10 } 24.11 while ( (++i - np->rx_resp_cons) != XENNET_RX_RING_SIZE ); 24.12 @@ -364,6 +366,9 @@ static inline void _network_interrupt(st 24.13 skb = np->rx_skbs[rx->id]; 24.14 ADD_ID_TO_FREELIST(np->rx_skbs, rx->id); 24.15 24.16 + phys_to_machine_mapping[virt_to_phys(skb->head) >> PAGE_SHIFT] = 24.17 + (*(unsigned long *)get_ppte(skb->head)) >> PAGE_SHIFT; 24.18 + 24.19 if ( unlikely(rx->status != RING_STATUS_OK) ) 24.20 { 24.21 /* Gate this error. We get a (valid) slew of them on suspend. */ 24.22 @@ -382,9 +387,6 @@ static inline void _network_interrupt(st 24.23 skb_shinfo(skb)->nr_frags = 0; 24.24 skb_shinfo(skb)->frag_list = NULL; 24.25 24.26 - phys_to_machine_mapping[virt_to_phys(skb->head) >> PAGE_SHIFT] = 24.27 - (*(unsigned long *)get_ppte(skb->head)) >> PAGE_SHIFT; 24.28 - 24.29 skb->data = skb->tail = skb->head + rx->offset; 24.30 skb_put(skb, rx->size); 24.31 skb->protocol = eth_type_trans(skb, dev);
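Both hunks above keep the phys-to-machine table consistent across the page flip performed on network receive: when a buffer is handed to the hypervisor, its p2m entry is overwritten with the sentinel 0x80000004 so the shadow-mode dirty-logging code stops tracking a frame the guest no longer owns; on receive the entry is rebuilt from the pte before the status check, since presumably even error-status slots have had their frame swizzled. A condensed sketch of the pair, using the hunk's own helpers (phys_to_machine_mapping[], get_ppte()):

    /* The frame backing va is being handed to the hypervisor as an rx
     * buffer: disown it in the p2m table. */
    static inline void p2m_disown(void *va)
    {
        phys_to_machine_mapping[virt_to_phys(va) >> PAGE_SHIFT] = 0x80000004;
    }

    /* On receive a fresh machine frame has been installed under this
     * va's pte; mirror it back into the p2m table. */
    static inline void p2m_reclaim(void *va)
    {
        phys_to_machine_mapping[virt_to_phys(va) >> PAGE_SHIFT] =
            (*(unsigned long *)get_ppte(va)) >> PAGE_SHIFT;
    }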
25.1 --- a/xenolinux-2.4.26-sparse/arch/xen/kernel/setup.c Tue May 11 14:31:55 2004 +0000 25.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/kernel/setup.c Tue May 11 14:57:44 2004 +0000 25.3 @@ -1161,11 +1161,11 @@ static void stop_task(void *unused) 25.4 virt_to_machine(pfn_to_mfn_frame_list) >> PAGE_SHIFT; 25.5 suspend_record->nr_pfns = max_pfn; 25.6 25.7 - j = 0; 25.8 - for ( i = 0; i < max_pfn; i += (PAGE_SIZE / sizeof(unsigned long)) ) 25.9 - pfn_to_mfn_frame_list[j++] = 25.10 + for ( i=0, j=0; i < max_pfn; i+=(PAGE_SIZE/sizeof(unsigned long)), j++ ) 25.11 + { 25.12 + pfn_to_mfn_frame_list[j] = 25.13 virt_to_machine(&phys_to_machine_mapping[i]) >> PAGE_SHIFT; 25.14 - 25.15 + } 25.16 /* 25.17 * NB. This is /not/ a full dev_close() as that loses route information! 25.18 * Instead we do essentialy the same as dev_close() but without notifying 25.19 @@ -1207,7 +1207,9 @@ static void stop_task(void *unused) 25.20 memcpy(&start_info, &suspend_record->resume_info, sizeof(start_info)); 25.21 25.22 set_fixmap(FIX_SHARED_INFO, start_info.shared_info); 25.23 + 25.24 HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO); 25.25 + 25.26 memset(empty_zero_page, 0, PAGE_SIZE); 25.27 25.28 irq_resume();
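The rewritten loop publishes, via the suspend record, the machine frame of every page of the phys_to_machine table: one frame-list slot per PAGE_SIZE/sizeof(unsigned long) = 1024 pfns on i386. A quick sizing check of that arithmetic:

    /* Frame-list slots needed for a guest of max_pfn pages (i386, 4K
     * pages, 4-byte p2m entries): a 256MB guest has max_pfn = 65536 and
     * so needs just 64 slots -- the list fits easily in one page. */
    unsigned long p2m_frames(unsigned long max_pfn)
    {
        return (max_pfn + 1023) / 1024;
    }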
26.1 --- a/xenolinux-2.4.26-sparse/arch/xen/kernel/time.c Tue May 11 14:31:55 2004 +0000 26.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/kernel/time.c Tue May 11 14:57:44 2004 +0000 26.3 @@ -62,6 +62,7 @@ 26.4 #include <linux/smp.h> 26.5 #include <linux/irq.h> 26.6 #include <linux/sysctl.h> 26.7 +#include <linux/sysrq.h> 26.8 26.9 spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED; 26.10 extern rwlock_t xtime_lock; 26.11 @@ -581,6 +582,10 @@ static void dbg_time_int(int irq, void * 26.12 timer->expires,(u32)(t_st>>32), (u32)t_st); 26.13 printk(KERN_ALERT "time: processed_system_time=0x%X:%08X\n", 26.14 (u32)(processed_system_time>>32), (u32)processed_system_time); 26.15 + 26.16 + 26.17 + handle_sysrq('t',NULL,NULL,NULL); 26.18 + 26.19 } 26.20 26.21 static struct irqaction dbg_time = {
27.1 --- a/xenolinux-2.4.26-sparse/arch/xen/kernel/traps.c Tue May 11 14:31:55 2004 +0000 27.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/kernel/traps.c Tue May 11 14:57:44 2004 +0000 27.3 @@ -317,16 +317,17 @@ asmlinkage void do_general_protection(st 27.4 __asm__ __volatile__ ( "sldt %0" : "=r" (ldt) ); 27.5 if ( ldt == 0 ) 27.6 { 27.7 - mmu_update_t u; 27.8 - u.ptr = MMU_EXTENDED_COMMAND; 27.9 - u.ptr |= (unsigned long)&default_ldt[0]; 27.10 - u.val = MMUEXT_SET_LDT | (5 << MMUEXT_CMD_SHIFT); 27.11 - if ( unlikely(HYPERVISOR_mmu_update(&u, 1) < 0) ) 27.12 - { 27.13 - show_trace(NULL); 27.14 - panic("Failed to install default LDT"); 27.15 - } 27.16 - return; 27.17 + int count = 1; 27.18 + mmu_update_t u; 27.19 + u.ptr = MMU_EXTENDED_COMMAND; 27.20 + u.ptr |= (unsigned long)&default_ldt[0]; 27.21 + u.val = MMUEXT_SET_LDT | (5 << MMUEXT_CMD_SHIFT); 27.22 + if ( unlikely(HYPERVISOR_mmu_update(&u, &count) < 0) ) 27.23 + { 27.24 + show_trace(NULL); 27.25 + panic("Failed to install default LDT"); 27.26 + } 27.27 + return; 27.28 } 27.29 } 27.30
28.1 --- a/xenolinux-2.4.26-sparse/arch/xen/mm/hypervisor.c Tue May 11 14:31:55 2004 +0000
28.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/mm/hypervisor.c Tue May 11 14:57:44 2004 +0000
28.3 @@ -37,12 +37,13 @@ static void DEBUG_allow_pt_reads(void)
28.4 int i;
28.5 for ( i = idx-1; i >= 0; i-- )
28.6 {
28.7 + int count = 1;
28.8 pte = update_debug_queue[i].ptep;
28.9 if ( pte == NULL ) continue;
28.10 update_debug_queue[i].ptep = NULL;
28.11 update.ptr = virt_to_machine(pte);
28.12 update.val = update_debug_queue[i].pteval;
28.13 - HYPERVISOR_mmu_update(&update, 1);
28.14 + HYPERVISOR_mmu_update(&update, &count);
28.15 }
28.16 }
28.17 static void DEBUG_disallow_pt_read(unsigned long va)
28.18 @@ -51,6 +52,7 @@ static void DEBUG_disallow_pt_read(unsig
28.19 pmd_t *pmd;
28.20 pgd_t *pgd;
28.21 unsigned long pteval;
28.22 + int count = 1;
28.23 /*
28.24 * We may fault because of an already outstanding update.
28.25 * That's okay -- it'll get fixed up in the fault handler.
28.26 @@ -62,7 +64,7 @@ static void DEBUG_disallow_pt_read(unsig
28.27 update.ptr = virt_to_machine(pte);
28.28 pteval = *(unsigned long *)pte;
28.29 update.val = pteval & ~_PAGE_PRESENT;
28.30 - HYPERVISOR_mmu_update(&update, 1);
28.31 + HYPERVISOR_mmu_update(&update, &count);
28.32 update_debug_queue[idx].ptep = pte;
28.33 update_debug_queue[idx].pteval = pteval;
28.34 }
28.35 @@ -100,7 +102,7 @@ void MULTICALL_flush_page_update_queue(v
28.36 wmb(); /* Make sure index is cleared first to avoid double updates. */
28.37 queue_multicall2(__HYPERVISOR_mmu_update,
28.38 (unsigned long)update_queue,
28.39 - _idx);
28.40 + (unsigned long)&_idx);
28.41 }
28.42 spin_unlock_irqrestore(&update_lock, flags);
28.43 }
28.44 @@ -116,7 +118,7 @@ static inline void __flush_page_update_q
28.45 #endif
28.46 idx = 0;
28.47 wmb(); /* Make sure index is cleared first to avoid double updates. */
28.48 - if ( unlikely(HYPERVISOR_mmu_update(update_queue, _idx) < 0) )
28.49 + if ( unlikely(HYPERVISOR_mmu_update(update_queue, &_idx) < 0) )
28.50 panic("Failed to execute MMU updates");
28.51 }
28.52 
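All four call sites above are adapting to the pass-count-by-pointer hypercall ABI (see the hypervisor.h hunk below). For reference, the batching pattern this file is built around, condensed into one place; the declarations of update_queue and idx are not visible in the hunk, so QUEUE_SIZE and the exact types here are assumptions:

    #define QUEUE_SIZE 2048                    /* assumed queue depth */
    static mmu_update_t update_queue[QUEUE_SIZE];
    static unsigned int idx;                   /* next free slot */

    static void flush_queue(void)
    {
        int count = idx;
        idx = 0;
        wmb();  /* clear the index before issuing, to avoid double updates */
        if ( count != 0 &&
             unlikely(HYPERVISOR_mmu_update(update_queue, &count) < 0) )
            panic("Failed to execute MMU updates");
    }

    static void queue_pte_update(pte_t *pte, unsigned long val)
    {
        update_queue[idx].ptr = virt_to_machine(pte);
        update_queue[idx].val = val;
        if ( ++idx == QUEUE_SIZE )
            flush_queue();
    }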
29.1 --- a/xenolinux-2.4.26-sparse/arch/xen/mm/ioremap.c Tue May 11 14:31:55 2004 +0000
29.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/mm/ioremap.c Tue May 11 14:57:44 2004 +0000
29.3 @@ -27,16 +27,12 @@
29.4 #define direct_mk_pte_phys(physpage, pgprot) \
29.5 __direct_mk_pte((physpage) >> PAGE_SHIFT, pgprot)
29.6 
29.7 -static inline int direct_remap_area_pte(pte_t *pte,
29.8 +static inline void direct_remap_area_pte(pte_t *pte,
29.9 unsigned long address,
29.10 unsigned long size,
29.11 - unsigned long machine_addr,
29.12 - pgprot_t prot,
29.13 - domid_t domid)
29.14 + mmu_update_t **v)
29.15 {
29.16 unsigned long end;
29.17 -#define MAX_DIRECTMAP_MMU_QUEUE 130
29.18 - mmu_update_t u[MAX_DIRECTMAP_MMU_QUEUE], *v, *w;
29.19 
29.20 address &= ~PMD_MASK;
29.21 end = address + size;
29.22 @@ -45,7 +41,87 @@ static inline int direct_remap_area_pte(
29.23 if (address >= end)
29.24 BUG();
29.25 
29.26 - /* If not I/O mapping then specify General-Purpose Subject Domain (GPS). */
29.27 + do {
29.28 +#if 0 // XXX
29.29 + if (!pte_none(*pte)) {
29.30 + printk("direct_remap_area_pte: page already exists\n");
29.31 + BUG();
29.32 + }
29.33 +#endif
29.34 + (*v)->ptr = virt_to_machine(pte);
29.35 + (*v)++;
29.36 + address += PAGE_SIZE;
29.37 + pte++;
29.38 + } while (address && (address < end));
29.39 + return;
29.40 +}
29.41 +
29.42 +static inline int direct_remap_area_pmd(struct mm_struct *mm,
29.43 + pmd_t *pmd,
29.44 + unsigned long address,
29.45 + unsigned long size,
29.46 + mmu_update_t **v)
29.47 +{
29.48 + unsigned long end;
29.49 +
29.50 + address &= ~PGDIR_MASK;
29.51 + end = address + size;
29.52 + if (end > PGDIR_SIZE)
29.53 + end = PGDIR_SIZE;
29.54 + if (address >= end)
29.55 + BUG();
29.56 + do {
29.57 + pte_t * pte = pte_alloc(mm, pmd, address);
29.58 + if (!pte)
29.59 + return -ENOMEM;
29.60 + direct_remap_area_pte(pte, address, end - address, v);
29.61 +
29.62 + address = (address + PMD_SIZE) & PMD_MASK;
29.63 + pmd++;
29.64 + } while (address && (address < end));
29.65 + return 0;
29.66 +}
29.67 +
29.68 +int __direct_remap_area_pages(struct mm_struct *mm,
29.69 + unsigned long address,
29.70 + unsigned long size,
29.71 + mmu_update_t *v)
29.72 +{
29.73 + pgd_t * dir;
29.74 + unsigned long end = address + size;
29.75 +
29.76 + dir = pgd_offset(mm, address);
29.77 + flush_cache_all();
29.78 + if (address >= end)
29.79 + BUG();
29.80 + spin_lock(&mm->page_table_lock);
29.81 + do {
29.82 + pmd_t *pmd = pmd_alloc(mm, dir, address);
29.83 + if (!pmd)
29.84 + { spin_unlock(&mm->page_table_lock); return -ENOMEM; }
29.85 + if (direct_remap_area_pmd(mm, pmd, address, end - address, &v) < 0) { spin_unlock(&mm->page_table_lock); return -ENOMEM; }
29.86 + address = (address + PGDIR_SIZE) & PGDIR_MASK;
29.87 + dir++;
29.88 +
29.89 + } while (address && (address < end));
29.90 + spin_unlock(&mm->page_table_lock);
29.91 + flush_tlb_all();
29.92 + return 0;
29.93 +}
29.94 +
29.95 +
29.96 +int direct_remap_area_pages(struct mm_struct *mm,
29.97 + unsigned long address,
29.98 + unsigned long machine_addr,
29.99 + unsigned long size,
29.100 + pgprot_t prot,
29.101 + domid_t domid)
29.102 +{
29.103 + int i, count;
29.104 + unsigned long start_address;
29.105 +#define MAX_DIRECTMAP_MMU_QUEUE 130
29.106 + mmu_update_t u[MAX_DIRECTMAP_MMU_QUEUE], *w, *v;
29.107 +
29.108 if ( domid != 0 )
29.109 {
29.110 u[0].val = (unsigned long)(domid<<16) & ~0xFFFFUL;
29.111 @@ -63,98 +139,46 @@ static inline int direct_remap_area_pte(
29.112 v = w = &u[0];
29.113 }
29.114 
29.115 - do {
29.116 - if ( (v-u) == MAX_DIRECTMAP_MMU_QUEUE )
29.117 - {
29.118 - if ( HYPERVISOR_mmu_update(u, MAX_DIRECTMAP_MMU_QUEUE) < 0 )
29.119 - return -EFAULT;
29.120 - v = w;
29.121 - }
29.122 -#if 0 /* thanks to new ioctl mmaping interface this is no longer a bug */
29.123 - if (!pte_none(*pte)) {
29.124 - printk("direct_remap_area_pte: page already exists\n");
29.125 - BUG();
29.126 - }
29.127 -#endif
29.128 - v->ptr = virt_to_machine(pte);
29.129 + start_address = address;
29.130 +
29.131 + for(i=0; i<size;
29.132 + i+=PAGE_SIZE, machine_addr+=PAGE_SIZE, address+=PAGE_SIZE, v++)
29.133 + {
29.134 + if( (v-u) == MAX_DIRECTMAP_MMU_QUEUE )
29.135 + {
29.136 + /* get the ptep's filled in */
29.137 + if ( __direct_remap_area_pages( mm,
29.138 + start_address,
29.139 + address-start_address,
29.140 + w ) < 0 )
29.141 + return -ENOMEM;
29.142 + count = v-u;
29.143 + if ( HYPERVISOR_mmu_update(u, &count) < 0 )
29.144 + return -EFAULT;
29.145 + v=w;
29.146 + start_address = address;
29.147 + }
29.148 +
29.149 + /* fill in the machine addresses */
29.150 v->val = (machine_addr & PAGE_MASK) | pgprot_val(prot) | _PAGE_IO;
29.151 - v++;
29.152 - address += PAGE_SIZE;
29.153 - machine_addr += PAGE_SIZE;
29.154 - pte++;
29.155 - } while (address && (address < end));
29.156 + }
29.157 
29.158 - if ( ((v-w) != 0) && (HYPERVISOR_mmu_update(u, v-u) < 0) )
29.159 - return -EFAULT;
29.160 + if(v!=w)
29.161 + {
29.162 + /* get the ptep's filled in */
29.163 + if ( __direct_remap_area_pages( mm,
29.164 + start_address,
29.165 + address-start_address,
29.166 + w ) < 0 )
29.167 + return -ENOMEM;
29.168 + count = v-u;
29.169 + if ( HYPERVISOR_mmu_update(u, &count) < 0 )
29.170 + return -EFAULT;
29.171 + }
29.172 +
29.173 return 0;
29.174 }
29.175 
29.176 -static inline int direct_remap_area_pmd(struct mm_struct *mm,
29.177 - pmd_t *pmd,
29.178 - unsigned long address,
29.179 - unsigned long size,
29.180 - unsigned long machine_addr,
29.181 - pgprot_t prot,
29.182 - domid_t domid)
29.183 -{
29.184 - int error = 0;
29.185 - unsigned long end;
29.186 -
29.187 - address &= ~PGDIR_MASK;
29.188 - end = address + size;
29.189 - if (end > PGDIR_SIZE)
29.190 - end = PGDIR_SIZE;
29.191 - machine_addr -= address;
29.192 - if (address >= end)
29.193 - BUG();
29.194 - do {
29.195 - pte_t * pte = pte_alloc(mm, pmd, address);
29.196 - if (!pte)
29.197 - return -ENOMEM;
29.198 - error = direct_remap_area_pte(pte, address, end - address,
29.199 - address + machine_addr, prot, domid);
29.200 - if ( error )
29.201 - break;
29.202 - address = (address + PMD_SIZE) & PMD_MASK;
29.203 - pmd++;
29.204 - } while (address && (address < end));
29.205 - return error;
29.206 -}
29.207 -
29.208 -int direct_remap_area_pages(struct mm_struct *mm,
29.209 - unsigned long address,
29.210 - unsigned long machine_addr,
29.211 - unsigned long size,
29.212 - pgprot_t prot,
29.213 - domid_t domid)
29.214 -{
29.215 - int error = 0;
29.216 - pgd_t * dir;
29.217 - unsigned long end = address + size;
29.218 -
29.219 - machine_addr -= address;
29.220 - dir = pgd_offset(mm, address);
29.221 - flush_cache_all();
29.222 - if (address >= end)
29.223 - BUG();
29.224 - spin_lock(&mm->page_table_lock);
29.225 - do {
29.226 - pmd_t *pmd = pmd_alloc(mm, dir, address);
29.227 - error = -ENOMEM;
29.228 - if (!pmd)
29.229 - break;
29.230 - error = direct_remap_area_pmd(mm, pmd, address, end - address,
29.231 - machine_addr + address, prot, domid);
29.232 - if (error)
29.233 - break;
29.234 - address = (address + PGDIR_SIZE) & PGDIR_MASK;
29.235 - dir++;
29.236 - } while (address && (address < end));
29.237 - spin_unlock(&mm->page_table_lock);
29.238 - flush_tlb_all();
29.239 - return error;
29.240 -}
29.241 
29.242 #endif /* CONFIG_XEN_PRIVILEGED_GUEST */
29.243 
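The restructuring above turns direct_remap_area_pages() into a two-pass batcher: the caller's loop fills each mmu_update_t's val with the target machine address and protection bits, __direct_remap_area_pages() then walks (allocating where necessary) the page tables to fill each ptr with the machine address of the pte slot, and a single HYPERVISOR_mmu_update() applies the whole queue. Stripped of the flush-when-full handling, the shape is this (a fragment under the hunk's own signatures, assuming the region fits in one MAX_DIRECTMAP_MMU_QUEUE batch; not a drop-in):

    mmu_update_t u[MAX_DIRECTMAP_MMU_QUEUE], *v = u;
    unsigned long va = address, ma = machine_addr;
    int count;

    /* pass 1: record the new pte contents (machine address + prot) */
    for ( ; va < address + size; va += PAGE_SIZE, ma += PAGE_SIZE, v++ )
        v->val = (ma & PAGE_MASK) | pgprot_val(prot) | _PAGE_IO;

    /* pass 2: resolve each update's target pte, then batch-apply */
    if ( __direct_remap_area_pages(mm, address, size, u) < 0 )
        return -ENOMEM;

    count = v - u;
    if ( HYPERVISOR_mmu_update(u, &count) < 0 )   /* count is in/out */
        return -EFAULT;

Exporting __direct_remap_area_pages() (see the pgalloc.h hunk below) is what lets the MMAPBATCH ioctl above queue updates for pages it has not walked itself.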
30.1 --- a/xenolinux-2.4.26-sparse/include/asm-xen/hypervisor.h Tue May 11 14:31:55 2004 +0000 30.2 +++ b/xenolinux-2.4.26-sparse/include/asm-xen/hypervisor.h Tue May 11 14:57:44 2004 +0000 30.3 @@ -153,7 +153,7 @@ static inline int HYPERVISOR_set_trap_ta 30.4 return ret; 30.5 } 30.6 30.7 -static inline int HYPERVISOR_mmu_update(mmu_update_t *req, int count) 30.8 +static inline int HYPERVISOR_mmu_update(mmu_update_t *req, int *count) 30.9 { 30.10 int ret; 30.11 __asm__ __volatile__ (
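With this signature change the update count travels by pointer, so the hypervisor can (presumably; this matches its use in the MMAPBATCH ioctl above) report back how many updates it actually processed. Every caller in this changeset now follows the same shape; pte and new_val here stand in for the caller's own state:

    mmu_update_t u;
    int count = 1;

    u.ptr = virt_to_machine(pte);   /* which pte to touch */
    u.val = new_val;                /* what to write into it */
    if ( HYPERVISOR_mmu_update(&u, &count) < 0 )
    {
        /* on failure, count indicates how far the batch got */
        return -EFAULT;
    }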
31.1 --- a/xenolinux-2.4.26-sparse/include/asm-xen/pgalloc.h Tue May 11 14:31:55 2004 +0000 31.2 +++ b/xenolinux-2.4.26-sparse/include/asm-xen/pgalloc.h Tue May 11 14:57:44 2004 +0000 31.3 @@ -276,4 +276,11 @@ extern int direct_remap_area_pages(struc 31.4 pgprot_t prot, 31.5 domid_t domid); 31.6 31.7 +extern int __direct_remap_area_pages(struct mm_struct *mm, 31.8 + unsigned long address, 31.9 + unsigned long size, 31.10 + mmu_update_t *v); 31.11 + 31.12 + 31.13 + 31.14 #endif /* _I386_PGALLOC_H */
32.1 --- a/xenolinux-2.4.26-sparse/include/asm-xen/proc_cmd.h Tue May 11 14:31:55 2004 +0000
32.2 +++ b/xenolinux-2.4.26-sparse/include/asm-xen/proc_cmd.h Tue May 11 14:57:44 2004 +0000
32.3 @@ -25,6 +25,13 @@ typedef struct privcmd_mmap {
32.4 privcmd_mmap_entry_t *entry;
32.5 } privcmd_mmap_t;
32.6 
32.7 +typedef struct privcmd_mmapbatch {
32.8 + int num; /* number of pages to populate */
32.9 + domid_t dom; /* target domain */
32.10 + unsigned long addr; /* virtual address */
32.11 + unsigned long *arr; /* array of mfns - top nibble set on err */
32.12 +} privcmd_mmapbatch_t;
32.13 +
32.14 typedef struct privcmd_blkmsg
32.15 {
32.16 unsigned long op;
32.17 @@ -50,5 +57,7 @@ typedef struct privcmd_blkmsg
32.18 _IOC(_IOC_NONE, 'P', 1, 0)
32.19 #define IOCTL_PRIVCMD_MMAP \
32.20 _IOC(_IOC_NONE, 'P', 2, sizeof(privcmd_mmap_t))
32.21 +#define IOCTL_PRIVCMD_MMAPBATCH \
32.22 + _IOC(_IOC_NONE, 'P', 3, sizeof(privcmd_mmapbatch_t))
32.23 
32.24 #endif /* __PROC_CMD_H__ */