ia64/xen-unstable
changeset 1364:5d4e9fe26885
bitkeeper revision 1.899 (40a1ea3a_7ZmXmUq-RTnUSd3QZ_X1Q)
live migration cleanups
live migration cleanups
author | iap10@labyrinth.cl.cam.ac.uk |
---|---|
date | Wed May 12 09:11:22 2004 +0000 (2004-05-12) |
parents | 34951071caf8 |
children | 76c4a76ab5d1 |
files | tools/xc/lib/xc_linux_restore.c tools/xc/lib/xc_linux_save.c tools/xc/lib/xc_private.c tools/xc/py/Xc.c xen/Rules.mk xen/common/dom0_ops.c xen/common/kernel.c xen/common/shadow.c xen/include/hypervisor-ifs/dom0_ops.h xen/include/xen/shadow.h |
line diff
1.1 --- a/tools/xc/lib/xc_linux_restore.c Tue May 11 15:54:57 2004 +0000 1.2 +++ b/tools/xc/lib/xc_linux_restore.c Wed May 12 09:11:22 2004 +0000 1.3 @@ -12,6 +12,15 @@ 1.4 1.5 #define MAX_BATCH_SIZE 1024 1.6 1.7 +#define DEBUG 0 1.8 + 1.9 +#if DEBUG 1.10 +#define DPRINTF(_f, _a...) printf ( _f , ## _a ) 1.11 +#else 1.12 +#define DPRINTF(_f, _a...) ((void)0) 1.13 +#endif 1.14 + 1.15 + 1.16 /* This may allow us to create a 'quiet' command-line option, if necessary. */ 1.17 #define verbose_printf(_f, _a...) \ 1.18 do { \ 1.19 @@ -230,7 +239,7 @@ int xc_linux_restore(int xc_handle, 1.20 goto out; 1.21 } 1.22 1.23 - printf("batch %d\n",j); 1.24 + DPRINTF("batch %d\n",j); 1.25 1.26 if (j == 0) 1.27 break; // our work here is done 1.28 @@ -272,11 +281,6 @@ int xc_linux_restore(int xc_handle, 1.29 1.30 pfn = region_pfn_type[i] & ~PGT_type_mask; 1.31 1.32 -//if(n>=nr_pfns || ((region_pfn_type[i] & PGT_type_mask) == L2TAB) ) printf("pfn=%08lx mfn=%x\n",region_pfn_type[i],pfn_to_mfn_table[pfn]); 1.33 - 1.34 - 1.35 -//if(pfn_type[i])printf("^pfn=%d %08lx\n",pfn,pfn_type[i]); 1.36 - 1.37 if ((region_pfn_type[i]>>29) == 7) 1.38 continue; 1.39 1.40 @@ -292,8 +296,6 @@ int xc_linux_restore(int xc_handle, 1.41 1.42 mfn = pfn_to_mfn_table[pfn]; 1.43 1.44 -//if(region_pfn_type[i])printf("i=%d pfn=%d mfn=%d type=%lx\n",i,pfn,mfn,region_pfn_type[i]); 1.45 - 1.46 ppage = (unsigned long*) (region_base + i*PAGE_SIZE); 1.47 1.48 if ( (*readerfn)(readerst, ppage, PAGE_SIZE) ) 1.49 @@ -315,21 +317,12 @@ int xc_linux_restore(int xc_handle, 1.50 { 1.51 xpfn = ppage[k] >> PAGE_SHIFT; 1.52 1.53 -/*printf("L1 i=%d pfn=%d mfn=%d k=%d pte=%08lx xpfn=%d\n", 1.54 - i,pfn,mfn,k,ppage[k],xpfn);*/ 1.55 - 1.56 if ( xpfn >= nr_pfns ) 1.57 { 1.58 ERROR("Frame number in type %d page table is out of range. i=%d k=%d pfn=0x%x nr_pfns=%d",region_pfn_type[i]>>29,i,k,xpfn,nr_pfns); 1.59 goto out; 1.60 } 1.61 -#if 0 1.62 - if ( (region_pfn_type[xpfn] != NONE) && (ppage[k] & _PAGE_RW) ) 1.63 - { 1.64 - ERROR("Write access requested for a restricted frame"); 1.65 - goto out; 1.66 - } 1.67 -#endif 1.68 + 1.69 ppage[k] &= (PAGE_SIZE - 1) & ~(_PAGE_GLOBAL | _PAGE_PAT); 1.70 ppage[k] |= pfn_to_mfn_table[xpfn] << PAGE_SHIFT; 1.71 } 1.72 @@ -345,9 +338,6 @@ int xc_linux_restore(int xc_handle, 1.73 { 1.74 xpfn = ppage[k] >> PAGE_SHIFT; 1.75 1.76 -/*printf("L2 i=%d pfn=%d mfn=%d k=%d pte=%08lx xpfn=%d\n", 1.77 - i,pfn,mfn,k,ppage[k],xpfn);*/ 1.78 - 1.79 if ( xpfn >= nr_pfns ) 1.80 { 1.81 ERROR("Frame number in type %d page table is out of range. i=%d k=%d pfn=%d nr_pfns=%d",region_pfn_type[i]>>29,i,k,xpfn,nr_pfns); 1.82 @@ -383,7 +373,8 @@ int xc_linux_restore(int xc_handle, 1.83 n+=j; // crude stats 1.84 1.85 } 1.86 -printf("RECEIVED ALL PAGES\n"); 1.87 + 1.88 + DPRINTF("Received all pages\n"); 1.89 1.90 mfn_mapper_close( region_mapper ); 1.91 1.92 @@ -445,8 +436,6 @@ printf("RECEIVED ALL PAGES\n"); 1.93 p_srec->resume_info.flags = 0; 1.94 unmap_pfn(pm_handle, p_srec); 1.95 1.96 -printf("new shared info is %lx\n", shared_info_frame); 1.97 - 1.98 /* Uncanonicalise each GDT frame number. */ 1.99 if ( ctxt.gdt_ents > 8192 ) 1.100 { 1.101 @@ -554,9 +543,9 @@ printf("new shared info is %lx\n", share 1.102 op.u.builddomain.ctxt = &ctxt; 1.103 rc = do_dom0_op(xc_handle, &op); 1.104 1.105 -printf("NORMAL EXIT RESTORE\n"); 1.106 + DPRINTF("Everything OK!\n"); 1.107 + 1.108 out: 1.109 -printf("EXIT RESTORE\n"); 1.110 if ( mmu != NULL ) 1.111 free(mmu); 1.112
2.1 --- a/tools/xc/lib/xc_linux_save.c Tue May 11 15:54:57 2004 +0000 2.2 +++ b/tools/xc/lib/xc_linux_save.c Wed May 12 09:11:22 2004 +0000 2.3 @@ -11,6 +11,14 @@ 2.4 2.5 #define BATCH_SIZE 1024 /* 1024 pages (4MB) at a time */ 2.6 2.7 +#define DEBUG 0 2.8 + 2.9 +#if DEBUG 2.10 +#define DPRINTF(_f, _a...) printf ( _f , ## _a ) 2.11 +#else 2.12 +#define DPRINTF(_f, _a...) ((void)0) 2.13 +#endif 2.14 + 2.15 /* This may allow us to create a 'quiet' command-line option, if necessary. */ 2.16 #define verbose_printf(_f, _a...) \ 2.17 do { \ 2.18 @@ -64,7 +72,7 @@ int xc_linux_save(int xc_handle, 2.19 int rc = 1, i, j, k, n, last_iter, iter = 0; 2.20 unsigned long mfn; 2.21 int verbose = flags & XCFLAGS_VERBOSE; 2.22 - int live = 1; //flags & XCFLAGS_LIVE; // XXXXXXXXXXXXXXXXXXX 2.23 + int live = flags & XCFLAGS_LIVE; 2.24 int sent_last_iter, sent_this_iter, max_iters; 2.25 2.26 /* Remember if we stopped the guest, so we can restart it on exit. */ 2.27 @@ -149,15 +157,12 @@ int xc_linux_save(int xc_handle, 2.28 printf("Sleep for 1ms\n"); 2.29 } 2.30 2.31 -#if 1 2.32 /* A cheesy test to see whether the domain contains valid state. */ 2.33 if ( ctxt.pt_base == 0 ) 2.34 { 2.35 ERROR("Domain is not in a valid Linux guest OS state"); 2.36 goto out; 2.37 } 2.38 -#endif 2.39 - 2.40 2.41 /* Map the suspend-record MFN to pin it. The page must be owned by 2.42 domid for this to succeed. */ 2.43 @@ -225,16 +230,23 @@ int xc_linux_save(int xc_handle, 2.44 goto out; 2.45 } 2.46 2.47 - for(i=0;i<(nr_pfns+1023)/1024 ;i++) 2.48 - printf("LF: %d %x\n",i,live_pfn_to_mfn_frame_list[i]); 2.49 2.50 + /* Canonicalise the pfn-to-mfn table frame-number list. */ 2.51 + memcpy( pfn_to_mfn_frame_list, live_pfn_to_mfn_frame_list, PAGE_SIZE ); 2.52 + for ( i = 0; i < nr_pfns; i += 1024 ) 2.53 + { 2.54 + if ( !translate_mfn_to_pfn(&pfn_to_mfn_frame_list[i/1024]) ) 2.55 + { 2.56 + ERROR("Frame # in pfn-to-mfn frame list is not in pseudophys"); 2.57 + goto out; 2.58 + } 2.59 + } 2.60 2.61 - /* At this point, we can start the domain again if we're doign a 2.62 + /* At this point, we can start the domain again if we're doing a 2.63 live suspend */ 2.64 2.65 if( live ) 2.66 { 2.67 -#if 1 2.68 if ( xc_shadow_control( xc_handle, domid, 2.69 DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY, 2.70 NULL, 0 ) < 0 ) 2.71 @@ -242,16 +254,16 @@ int xc_linux_save(int xc_handle, 2.72 ERROR("Couldn't enable shadow mode"); 2.73 goto out; 2.74 } 2.75 -#endif 2.76 + 2.77 if ( xc_domain_start( xc_handle, domid ) < 0 ) 2.78 { 2.79 ERROR("Couldn't restart domain"); 2.80 goto out; 2.81 } 2.82 -//exit(-1); 2.83 + 2.84 last_iter = 0; 2.85 sent_last_iter = 1<<20; // 4GB's worth of pages 2.86 - max_iters = 8; // limit us to 9 time round loop 2.87 + max_iters = 9; // limit us to 10 time round loop 2.88 } 2.89 else 2.90 last_iter = 1; 2.91 @@ -297,7 +309,7 @@ int xc_linux_save(int xc_handle, 2.92 /* 2.93 * Quick belt and braces sanity check. 2.94 */ 2.95 - 2.96 +#if DEBUG 2.97 for ( i = 0; i < nr_pfns; i++ ) 2.98 { 2.99 mfn = live_pfn_to_mfn_table[i]; 2.100 @@ -306,17 +318,7 @@ int xc_linux_save(int xc_handle, 2.101 printf("i=0x%x mfn=%x live_mfn_to_pfn_table=%x\n", 2.102 i,mfn,live_mfn_to_pfn_table[mfn]); 2.103 } 2.104 - 2.105 - /* Canonicalise the pfn-to-mfn table frame-number list. */ 2.106 - memcpy( pfn_to_mfn_frame_list, live_pfn_to_mfn_frame_list, PAGE_SIZE ); 2.107 - for ( i = 0; i < nr_pfns; i += 1024 ) 2.108 - { 2.109 - if ( !translate_mfn_to_pfn(&pfn_to_mfn_frame_list[i/1024]) ) 2.110 - { 2.111 - ERROR("Frame # in pfn-to-mfn frame list is not in pseudophys"); 2.112 - goto out; 2.113 - } 2.114 - } 2.115 +#endif 2.116 2.117 /* Map the shared info frame */ 2.118 live_shinfo = mfn_mapper_map_single(xc_handle, domid, 2.119 @@ -374,28 +376,16 @@ int xc_linux_save(int xc_handle, 2.120 2.121 if( pfn_type[batch] == 0x80000004 ) 2.122 { 2.123 - //printf("Skip netbuf pfn %lx. mfn %lx\n",n,pfn_type[batch]); 2.124 + DPRINTF("Skip netbuf pfn %lx. mfn %lx\n",n,pfn_type[batch]); 2.125 continue; 2.126 } 2.127 2.128 -//if(iter>1) printf("pfn=%x mfn=%x\n",n,pfn_type[batch]); 2.129 + if(iter>1) { DPRINTF("pfn=%x mfn=%x\n",n,pfn_type[batch]); } 2.130 2.131 batch++; 2.132 } 2.133 - 2.134 - for( j = 0; j < batch; j++ ) 2.135 - { 2.136 - 2.137 - if( (pfn_type[j] &0xfffff) == 0x0000004 ) 2.138 - { 2.139 - printf("XXXXXXXXSkip netbuf entry %d mfn %lx\n",j,pfn_type[j]); 2.140 - } 2.141 - 2.142 - 2.143 - } 2.144 - 2.145 2.146 - printf("batch %d:%d (n=%d)\n",iter,batch,n); 2.147 + DPRINTF("batch %d:%d (n=%d)\n",iter,batch,n); 2.148 2.149 if(batch == 0) goto skip; // vanishingly unlikely... 2.150 2.151 @@ -418,10 +408,9 @@ int xc_linux_save(int xc_handle, 2.152 { 2.153 if((pfn_type[j]>>29) == 7) 2.154 { 2.155 - //printf("type fail: page %i mfn %08lx\n",j,pfn_type[j]); 2.156 + DPRINTF("type fail: page %i mfn %08lx\n",j,pfn_type[j]); 2.157 continue; 2.158 } 2.159 -//if((pfn_type[j] & PGT_type_mask) == L2TAB) printf("L2 pfn=%08lx mfn=%lx\n",pfn_type[j],live_mfn_to_pfn_table[pfn_type[j]&~PGT_type_mask]); 2.160 2.161 /* canonicalise mfn->pfn */ 2.162 pfn_type[j] = (pfn_type[j] & PGT_type_mask) | 2.163 @@ -448,7 +437,7 @@ int xc_linux_save(int xc_handle, 2.164 2.165 if((pfn_type[j]>>29) == 7) 2.166 { 2.167 - //printf("SKIP BOGUS page %i mfn %08lx\n",j,pfn_type[j]); 2.168 + DPRINTF("SKIP BOGUS page %i mfn %08lx\n",j,pfn_type[j]); 2.169 continue; 2.170 } 2.171 2.172 @@ -471,26 +460,28 @@ int xc_linux_save(int xc_handle, 2.173 2.174 if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) ) 2.175 { 2.176 + // I don't think this should ever happen 2.177 + 2.178 printf("FNI %d : [%08lx,%d] pte=%08lx, mfn=%08lx, pfn=%08lx [mfn]=%08lx\n", 2.179 j, pfn_type[j], k, 2.180 page[k], mfn, live_mfn_to_pfn_table[mfn], 2.181 (live_mfn_to_pfn_table[mfn]<nr_pfns)? 2.182 - live_pfn_to_mfn_table[live_mfn_to_pfn_table[mfn]]: 0xdeadbeef); 2.183 - pfn = 0; // be suspicious 2.184 + live_pfn_to_mfn_table[live_mfn_to_pfn_table[mfn]]: 0xdeadbeef); 2.185 + 2.186 + pfn = 0; // be suspicious, very suspicious 2.187 2.188 -// ERROR("Frame number in pagetable page is invalid"); 2.189 -// goto out; 2.190 + //goto out; // let's try our luck 2.191 2.192 2.193 } 2.194 page[k] &= PAGE_SIZE - 1; 2.195 page[k] |= pfn << PAGE_SHIFT; 2.196 2.197 - /* 2.198 - printf("L%d i=%d pfn=%d mfn=%d k=%d pte=%08lx xpfn=%d\n", 2.199 - pfn_type[j]>>29, 2.200 - j,i,mfn,k,page[k],page[k]>>PAGE_SHIFT); 2.201 - */ 2.202 +#if DEBUG 2.203 + printf("L%d i=%d pfn=%d mfn=%d k=%d pte=%08lx xpfn=%d\n", 2.204 + pfn_type[j]>>29, 2.205 + j,i,mfn,k,page[k],page[k]>>PAGE_SHIFT); 2.206 +#endif 2.207 2.208 } /* end of page table rewrite for loop */ 2.209 2.210 @@ -526,11 +517,8 @@ int xc_linux_save(int xc_handle, 2.211 2.212 if ( live ) 2.213 { 2.214 - if ( sent_this_iter < (sent_last_iter * 0.95) && iter < max_iters ) 2.215 - { 2.216 - // we seem to be doing OK, keep going 2.217 - } 2.218 - else 2.219 + if ( ( sent_this_iter > (sent_last_iter * 0.95) ) || 2.220 + (iter >= max_iters) || (sent_this_iter < 10) ) 2.221 { 2.222 printf("Start last iteration\n"); 2.223 last_iter = 1; 2.224 @@ -547,17 +535,13 @@ int xc_linux_save(int xc_handle, 2.225 goto out; 2.226 } 2.227 2.228 -#if 0 2.229 - if(last_iter) memset(to_send, 0xff, (nr_pfns+7)/8 ); 2.230 -#endif 2.231 - 2.232 sent_last_iter = sent_this_iter; 2.233 } 2.234 2.235 2.236 } /* end of while 1 */ 2.237 2.238 -printf("All memory is saved\n"); 2.239 + DPRINTF("All memory is saved\n"); 2.240 2.241 /* Success! */ 2.242 rc = 0; 2.243 @@ -579,14 +563,14 @@ printf("All memory is saved\n"); 2.244 PERROR("Could not get info on domain"); 2.245 goto out; 2.246 } 2.247 -printf("A\n"); 2.248 + 2.249 /* Canonicalise the suspend-record frame number. */ 2.250 if ( !translate_mfn_to_pfn(&ctxt.cpu_ctxt.esi) ) 2.251 { 2.252 ERROR("State record is not in range of pseudophys map"); 2.253 goto out; 2.254 } 2.255 -printf("B\n"); 2.256 + 2.257 /* Canonicalise each GDT frame number. */ 2.258 for ( i = 0; i < ctxt.gdt_ents; i += 512 ) 2.259 { 2.260 @@ -596,7 +580,7 @@ printf("B\n"); 2.261 goto out; 2.262 } 2.263 } 2.264 -printf("C\n"); 2.265 + 2.266 /* Canonicalise the page table base pointer. */ 2.267 if ( !MFN_IS_IN_PSEUDOPHYS_MAP(ctxt.pt_base >> PAGE_SHIFT) ) 2.268 { 2.269 @@ -604,7 +588,7 @@ printf("C\n"); 2.270 goto out; 2.271 } 2.272 ctxt.pt_base = live_mfn_to_pfn_table[ctxt.pt_base >> PAGE_SHIFT] << PAGE_SHIFT; 2.273 -printf("D\n"); 2.274 + 2.275 if ( (*writerfn)(writerst, &ctxt, sizeof(ctxt)) || 2.276 (*writerfn)(writerst, live_shinfo, PAGE_SIZE) ) 2.277 { 2.278 @@ -612,7 +596,7 @@ printf("D\n"); 2.279 goto out; 2.280 } 2.281 munmap(live_shinfo, PAGE_SIZE); 2.282 -printf("E\n"); 2.283 + 2.284 out: 2.285 /* Restart the domain if we had to stop it to save its state. */ 2.286 if ( we_stopped_it )
3.1 --- a/tools/xc/lib/xc_private.c Tue May 11 15:54:57 2004 +0000 3.2 +++ b/tools/xc/lib/xc_private.c Wed May 12 09:11:22 2004 +0000 3.3 @@ -379,19 +379,24 @@ int finish_mmu_updates(int xc_handle, mm 3.4 int xc_domain_stop_sync( int xc_handle, domid_t domid ) 3.5 { 3.6 dom0_op_t op; 3.7 + int i; 3.8 + 3.9 3.10 - while (1) 3.11 + op.cmd = DOM0_STOPDOMAIN; 3.12 + op.u.stopdomain.domain = (domid_t)domid; 3.13 + if ( do_dom0_op(xc_handle, &op) != 0 ) 3.14 { 3.15 - op.cmd = DOM0_STOPDOMAIN; 3.16 - op.u.stopdomain.domain = (domid_t)domid; 3.17 - if ( do_dom0_op(xc_handle, &op) != 0 ) 3.18 - { 3.19 - PERROR("Stopping target domain failed"); 3.20 - goto out; 3.21 - } 3.22 + PERROR("Stopping target domain failed"); 3.23 + goto out; 3.24 + } 3.25 + 3.26 + usleep(100); // 100us 3.27 3.28 - usleep(1000); // 1ms 3.29 - printf("Sleep for 1ms\n"); 3.30 + for(i=0;;i++) 3.31 + { 3.32 + if (i>0) 3.33 + if (i==1) printf("Sleep."); 3.34 + else printf("."); 3.35 3.36 op.cmd = DOM0_GETDOMAININFO; 3.37 op.u.getdomaininfo.domain = (domid_t)domid; 3.38 @@ -405,10 +410,11 @@ int xc_domain_stop_sync( int xc_handle, 3.39 3.40 if ( op.u.getdomaininfo.state == DOMSTATE_STOPPED ) 3.41 { 3.42 - printf("Domain %lld stopped\n",domid); 3.43 + printf("\nDomain %lld stopped\n",domid); 3.44 return 0; 3.45 } 3.46 - 3.47 + 3.48 + usleep(1000); 3.49 } 3.50 3.51 out:
4.1 --- a/tools/xc/py/Xc.c Tue May 11 15:54:57 2004 +0000 4.2 +++ b/tools/xc/py/Xc.c Wed May 12 09:11:22 2004 +0000 4.3 @@ -191,7 +191,7 @@ static PyObject *pyxc_linux_save(PyObjec 4.4 4.5 u64 dom; 4.6 char *state_file; 4.7 - int progress = 1, live = 0; 4.8 + int progress = 1, live = -1; 4.9 unsigned int flags = 0; 4.10 4.11 static char *kwd_list[] = { "dom", "state_file", "progress", "live", NULL }; 4.12 @@ -200,8 +200,8 @@ static PyObject *pyxc_linux_save(PyObjec 4.13 &dom, &state_file, &progress, &live) ) 4.14 return NULL; 4.15 4.16 - if (progress) flags |= XCFLAGS_VERBOSE; 4.17 - if (live) flags |= XCFLAGS_LIVE; 4.18 + if (progress) flags |= XCFLAGS_VERBOSE; 4.19 + if (live == 1) flags |= XCFLAGS_LIVE; 4.20 4.21 if ( strncmp(state_file,"tcp:", strlen("tcp:")) == 0 ) 4.22 { 4.23 @@ -226,6 +226,8 @@ static PyObject *pyxc_linux_save(PyObjec 4.24 return 0; 4.25 } 4.26 4.27 + if (live == -1) flags |= XCFLAGS_LIVE; // default to live for tcp 4.28 + 4.29 strncpy( server, state_file+strlen("tcp://"), max_namelen); 4.30 server[max_namelen-1]='\0'; 4.31 if ( (port_s = strchr(server,':')) != NULL )
5.1 --- a/xen/Rules.mk Tue May 11 15:54:57 2004 +0000 5.2 +++ b/xen/Rules.mk Wed May 12 09:11:22 2004 +0000 5.3 @@ -3,6 +3,7 @@ TARGET_ARCH ?= $(COMPILE_ARCH) 5.4 5.5 nodev ?= n 5.6 debug ?= n 5.7 +trace ?= n 5.8 5.9 TARGET := $(BASEDIR)/xen 5.10 HDRS := $(wildcard $(BASEDIR)/include/xen/*.h) 5.11 @@ -50,6 +51,10 @@ ifeq ($(nodev),y) 5.12 CFLAGS += -DNO_DEVICES_IN_XEN 5.13 endif 5.14 5.15 +ifeq ($(trace),y) 5.16 +CFLAGS += -DTRACE_BUFFER 5.17 +endif 5.18 + 5.19 %.o: %.c $(HDRS) Makefile 5.20 $(CC) $(CFLAGS) -c $< -o $@ 5.21
6.1 --- a/xen/common/dom0_ops.c Tue May 11 15:54:57 2004 +0000 6.2 +++ b/xen/common/dom0_ops.c Wed May 12 09:11:22 2004 +0000 6.3 @@ -21,6 +21,11 @@ 6.4 #include <xen/shadow.h> 6.5 #include <hypervisor-ifs/sched_ctl.h> 6.6 6.7 + 6.8 +#define TRC_DOM0OP_START_BASE 0x00020000 6.9 +#define TRC_DOM0OP_FINISH_BASE 0x00030000 6.10 + 6.11 + 6.12 extern unsigned int alloc_new_dom_mem(struct task_struct *, unsigned int); 6.13 6.14 /* Basically used to protect the domain-id space. */ 6.15 @@ -68,6 +73,9 @@ long do_dom0_op(dom0_op_t *u_dom0_op) 6.16 return -EACCES; 6.17 } 6.18 6.19 + TRACE_5D( TRC_DOM0OP_START_BASE + op->cmd, 6.20 + 0, op->u.dummy[0], op->u.dummy[1], op->u.dummy[2], op->u.dummy[3] ); 6.21 + 6.22 switch ( op->cmd ) 6.23 { 6.24 6.25 @@ -671,5 +679,9 @@ long do_dom0_op(dom0_op_t *u_dom0_op) 6.26 6.27 } 6.28 6.29 + TRACE_5D( TRC_DOM0OP_FINISH_BASE + op->cmd, ret, 6.30 + op->u.dummy[0], op->u.dummy[1], op->u.dummy[2], op->u.dummy[3] ); 6.31 + 6.32 + 6.33 return ret; 6.34 }
7.1 --- a/xen/common/kernel.c Tue May 11 15:54:57 2004 +0000 7.2 +++ b/xen/common/kernel.c Wed May 12 09:11:22 2004 +0000 7.3 @@ -31,6 +31,7 @@ 7.4 #include <xen/console.h> 7.5 #include <xen/net_headers.h> 7.6 #include <xen/serial.h> 7.7 +#include <xen/shadow.h> 7.8 7.9 kmem_cache_t *task_struct_cachep; 7.10 7.11 @@ -268,6 +269,8 @@ void cmain(unsigned long magic, multiboo 7.12 7.13 set_bit(PF_PRIVILEGED, &new_dom->flags); 7.14 7.15 + shadow_mode_init(); 7.16 + 7.17 /* 7.18 * We're going to setup domain0 using the module(s) that we stashed safely 7.19 * above our MAX_DIRECTMAP_ADDRESS in boot/boot.S. The second module, if
8.1 --- a/xen/common/shadow.c Tue May 11 15:54:57 2004 +0000 8.2 +++ b/xen/common/shadow.c Wed May 12 09:11:22 2004 +0000 8.3 @@ -6,6 +6,8 @@ 8.4 #include <xen/shadow.h> 8.5 #include <asm/domain_page.h> 8.6 #include <asm/page.h> 8.7 +#include <xen/event.h> 8.8 +#include <xen/trace.h> 8.9 8.10 8.11 /******** 8.12 @@ -26,6 +28,8 @@ hypercall lock anyhow (at least initiall 8.13 8.14 ********/ 8.15 8.16 +static spinlock_t cpu_stall_lock; 8.17 + 8.18 static inline void free_shadow_page( struct mm_struct *m, 8.19 struct pfn_info *pfn_info ) 8.20 { 8.21 @@ -155,6 +159,11 @@ static void __scan_shadow_table( struct 8.22 } 8.23 8.24 8.25 +void shadow_mode_init(void) 8.26 +{ 8.27 + spin_lock_init( &cpu_stall_lock ); 8.28 +} 8.29 + 8.30 int shadow_mode_enable( struct task_struct *p, unsigned int mode ) 8.31 { 8.32 struct mm_struct *m = &p->mm; 8.33 @@ -260,11 +269,11 @@ void shadow_mode_disable( struct task_st 8.34 } 8.35 8.36 static int shadow_mode_table_op( struct task_struct *p, 8.37 - dom0_shadow_control_t *sc ) 8.38 + dom0_shadow_control_t *sc ) 8.39 { 8.40 - unsigned int op = sc->op; 8.41 + unsigned int op = sc->op; 8.42 struct mm_struct *m = &p->mm; 8.43 - int rc = 0; 8.44 + int rc = 0; 8.45 8.46 // since Dom0 did the hypercall, we should be running with it's page 8.47 // tables right now. Calling flush on yourself would be really 8.48 @@ -290,37 +299,50 @@ static int shadow_mode_table_op( struct 8.49 break; 8.50 8.51 case DOM0_SHADOW_CONTROL_OP_CLEAN: 8.52 + { 8.53 + int i,j,zero=1; 8.54 + 8.55 + __scan_shadow_table( m, op ); 8.56 + 8.57 + if( p->tot_pages > sc->pages || 8.58 + !sc->dirty_bitmap || !p->mm.shadow_dirty_bitmap ) 8.59 { 8.60 - int i; 8.61 - 8.62 - __scan_shadow_table( m, op ); 8.63 - 8.64 - if( p->tot_pages > sc->pages || 8.65 - !sc->dirty_bitmap || !p->mm.shadow_dirty_bitmap ) 8.66 + rc = -EINVAL; 8.67 + goto out; 8.68 + } 8.69 + 8.70 + sc->pages = p->tot_pages; 8.71 + 8.72 +#define chunk (8*1024) // do this in 1KB chunks for L1 cache 8.73 + 8.74 + for(i=0;i<p->tot_pages;i+=chunk) 8.75 + { 8.76 + int bytes = (( ((p->tot_pages-i) > (chunk))? 8.77 + (chunk):(p->tot_pages-i) ) + 7) / 8; 8.78 + 8.79 + copy_to_user( sc->dirty_bitmap + (i/(8*sizeof(unsigned long))), 8.80 + p->mm.shadow_dirty_bitmap +(i/(8*sizeof(unsigned long))), 8.81 + bytes ); 8.82 + 8.83 + for(j=0; zero && j<bytes/sizeof(unsigned long);j++) 8.84 { 8.85 - rc = -EINVAL; 8.86 - goto out; 8.87 + if( p->mm.shadow_dirty_bitmap[j] != 0 ) 8.88 + zero = 0; 8.89 } 8.90 - 8.91 - sc->pages = p->tot_pages; 8.92 - 8.93 -#define chunk (8*1024) // do this in 1KB chunks for L1 cache 8.94 8.95 - for(i=0;i<p->tot_pages;i+=chunk) 8.96 - { 8.97 - int bytes = (( ((p->tot_pages-i) > (chunk))? 8.98 - (chunk):(p->tot_pages-i) ) + 7) / 8; 8.99 + memset( p->mm.shadow_dirty_bitmap +(i/(8*sizeof(unsigned long))), 8.100 + 0, bytes); 8.101 + } 8.102 8.103 - copy_to_user( sc->dirty_bitmap + (i/(8*sizeof(unsigned long))), 8.104 - p->mm.shadow_dirty_bitmap +(i/(8*sizeof(unsigned long))), 8.105 - bytes ); 8.106 - 8.107 - memset( p->mm.shadow_dirty_bitmap +(i/(8*sizeof(unsigned long))), 8.108 - 0, bytes); 8.109 - } 8.110 - 8.111 - break; 8.112 + if (zero) 8.113 + { 8.114 + /* might as well stop the domain as an optimization. */ 8.115 + if ( p->state != TASK_STOPPED ) 8.116 + send_guest_virq(p, VIRQ_STOP); 8.117 } 8.118 + 8.119 + break; 8.120 + } 8.121 } 8.122 8.123 8.124 @@ -338,22 +360,45 @@ out: 8.125 return rc; 8.126 } 8.127 8.128 - 8.129 int shadow_mode_control( struct task_struct *p, dom0_shadow_control_t *sc ) 8.130 { 8.131 - int we_paused = 0; 8.132 - unsigned int cmd = sc->op; 8.133 - int rc = 0; 8.134 - 8.135 + unsigned int cmd = sc->op; 8.136 + int rc = 0, cpu; 8.137 + 8.138 // don't call if already shadowed... 8.139 8.140 - // sychronously stop domain 8.141 - if( 0 && !(p->state & TASK_STOPPED) && !(p->state & TASK_PAUSED)) 8.142 + /* The following is pretty hideous because we don't have a way of 8.143 + synchronously pausing a domain. If it's assigned to the curernt CPU, 8.144 + we don't have to worry -- it can't possibly actually be running. 8.145 + If its on another CPU, for the moment, we do something really gross: 8.146 + we cause the other CPU to spin regardless of what domain it is running. 8.147 + 8.148 + I know this is really grim, but it only lasts a few 10's of 8.149 + microseconds. It needs fixing as soon as the last of the Linux-isms 8.150 + get removed from the task structure... 8.151 + 8.152 + Oh, and let's hope someone doesn't repin the CPU while we're here. 8.153 + Also, prey someone else doesn't do this in another domain. 8.154 + At least there's only one dom0 at the moment... 8.155 + */ 8.156 +printk("SMC\n"); 8.157 + spin_lock( &cpu_stall_lock ); 8.158 + cpu = p->processor; 8.159 +printk("got %d %d\n",cpu, current->processor ); 8.160 + if ( cpu != current->processor ) 8.161 { 8.162 - printk("about to pause domain\n"); 8.163 - sched_pause_sync(p); 8.164 - printk("paused domain\n"); 8.165 - we_paused = 1; 8.166 +printk("CPU %d %d\n",cpu, current->processor ); 8.167 + static void cpu_stall(void * data) 8.168 + { 8.169 + if ( current->processor == (int) data ) 8.170 + { 8.171 + printk("Stall %d\n",(int)data); 8.172 + spin_lock( &cpu_stall_lock ); 8.173 + spin_unlock( &cpu_stall_lock ); 8.174 + } 8.175 + } 8.176 + 8.177 + smp_call_function(cpu_stall, (void*)cpu, 1, 0); // don't wait! 8.178 } 8.179 8.180 if ( p->mm.shadow_mode && cmd == DOM0_SHADOW_CONTROL_OP_OFF ) 8.181 @@ -376,11 +421,11 @@ int shadow_mode_control( struct task_str 8.182 } 8.183 else 8.184 { 8.185 - if ( we_paused ) wake_up(p); 8.186 - return -EINVAL; 8.187 + rc = -EINVAL; 8.188 } 8.189 8.190 - if ( we_paused ) wake_up(p); 8.191 + spin_unlock( &cpu_stall_lock ); 8.192 +printk("SMC-\n"); 8.193 return rc; 8.194 } 8.195
9.1 --- a/xen/include/hypervisor-ifs/dom0_ops.h Tue May 11 15:54:57 2004 +0000 9.2 +++ b/xen/include/hypervisor-ifs/dom0_ops.h Wed May 12 09:11:22 2004 +0000 9.3 @@ -289,6 +289,7 @@ typedef struct dom0_op_st 9.4 unsigned long interface_version; /* DOM0_INTERFACE_VERSION */ 9.5 union 9.6 { 9.7 + unsigned long dummy[4]; 9.8 dom0_createdomain_t createdomain; 9.9 dom0_startdomain_t startdomain; 9.10 dom0_stopdomain_t stopdomain;
10.1 --- a/xen/include/xen/shadow.h Tue May 11 15:54:57 2004 +0000 10.2 +++ b/xen/include/xen/shadow.h Wed May 12 09:11:22 2004 +0000 10.3 @@ -23,6 +23,7 @@ 10.4 #define shadow_linear_pg_table ((l1_pgentry_t *)SH_LINEAR_PT_VIRT_START) 10.5 #define shadow_linear_l2_table ((l2_pgentry_t *)(SH_LINEAR_PT_VIRT_START+(SH_LINEAR_PT_VIRT_START>>(L2_PAGETABLE_SHIFT-L1_PAGETABLE_SHIFT)))) 10.6 10.7 +extern void shadow_mode_init(void); 10.8 extern int shadow_mode_control( struct task_struct *p, dom0_shadow_control_t *sc ); 10.9 extern int shadow_fault( unsigned long va, long error_code ); 10.10 extern void shadow_l1_normal_pt_update( unsigned long pa, unsigned long gpte,