ia64/xen-unstable
changeset 6556:a698bd49931b
Merge.
line diff
--- a/linux-2.4-xen-sparse/mkbuildtree Fri Aug 26 13:47:16 2005 -0700
+++ b/linux-2.4-xen-sparse/mkbuildtree Mon Aug 29 13:19:24 2005 -0700
@@ -102,9 +102,9 @@ for i in `find . -type l`; do rm -f $i;
 relative_lndir ${RS}
 rm -f mkbuildtree
 
-set ${RS}/../linux-2.6-xen-sparse
-[ "$1" == "${RS}/../linux-2.6-xen-sparse" ] && { echo "no Linux 2.6 sparse tree at ${RS}/../linux-2.6-xen-sparse"; exit 1; }
-LINUX_26="$1"
+LINUX_26=${RS}/../linux-2.6-xen-sparse
+[ -d $LINUX_26 ] || { echo "no Linux 2.6 sparse tree at ${RS}/../linux-2.6-xen-sparse"; exit 1; }
+
 
 # Create links to the shared definitions of the Xen interfaces.
 rm -rf ${AD}/include/asm-xen/xen-public
--- a/linux-2.6-xen-sparse/arch/xen/i386/Kconfig Fri Aug 26 13:47:16 2005 -0700
+++ b/linux-2.6-xen-sparse/arch/xen/i386/Kconfig Mon Aug 29 13:19:24 2005 -0700
@@ -807,8 +807,8 @@ choice
 	  direct access method and falls back to the BIOS if that doesn't
 	  work. If unsure, go with the default, which is "Any".
 
-config PCI_GOBIOS
-	bool "BIOS"
+#config PCI_GOBIOS
+#	bool "BIOS"
 
 config PCI_GOMMCONFIG
 	bool "MMConfig"
@@ -821,10 +821,10 @@ config PCI_GOANY
 
 endchoice
 
-config PCI_BIOS
-	bool
-	depends on !X86_VISWS && PCI && (PCI_GOBIOS || PCI_GOANY)
-	default y
+#config PCI_BIOS
+#	bool
+#	depends on !X86_VISWS && PCI && (PCI_GOBIOS || PCI_GOANY)
+#	default y
 
 config PCI_DIRECT
 	bool
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/acpi/boot.c Fri Aug 26 13:47:16 2005 -0700
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/acpi/boot.c Mon Aug 29 13:19:24 2005 -0700
@@ -610,7 +610,7 @@ static int __init acpi_parse_fadt(unsign
 	acpi_fadt.force_apic_physical_destination_mode = fadt->force_apic_physical_destination_mode;
 #endif
 
-#ifdef CONFIG_X86_PM_TIMER
+#if defined(CONFIG_X86_PM_TIMER) && !defined(CONFIG_XEN)
 	/* detect the location of the ACPI PM Timer */
 	if (fadt->revision >= FADT2_REVISION_ID) {
 		/* FADT rev. 2 */
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c Fri Aug 26 13:47:16 2005 -0700
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c Mon Aug 29 13:19:24 2005 -0700
@@ -135,6 +135,10 @@ static inline void play_dead(void)
  * low exit latency (ie sit in a loop waiting for
  * somebody to say that they'd like to reschedule)
  */
+#ifdef CONFIG_SMP
+extern void smp_suspend(void);
+extern void smp_resume(void);
+#endif
 void cpu_idle (void)
 {
 	int cpu = _smp_processor_id();
@@ -149,6 +153,9 @@ void cpu_idle (void)
 
 			if (cpu_is_offline(cpu)) {
 				local_irq_disable();
+#ifdef CONFIG_SMP
+				smp_suspend();
+#endif
 #if defined(CONFIG_XEN) && defined(CONFIG_HOTPLUG_CPU)
 				/* Ack it.  From this point on until
 				   we get woken up, we're not allowed
@@ -159,6 +166,9 @@ void cpu_idle (void)
 				HYPERVISOR_vcpu_down(cpu);
 #endif
 				play_dead();
+#ifdef CONFIG_SMP
+				smp_resume();
+#endif
 				local_irq_enable();
 			}
 
@@ -789,10 +799,3 @@ unsigned long arch_align_stack(unsigned
 	sp -= get_random_int() % 8192;
 	return sp & ~0xf;
 }
-
-
-#ifndef CONFIG_X86_SMP
-void _restore_vcpu(void)
-{
-}
-#endif
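Taken together, the process.c hunks above make the idle loop quiesce per-CPU interrupt bindings before a vcpu goes down and re-establish them when it comes back. The fragment below is a rough sketch of the resulting offline path assembled from those hunks; it is not a verbatim copy of the file, and the enclosing idle loop and declarations are elided.

			/* Sketch only: offline path inside cpu_idle(), per the hunks above. */
			if (cpu_is_offline(cpu)) {
				local_irq_disable();
#ifdef CONFIG_SMP
				smp_suspend();          /* tear down timer IRQ and IPIs */
#endif
#if defined(CONFIG_XEN) && defined(CONFIG_HOTPLUG_CPU)
				HYPERVISOR_vcpu_down(cpu);  /* ack the hot-unplug request */
#endif
				play_dead();
#ifdef CONFIG_SMP
				smp_resume();           /* rebind timer IRQ and IPIs on wakeup */
#endif
				local_irq_enable();
			}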
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c Fri Aug 26 13:47:16 2005 -0700
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c Mon Aug 29 13:19:24 2005 -0700
@@ -1601,32 +1601,71 @@ extern void local_teardown_timer_irq(voi
 
 void smp_suspend(void)
 {
-	/* XXX todo: take down time and ipi's on all cpus */
 	local_teardown_timer_irq();
 	smp_intr_exit();
 }
 
 void smp_resume(void)
 {
-	/* XXX todo: restore time and ipi's on all cpus */
 	smp_intr_init();
 	local_setup_timer_irq();
 }
 
-DECLARE_PER_CPU(int, timer_irq);
-
-void _restore_vcpu(void)
-{
-	int cpu = smp_processor_id();
-	extern atomic_t vcpus_rebooting;
+static atomic_t vcpus_rebooting;
 
-	/* We are the first thing the vcpu runs when it comes back,
-	   and we are supposed to restore the IPIs and timer
-	   interrupts etc. When we return, the vcpu's idle loop will
-	   start up again. */
-	_bind_virq_to_irq(VIRQ_TIMER, cpu, per_cpu(timer_irq, cpu));
-	_bind_virq_to_irq(VIRQ_DEBUG, cpu, per_cpu(ldebug_irq, cpu));
-	_bind_ipi_to_irq(RESCHEDULE_VECTOR, cpu, per_cpu(resched_irq, cpu) );
-	_bind_ipi_to_irq(CALL_FUNCTION_VECTOR, cpu, per_cpu(callfunc_irq, cpu) );
+static void restore_vcpu_ready(void)
+{
+
 	atomic_dec(&vcpus_rebooting);
 }
+
+void save_vcpu_context(int vcpu, vcpu_guest_context_t *ctxt)
+{
+	int r;
+	int gdt_pages;
+	r = HYPERVISOR_vcpu_pickle(vcpu, ctxt);
+	if (r != 0)
+		panic("pickling vcpu %d -> %d!\n", vcpu, r);
+
+	/* Translate from machine to physical addresses where necessary,
+	   so that they can be translated to our new machine address space
+	   after resume.  libxc is responsible for doing this to vcpu0,
+	   but we do it to the others. */
+	gdt_pages = (ctxt->gdt_ents + 511) / 512;
+	ctxt->ctrlreg[3] = machine_to_phys(ctxt->ctrlreg[3]);
+	for (r = 0; r < gdt_pages; r++)
+		ctxt->gdt_frames[r] = mfn_to_pfn(ctxt->gdt_frames[r]);
+}
+
+int restore_vcpu_context(int vcpu, vcpu_guest_context_t *ctxt)
+{
+	int r;
+	int gdt_pages = (ctxt->gdt_ents + 511) / 512;
+
+	/* This is kind of a hack, and implicitly relies on the fact that
+	   the vcpu stops in a place where all of the call clobbered
+	   registers are already dead. */
+	ctxt->user_regs.esp -= 4;
+	((unsigned long *)ctxt->user_regs.esp)[0] = ctxt->user_regs.eip;
+	ctxt->user_regs.eip = (unsigned long)restore_vcpu_ready;
+
+	/* De-canonicalise.  libxc handles this for vcpu 0, but we need
+	   to do it for the other vcpus. */
+	ctxt->ctrlreg[3] = phys_to_machine(ctxt->ctrlreg[3]);
+	for (r = 0; r < gdt_pages; r++)
+		ctxt->gdt_frames[r] = pfn_to_mfn(ctxt->gdt_frames[r]);
+
+	atomic_set(&vcpus_rebooting, 1);
+	r = HYPERVISOR_boot_vcpu(vcpu, ctxt);
+	if (r != 0) {
+		printk(KERN_EMERG "Failed to reboot vcpu %d (%d)\n", vcpu, r);
+		return -1;
+	}
+
+	/* Make sure we wait for the new vcpu to come up before trying to do
+	   anything with it or starting the next one. */
+	while (atomic_read(&vcpus_rebooting))
+		barrier();
+
+	return 0;
+}
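save_vcpu_context() and restore_vcpu_context() are consumed by the save/restume path in reboot.c later in this changeset. The following is a minimal usage sketch paraphrased from those __do_suspend() hunks, with error handling and the surrounding suspend machinery elided; it is not a verbatim copy.

	/* Sketch: how reboot.c drives the helpers added above. */
	static vcpu_guest_context_t suspended_cpu_records[NR_CPUS];
	cpumask_t prev_present_cpus;
	int i;

	/* Before suspending: pickle every secondary vcpu's state. */
	cpus_clear(prev_present_cpus);
	for_each_present_cpu(i) {
		if (i == 0)
			continue;
		save_vcpu_context(i, &suspended_cpu_records[i]);
		cpu_set(i, prev_present_cpus);
	}

	/* ... HYPERVISOR_suspend() and the rest of __do_suspend() run here ... */

	/* After resume: reboot each secondary vcpu from its saved record. */
	for_each_cpu_mask(i, prev_present_cpus)
		restore_vcpu_context(i, &suspended_cpu_records[i]);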
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/init.c Fri Aug 26 13:47:16 2005 -0700
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/init.c Mon Aug 29 13:19:24 2005 -0700
@@ -219,6 +219,8 @@ static void __init kernel_physical_mappi
 	}
 }
 
+#ifndef CONFIG_XEN
+
 static inline int page_kills_ppro(unsigned long pagenr)
 {
 	if (pagenr >= 0x70000 && pagenr <= 0x7003F)
@@ -266,6 +268,13 @@ static inline int page_is_ram(unsigned l
 	return 0;
 }
 
+#else /* CONFIG_XEN */
+
+#define page_kills_ppro(p)	0
+#define page_is_ram(p)		1
+
+#endif
+
 #ifdef CONFIG_HIGHMEM
 pte_t *kmap_pte;
 pgprot_t kmap_prot;
--- a/linux-2.6-xen-sparse/arch/xen/i386/pci/Makefile Fri Aug 26 13:47:16 2005 -0700
+++ b/linux-2.6-xen-sparse/arch/xen/i386/pci/Makefile Mon Aug 29 13:19:24 2005 -0700
@@ -4,7 +4,7 @@ CFLAGS += -Iarch/$(XENARCH)/pci
 
 c-obj-y				:= i386.o
 
-c-obj-$(CONFIG_PCI_BIOS)	+= pcbios.o
+#c-obj-$(CONFIG_PCI_BIOS)	+= pcbios.o
 c-obj-$(CONFIG_PCI_MMCONFIG)	+= mmconfig.o
 c-obj-$(CONFIG_PCI_DIRECT)	+= direct.o
 
8.1 --- a/linux-2.6-xen-sparse/arch/xen/kernel/evtchn.c Fri Aug 26 13:47:16 2005 -0700 8.2 +++ b/linux-2.6-xen-sparse/arch/xen/kernel/evtchn.c Mon Aug 29 13:19:24 2005 -0700 8.3 @@ -245,74 +245,6 @@ void unbind_virq_from_irq(int virq) 8.4 spin_unlock(&irq_mapping_update_lock); 8.5 } 8.6 8.7 -/* This is only used when a vcpu from an xm save. The ipi is expected 8.8 - to have been bound before we suspended, and so all of the xenolinux 8.9 - state is set up; we only need to restore the Xen side of things. 8.10 - The irq number has to be the same, but the evtchn number can 8.11 - change. */ 8.12 -void _bind_ipi_to_irq(int ipi, int vcpu, int irq) 8.13 -{ 8.14 - evtchn_op_t op; 8.15 - int evtchn; 8.16 - 8.17 - spin_lock(&irq_mapping_update_lock); 8.18 - 8.19 - op.cmd = EVTCHNOP_bind_ipi; 8.20 - if ( HYPERVISOR_event_channel_op(&op) != 0 ) 8.21 - panic("Failed to bind virtual IPI %d on cpu %d\n", ipi, vcpu); 8.22 - evtchn = op.u.bind_ipi.port; 8.23 - 8.24 - printk("<0>IPI %d, old evtchn %d, evtchn %d.\n", 8.25 - ipi, per_cpu(ipi_to_evtchn, vcpu)[ipi], 8.26 - evtchn); 8.27 - 8.28 - evtchn_to_irq[irq_to_evtchn[irq]] = -1; 8.29 - irq_to_evtchn[irq] = -1; 8.30 - 8.31 - evtchn_to_irq[evtchn] = irq; 8.32 - irq_to_evtchn[irq] = evtchn; 8.33 - 8.34 - printk("<0>evtchn_to_irq[%d] = %d.\n", evtchn, 8.35 - evtchn_to_irq[evtchn]); 8.36 - per_cpu(ipi_to_evtchn, vcpu)[ipi] = evtchn; 8.37 - 8.38 - bind_evtchn_to_cpu(evtchn, vcpu); 8.39 - 8.40 - spin_unlock(&irq_mapping_update_lock); 8.41 - 8.42 - clear_bit(evtchn, (unsigned long *)HYPERVISOR_shared_info->evtchn_mask); 8.43 - clear_bit(evtchn, (unsigned long *)HYPERVISOR_shared_info->evtchn_pending); 8.44 -} 8.45 - 8.46 -void _bind_virq_to_irq(int virq, int cpu, int irq) 8.47 -{ 8.48 - evtchn_op_t op; 8.49 - int evtchn; 8.50 - 8.51 - spin_lock(&irq_mapping_update_lock); 8.52 - 8.53 - op.cmd = EVTCHNOP_bind_virq; 8.54 - op.u.bind_virq.virq = virq; 8.55 - if ( HYPERVISOR_event_channel_op(&op) != 0 ) 8.56 - panic("Failed to bind virtual IRQ %d\n", virq); 8.57 - evtchn = op.u.bind_virq.port; 8.58 - 8.59 - evtchn_to_irq[irq_to_evtchn[irq]] = -1; 8.60 - irq_to_evtchn[irq] = -1; 8.61 - 8.62 - evtchn_to_irq[evtchn] = irq; 8.63 - irq_to_evtchn[irq] = evtchn; 8.64 - 8.65 - per_cpu(virq_to_irq, cpu)[virq] = irq; 8.66 - 8.67 - bind_evtchn_to_cpu(evtchn, cpu); 8.68 - 8.69 - spin_unlock(&irq_mapping_update_lock); 8.70 - 8.71 - clear_bit(evtchn, (unsigned long *)HYPERVISOR_shared_info->evtchn_mask); 8.72 - clear_bit(evtchn, (unsigned long *)HYPERVISOR_shared_info->evtchn_pending); 8.73 -} 8.74 - 8.75 int bind_ipi_to_irq(int ipi) 8.76 { 8.77 evtchn_op_t op;
9.1 --- a/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c Fri Aug 26 13:47:16 2005 -0700 9.2 +++ b/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c Mon Aug 29 13:19:24 2005 -0700 9.3 @@ -65,66 +65,13 @@ static int shutting_down = SHUTDOWN_INVA 9.4 #define cpu_up(x) (-EOPNOTSUPP) 9.5 #endif 9.6 9.7 -static void save_vcpu_context(int vcpu, vcpu_guest_context_t *ctxt) 9.8 -{ 9.9 - int r; 9.10 - int gdt_pages; 9.11 - r = HYPERVISOR_vcpu_pickle(vcpu, ctxt); 9.12 - if (r != 0) 9.13 - panic("pickling vcpu %d -> %d!\n", vcpu, r); 9.14 - 9.15 - /* Translate from machine to physical addresses where necessary, 9.16 - so that they can be translated to our new machine address space 9.17 - after resume. libxc is responsible for doing this to vcpu0, 9.18 - but we do it to the others. */ 9.19 - gdt_pages = (ctxt->gdt_ents + 511) / 512; 9.20 - ctxt->ctrlreg[3] = machine_to_phys(ctxt->ctrlreg[3]); 9.21 - for (r = 0; r < gdt_pages; r++) 9.22 - ctxt->gdt_frames[r] = mfn_to_pfn(ctxt->gdt_frames[r]); 9.23 -} 9.24 - 9.25 -void _restore_vcpu(int cpu); 9.26 - 9.27 -atomic_t vcpus_rebooting; 9.28 - 9.29 -static int restore_vcpu_context(int vcpu, vcpu_guest_context_t *ctxt) 9.30 -{ 9.31 - int r; 9.32 - int gdt_pages = (ctxt->gdt_ents + 511) / 512; 9.33 - 9.34 - /* This is kind of a hack, and implicitly relies on the fact that 9.35 - the vcpu stops in a place where all of the call clobbered 9.36 - registers are already dead. */ 9.37 - ctxt->user_regs.esp -= 4; 9.38 - ((unsigned long *)ctxt->user_regs.esp)[0] = ctxt->user_regs.eip; 9.39 - ctxt->user_regs.eip = (unsigned long)_restore_vcpu; 9.40 - 9.41 - /* De-canonicalise. libxc handles this for vcpu 0, but we need 9.42 - to do it for the other vcpus. */ 9.43 - ctxt->ctrlreg[3] = phys_to_machine(ctxt->ctrlreg[3]); 9.44 - for (r = 0; r < gdt_pages; r++) 9.45 - ctxt->gdt_frames[r] = pfn_to_mfn(ctxt->gdt_frames[r]); 9.46 - 9.47 - atomic_set(&vcpus_rebooting, 1); 9.48 - r = HYPERVISOR_boot_vcpu(vcpu, ctxt); 9.49 - if (r != 0) { 9.50 - printk(KERN_EMERG "Failed to reboot vcpu %d (%d)\n", vcpu, r); 9.51 - return -1; 9.52 - } 9.53 - 9.54 - /* Make sure we wait for the new vcpu to come up before trying to do 9.55 - anything with it or starting the next one. */ 9.56 - while (atomic_read(&vcpus_rebooting)) 9.57 - barrier(); 9.58 - 9.59 - return 0; 9.60 -} 9.61 +#ifdef CONFIG_SMP 9.62 +#endif 9.63 9.64 static int __do_suspend(void *ignore) 9.65 { 9.66 int i, j; 9.67 suspend_record_t *suspend_record; 9.68 - static vcpu_guest_context_t suspended_cpu_records[NR_CPUS]; 9.69 9.70 /* Hmmm... a cleaner interface to suspend/resume blkdevs would be nice. 
*/ 9.71 /* XXX SMH: yes it would :-( */ 9.72 @@ -138,16 +85,22 @@ static int __do_suspend(void *ignore) 9.73 extern int gnttab_suspend(void); 9.74 extern int gnttab_resume(void); 9.75 9.76 -#ifdef CONFIG_SMP 9.77 - extern void smp_suspend(void); 9.78 - extern void smp_resume(void); 9.79 -#endif 9.80 extern void time_suspend(void); 9.81 extern void time_resume(void); 9.82 extern unsigned long max_pfn; 9.83 extern unsigned int *pfn_to_mfn_frame_list; 9.84 9.85 +#ifdef CONFIG_SMP 9.86 + extern void smp_suspend(void); 9.87 + extern void smp_resume(void); 9.88 + 9.89 + static vcpu_guest_context_t suspended_cpu_records[NR_CPUS]; 9.90 cpumask_t prev_online_cpus, prev_present_cpus; 9.91 + 9.92 + void save_vcpu_context(int vcpu, vcpu_guest_context_t *ctxt); 9.93 + int restore_vcpu_context(int vcpu, vcpu_guest_context_t *ctxt); 9.94 +#endif 9.95 + 9.96 int err = 0; 9.97 9.98 BUG_ON(smp_processor_id() != 0); 9.99 @@ -164,6 +117,8 @@ static int __do_suspend(void *ignore) 9.100 if ( suspend_record == NULL ) 9.101 goto out; 9.102 9.103 + preempt_disable(); 9.104 +#ifdef CONFIG_SMP 9.105 /* Take all of the other cpus offline. We need to be careful not 9.106 to get preempted between the final test for num_online_cpus() 9.107 == 1 and disabling interrupts, since otherwise userspace could 9.108 @@ -175,7 +130,6 @@ static int __do_suspend(void *ignore) 9.109 since by the time num_online_cpus() == 1, there aren't any 9.110 other cpus) */ 9.111 cpus_clear(prev_online_cpus); 9.112 - preempt_disable(); 9.113 while (num_online_cpus() > 1) { 9.114 preempt_enable(); 9.115 for_each_online_cpu(i) { 9.116 @@ -190,6 +144,7 @@ static int __do_suspend(void *ignore) 9.117 } 9.118 preempt_disable(); 9.119 } 9.120 +#endif 9.121 9.122 suspend_record->nr_pfns = max_pfn; /* final number of pfns */ 9.123 9.124 @@ -197,6 +152,7 @@ static int __do_suspend(void *ignore) 9.125 9.126 preempt_enable(); 9.127 9.128 +#ifdef CONFIG_SMP 9.129 cpus_clear(prev_present_cpus); 9.130 for_each_present_cpu(i) { 9.131 if (i == 0) 9.132 @@ -204,6 +160,7 @@ static int __do_suspend(void *ignore) 9.133 save_vcpu_context(i, &suspended_cpu_records[i]); 9.134 cpu_set(i, prev_present_cpus); 9.135 } 9.136 +#endif 9.137 9.138 #ifdef __i386__ 9.139 mm_pin_all(); 9.140 @@ -269,12 +226,14 @@ static int __do_suspend(void *ignore) 9.141 9.142 usbif_resume(); 9.143 9.144 - for_each_cpu_mask(i, prev_present_cpus) { 9.145 +#ifdef CONFIG_SMP 9.146 + for_each_cpu_mask(i, prev_present_cpus) 9.147 restore_vcpu_context(i, &suspended_cpu_records[i]); 9.148 - } 9.149 +#endif 9.150 9.151 __sti(); 9.152 9.153 +#ifdef CONFIG_SMP 9.154 out_reenable_cpus: 9.155 for_each_cpu_mask(i, prev_online_cpus) { 9.156 j = cpu_up(i); 9.157 @@ -284,6 +243,7 @@ static int __do_suspend(void *ignore) 9.158 err = j; 9.159 } 9.160 } 9.161 +#endif 9.162 9.163 out: 9.164 if ( suspend_record != NULL )
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile Fri Aug 26 13:47:16 2005 -0700
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile Mon Aug 29 13:19:24 2005 -0700
@@ -40,7 +40,7 @@ obj-$(CONFIG_DUMMY_IOMMU)	+= pci-nommu.o
 i386-obj-$(CONFIG_DUMMY_IOMMU)	+= pci-dma.o
 i386-obj-$(CONFIG_SWIOTLB)	+= swiotlb.o
 obj-$(CONFIG_KPROBES)		+= kprobes.o
-obj-$(CONFIG_X86_PM_TIMER)	+= pmtimer.o
+#obj-$(CONFIG_X86_PM_TIMER)	+= pmtimer.o
 
 c-obj-$(CONFIG_MODULES)		+= module.o
 
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smpboot.c Fri Aug 26 13:47:16 2005 -0700
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smpboot.c Mon Aug 29 13:19:24 2005 -0700
@@ -1277,21 +1277,23 @@ extern void local_teardown_timer_irq(voi
 
 void smp_suspend(void)
 {
-	/* XXX todo: take down time and ipi's on all cpus */
 	local_teardown_timer_irq();
 	smp_intr_exit();
 }
 
 void smp_resume(void)
 {
-	/* XXX todo: restore time and ipi's on all cpus */
 	smp_intr_init();
 	local_setup_timer_irq();
 }
 
-void _restore_vcpu(void)
+void save_vcpu_context(int vcpu, vcpu_guest_context_t *ctxt)
 {
-	/* XXX need to write this */
+}
+
+int restore_vcpu_context(int vcpu, vcpu_guest_context_t *ctxt)
+{
+	return 0;
 }
 
 #endif
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c Fri Aug 26 13:47:16 2005 -0700
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c Mon Aug 29 13:19:24 2005 -0700
@@ -767,9 +767,6 @@ void __init clear_kernel_mapping(unsigne
 
 static inline int page_is_ram (unsigned long pagenr)
 {
-	if (pagenr < start_pfn || pagenr >= end_pfn)
-		return 0;
-
 	return 1;
 }
 
--- a/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c Fri Aug 26 13:47:16 2005 -0700
+++ b/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c Mon Aug 29 13:19:24 2005 -0700
@@ -295,10 +295,10 @@ static struct xenbus_watch target_watch
 /* React to a change in the target key */
 static void watch_target(struct xenbus_watch *watch, const char *node)
 {
-    unsigned long new_target;
+    unsigned long long new_target;
    int err;
 
-    err = xenbus_scanf("memory", "target", "%lu", &new_target);
+    err = xenbus_scanf("memory", "target", "%llu", &new_target);
    if (err != 1) {
        printk(KERN_ERR "Unable to read memory/target\n");
        return;
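The balloon hunk widens new_target and the xenbus_scanf() conversion together: a "%llu" conversion must be paired with an unsigned long long destination, otherwise the scan writes past a 32-bit variable. The small self-contained user-space program below illustrates the same format/type pairing; sscanf and the literal value stand in for xenbus_scanf and the real memory/target node, so treat it as an illustration rather than driver code.

/* Illustration only: matching a 64-bit conversion with a 64-bit variable,
 * as the balloon.c hunk above does for the memory/target read. */
#include <stdio.h>

int main(void)
{
	unsigned long long new_target;

	/* The target arrives as a decimal string; a matching "%llu"
	 * conversion keeps large values intact even where unsigned long
	 * is only 32 bits. */
	if (sscanf("8388608", "%llu", &new_target) != 1)
		return 1;
	printf("target = %llu\n", new_target);
	return 0;
}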
14.1 --- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Fri Aug 26 13:47:16 2005 -0700 14.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Mon Aug 29 13:19:24 2005 -0700 14.3 @@ -32,23 +32,15 @@ 14.4 */ 14.5 14.6 #if 1 14.7 -#define ASSERT(_p) \ 14.8 - if ( !(_p) ) { printk("Assertion '%s' failed, line %d, file %s", #_p , \ 14.9 - __LINE__, __FILE__); *(int*)0=0; } 14.10 +#define ASSERT(p) \ 14.11 + if (!(p)) { printk("Assertion '%s' failed, line %d, file %s", #p , \ 14.12 + __LINE__, __FILE__); *(int*)0=0; } 14.13 #else 14.14 #define ASSERT(_p) 14.15 #endif 14.16 14.17 #include <linux/version.h> 14.18 - 14.19 -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) 14.20 #include "block.h" 14.21 -#else 14.22 -#include "common.h" 14.23 -#include <linux/blk.h> 14.24 -#include <linux/tqueue.h> 14.25 -#endif 14.26 - 14.27 #include <linux/cdrom.h> 14.28 #include <linux/sched.h> 14.29 #include <linux/interrupt.h> 14.30 @@ -58,91 +50,57 @@ 14.31 #include <asm-xen/xen-public/grant_table.h> 14.32 #include <asm-xen/gnttab.h> 14.33 14.34 -typedef unsigned char byte; /* from linux/ide.h */ 14.35 - 14.36 -/* Control whether runtime update of vbds is enabled. */ 14.37 -#define ENABLE_VBD_UPDATE 1 14.38 - 14.39 #define BLKIF_STATE_DISCONNECTED 0 14.40 #define BLKIF_STATE_CONNECTED 1 14.41 14.42 static unsigned int blkif_state = BLKIF_STATE_DISCONNECTED; 14.43 14.44 -#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE) 14.45 - 14.46 #define MAXIMUM_OUTSTANDING_BLOCK_REQS \ 14.47 (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLKIF_RING_SIZE) 14.48 #define GRANTREF_INVALID (1<<15) 14.49 14.50 -static struct blk_shadow { 14.51 - blkif_request_t req; 14.52 - unsigned long request; 14.53 - unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST]; 14.54 -} blk_shadow[BLK_RING_SIZE]; 14.55 -unsigned long blk_shadow_free; 14.56 - 14.57 static int recovery = 0; /* Recovery in progress: protected by blkif_io_lock */ 14.58 14.59 static void kick_pending_request_queues(struct blkfront_info *info); 14.60 14.61 -static int __init xlblk_init(void); 14.62 - 14.63 static void blkif_completion(struct blk_shadow *s); 14.64 14.65 -static inline int GET_ID_FROM_FREELIST(void) 14.66 +static inline int GET_ID_FROM_FREELIST( 14.67 + struct blkfront_info *info) 14.68 { 14.69 - unsigned long free = blk_shadow_free; 14.70 - BUG_ON(free > BLK_RING_SIZE); 14.71 - blk_shadow_free = blk_shadow[free].req.id; 14.72 - blk_shadow[free].req.id = 0x0fffffee; /* debug */ 14.73 - return free; 14.74 + unsigned long free = info->shadow_free; 14.75 + BUG_ON(free > BLK_RING_SIZE); 14.76 + info->shadow_free = info->shadow[free].req.id; 14.77 + info->shadow[free].req.id = 0x0fffffee; /* debug */ 14.78 + return free; 14.79 } 14.80 14.81 -static inline void ADD_ID_TO_FREELIST(unsigned long id) 14.82 +static inline void ADD_ID_TO_FREELIST( 14.83 + struct blkfront_info *info, unsigned long id) 14.84 { 14.85 - blk_shadow[id].req.id = blk_shadow_free; 14.86 - blk_shadow[id].request = 0; 14.87 - blk_shadow_free = id; 14.88 + info->shadow[id].req.id = info->shadow_free; 14.89 + info->shadow[id].request = 0; 14.90 + info->shadow_free = id; 14.91 } 14.92 14.93 - 14.94 -/************************ COMMON CODE (inlined) ************************/ 14.95 - 14.96 -/* Kernel-specific definitions used in the common code */ 14.97 -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) 14.98 -#define DISABLE_SCATTERGATHER() 14.99 -#else 14.100 -static int sg_operation = -1; 14.101 -#define DISABLE_SCATTERGATHER() (sg_operation = -1) 14.102 -#endif 14.103 - 14.104 static 
inline void pickle_request(struct blk_shadow *s, blkif_request_t *r) 14.105 { 14.106 14.107 - s->req = *r; 14.108 + s->req = *r; 14.109 } 14.110 14.111 static inline void unpickle_request(blkif_request_t *r, struct blk_shadow *s) 14.112 { 14.113 14.114 - *r = s->req; 14.115 + *r = s->req; 14.116 } 14.117 14.118 - 14.119 static inline void flush_requests(struct blkfront_info *info) 14.120 { 14.121 - DISABLE_SCATTERGATHER(); 14.122 - RING_PUSH_REQUESTS(&info->ring); 14.123 - notify_via_evtchn(info->evtchn); 14.124 + RING_PUSH_REQUESTS(&info->ring); 14.125 + notify_via_evtchn(info->evtchn); 14.126 } 14.127 14.128 - 14.129 -/************************** KERNEL VERSION 2.6 **************************/ 14.130 - 14.131 -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) 14.132 - 14.133 -module_init(xlblk_init); 14.134 - 14.135 static void kick_pending_request_queues(struct blkfront_info *info) 14.136 { 14.137 if (!RING_FULL(&info->ring)) { 14.138 @@ -169,50 +127,44 @@ static void blkif_restart_queue_callback 14.139 14.140 int blkif_open(struct inode *inode, struct file *filep) 14.141 { 14.142 - // struct gendisk *gd = inode->i_bdev->bd_disk; 14.143 - // struct xlbd_disk_info *di = (struct xlbd_disk_info *)gd->private_data; 14.144 - 14.145 - /* Update of usage count is protected by per-device semaphore. */ 14.146 - // di->mi->usage++; 14.147 - 14.148 return 0; 14.149 } 14.150 14.151 14.152 int blkif_release(struct inode *inode, struct file *filep) 14.153 { 14.154 - /* FIXME: This is where we can actually free up majors, etc. --RR */ 14.155 - return 0; 14.156 + return 0; 14.157 } 14.158 14.159 14.160 int blkif_ioctl(struct inode *inode, struct file *filep, 14.161 unsigned command, unsigned long argument) 14.162 { 14.163 - int i; 14.164 - 14.165 - DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n", 14.166 - command, (long)argument, inode->i_rdev); 14.167 - 14.168 - switch ( command ) 14.169 - { 14.170 - case HDIO_GETGEO: 14.171 - /* return ENOSYS to use defaults */ 14.172 - return -ENOSYS; 14.173 + int i; 14.174 14.175 - case CDROMMULTISESSION: 14.176 - DPRINTK("FIXME: support multisession CDs later\n"); 14.177 - for ( i = 0; i < sizeof(struct cdrom_multisession); i++ ) 14.178 - if ( put_user(0, (byte *)(argument + i)) ) return -EFAULT; 14.179 - return 0; 14.180 + DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n", 14.181 + command, (long)argument, inode->i_rdev); 14.182 14.183 - default: 14.184 - /*printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n", 14.185 - command);*/ 14.186 - return -EINVAL; /* same return as native Linux */ 14.187 - } 14.188 + switch ( command ) 14.189 + { 14.190 + case HDIO_GETGEO: 14.191 + /* return ENOSYS to use defaults */ 14.192 + return -ENOSYS; 14.193 14.194 - return 0; 14.195 + case CDROMMULTISESSION: 14.196 + DPRINTK("FIXME: support multisession CDs later\n"); 14.197 + for (i = 0; i < sizeof(struct cdrom_multisession); i++) 14.198 + if (put_user(0, (char *)(argument + i))) 14.199 + return -EFAULT; 14.200 + return 0; 14.201 + 14.202 + default: 14.203 + /*printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n", 14.204 + command);*/ 14.205 + return -EINVAL; /* same return as native Linux */ 14.206 + } 14.207 + 14.208 + return 0; 14.209 } 14.210 14.211 14.212 @@ -228,76 +180,77 @@ int blkif_ioctl(struct inode *inode, str 14.213 */ 14.214 static int blkif_queue_request(struct request *req) 14.215 { 14.216 - struct blkfront_info *info = req->rq_disk->private_data; 14.217 - unsigned long buffer_ma; 14.218 - blkif_request_t *ring_req; 14.219 - 
struct bio *bio; 14.220 - struct bio_vec *bvec; 14.221 - int idx; 14.222 - unsigned long id; 14.223 - unsigned int fsect, lsect; 14.224 - int ref; 14.225 - grant_ref_t gref_head; 14.226 - 14.227 - if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) 14.228 - return 1; 14.229 - 14.230 - if (gnttab_alloc_grant_references(BLKIF_MAX_SEGMENTS_PER_REQUEST, 14.231 - &gref_head) < 0) { 14.232 - gnttab_request_free_callback(&info->callback, 14.233 - blkif_restart_queue_callback, info, 14.234 - BLKIF_MAX_SEGMENTS_PER_REQUEST); 14.235 - return 1; 14.236 - } 14.237 - 14.238 - /* Fill out a communications ring structure. */ 14.239 - ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt); 14.240 - id = GET_ID_FROM_FREELIST(); 14.241 - blk_shadow[id].request = (unsigned long)req; 14.242 - 14.243 - ring_req->id = id; 14.244 - ring_req->operation = rq_data_dir(req) ? BLKIF_OP_WRITE : BLKIF_OP_READ; 14.245 - ring_req->sector_number = (blkif_sector_t)req->sector; 14.246 - ring_req->handle = info->handle; 14.247 + struct blkfront_info *info = req->rq_disk->private_data; 14.248 + unsigned long buffer_ma; 14.249 + blkif_request_t *ring_req; 14.250 + struct bio *bio; 14.251 + struct bio_vec *bvec; 14.252 + int idx; 14.253 + unsigned long id; 14.254 + unsigned int fsect, lsect; 14.255 + int ref; 14.256 + grant_ref_t gref_head; 14.257 14.258 - ring_req->nr_segments = 0; 14.259 - rq_for_each_bio(bio, req) 14.260 - { 14.261 - bio_for_each_segment(bvec, bio, idx) 14.262 - { 14.263 - if ( ring_req->nr_segments == BLKIF_MAX_SEGMENTS_PER_REQUEST ) 14.264 - BUG(); 14.265 - buffer_ma = page_to_phys(bvec->bv_page); 14.266 - fsect = bvec->bv_offset >> 9; 14.267 - lsect = fsect + (bvec->bv_len >> 9) - 1; 14.268 - /* install a grant reference. */ 14.269 - ref = gnttab_claim_grant_reference(&gref_head); 14.270 - ASSERT( ref != -ENOSPC ); 14.271 + if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) 14.272 + return 1; 14.273 14.274 - gnttab_grant_foreign_access_ref( 14.275 - ref, 14.276 - info->backend_id, 14.277 - buffer_ma >> PAGE_SHIFT, 14.278 - rq_data_dir(req) ); 14.279 - 14.280 - blk_shadow[id].frame[ring_req->nr_segments] = 14.281 - buffer_ma >> PAGE_SHIFT; 14.282 - 14.283 - ring_req->frame_and_sects[ring_req->nr_segments] = 14.284 - blkif_fas_from_gref(ref, fsect, lsect); 14.285 + if (gnttab_alloc_grant_references( 14.286 + BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) { 14.287 + gnttab_request_free_callback( 14.288 + &info->callback, 14.289 + blkif_restart_queue_callback, 14.290 + info, 14.291 + BLKIF_MAX_SEGMENTS_PER_REQUEST); 14.292 + return 1; 14.293 + } 14.294 14.295 - ring_req->nr_segments++; 14.296 - } 14.297 - } 14.298 - 14.299 - info->ring.req_prod_pvt++; 14.300 + /* Fill out a communications ring structure. */ 14.301 + ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt); 14.302 + id = GET_ID_FROM_FREELIST(info); 14.303 + info->shadow[id].request = (unsigned long)req; 14.304 14.305 - /* Keep a private copy so we can reissue requests when recovering. */ 14.306 - pickle_request(&blk_shadow[id], ring_req); 14.307 + ring_req->id = id; 14.308 + ring_req->operation = rq_data_dir(req) ? 
14.309 + BLKIF_OP_WRITE : BLKIF_OP_READ; 14.310 + ring_req->sector_number = (blkif_sector_t)req->sector; 14.311 + ring_req->handle = info->handle; 14.312 14.313 - gnttab_free_grant_references(gref_head); 14.314 + ring_req->nr_segments = 0; 14.315 + rq_for_each_bio (bio, req) { 14.316 + bio_for_each_segment (bvec, bio, idx) { 14.317 + BUG_ON(ring_req->nr_segments 14.318 + == BLKIF_MAX_SEGMENTS_PER_REQUEST); 14.319 + buffer_ma = page_to_phys(bvec->bv_page); 14.320 + fsect = bvec->bv_offset >> 9; 14.321 + lsect = fsect + (bvec->bv_len >> 9) - 1; 14.322 + /* install a grant reference. */ 14.323 + ref = gnttab_claim_grant_reference(&gref_head); 14.324 + ASSERT(ref != -ENOSPC); 14.325 14.326 - return 0; 14.327 + gnttab_grant_foreign_access_ref( 14.328 + ref, 14.329 + info->backend_id, 14.330 + buffer_ma >> PAGE_SHIFT, 14.331 + rq_data_dir(req) ); 14.332 + 14.333 + info->shadow[id].frame[ring_req->nr_segments] = 14.334 + buffer_ma >> PAGE_SHIFT; 14.335 + 14.336 + ring_req->frame_and_sects[ring_req->nr_segments] = 14.337 + blkif_fas_from_gref(ref, fsect, lsect); 14.338 + 14.339 + ring_req->nr_segments++; 14.340 + } 14.341 + } 14.342 + 14.343 + info->ring.req_prod_pvt++; 14.344 + 14.345 + /* Keep a private copy so we can reissue requests when recovering. */ 14.346 + pickle_request(&info->shadow[id], ring_req); 14.347 + 14.348 + gnttab_free_grant_references(gref_head); 14.349 + 14.350 + return 0; 14.351 } 14.352 14.353 /* 14.354 @@ -306,756 +259,197 @@ static int blkif_queue_request(struct re 14.355 */ 14.356 void do_blkif_request(request_queue_t *rq) 14.357 { 14.358 - struct blkfront_info *info = NULL; 14.359 - struct request *req; 14.360 - int queued; 14.361 - 14.362 - DPRINTK("Entered do_blkif_request\n"); 14.363 - 14.364 - queued = 0; 14.365 - 14.366 - while ( (req = elv_next_request(rq)) != NULL ) 14.367 - { 14.368 - info = req->rq_disk->private_data; 14.369 - 14.370 - if ( !blk_fs_request(req) ) 14.371 - { 14.372 - end_request(req, 0); 14.373 - continue; 14.374 - } 14.375 + struct blkfront_info *info = NULL; 14.376 + struct request *req; 14.377 + int queued; 14.378 14.379 - if (RING_FULL(&info->ring)) 14.380 - goto wait; 14.381 - 14.382 - DPRINTK("do_blk_req %p: cmd %p, sec %lx, (%u/%li) buffer:%p [%s]\n", 14.383 - req, req->cmd, req->sector, req->current_nr_sectors, 14.384 - req->nr_sectors, req->buffer, 14.385 - rq_data_dir(req) ? "write" : "read"); 14.386 + DPRINTK("Entered do_blkif_request\n"); 14.387 14.388 - blkdev_dequeue_request(req); 14.389 - if (blkif_queue_request(req)) { 14.390 - blk_requeue_request(rq, req); 14.391 - wait: 14.392 - /* Avoid pointless unplugs. */ 14.393 - blk_stop_queue(rq); 14.394 - break; 14.395 - } 14.396 + queued = 0; 14.397 14.398 - queued++; 14.399 - } 14.400 + while ((req = elv_next_request(rq)) != NULL) { 14.401 + info = req->rq_disk->private_data; 14.402 14.403 - if ( queued != 0 ) 14.404 - flush_requests(info); 14.405 + if (!blk_fs_request(req)) { 14.406 + end_request(req, 0); 14.407 + continue; 14.408 + } 14.409 + 14.410 + if (RING_FULL(&info->ring)) 14.411 + goto wait; 14.412 + 14.413 + DPRINTK("do_blk_req %p: cmd %p, sec %lx, " 14.414 + "(%u/%li) buffer:%p [%s]\n", 14.415 + req, req->cmd, req->sector, req->current_nr_sectors, 14.416 + req->nr_sectors, req->buffer, 14.417 + rq_data_dir(req) ? "write" : "read"); 14.418 + 14.419 + blkdev_dequeue_request(req); 14.420 + if (blkif_queue_request(req)) { 14.421 + blk_requeue_request(rq, req); 14.422 + wait: 14.423 + /* Avoid pointless unplugs. 
*/ 14.424 + blk_stop_queue(rq); 14.425 + break; 14.426 + } 14.427 + 14.428 + queued++; 14.429 + } 14.430 + 14.431 + if (queued != 0) 14.432 + flush_requests(info); 14.433 } 14.434 14.435 14.436 static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs) 14.437 { 14.438 - struct request *req; 14.439 - blkif_response_t *bret; 14.440 - RING_IDX i, rp; 14.441 - unsigned long flags; 14.442 - struct blkfront_info *info = (struct blkfront_info *)dev_id; 14.443 - 14.444 - spin_lock_irqsave(&blkif_io_lock, flags); 14.445 - 14.446 - if (unlikely(info->connected != BLKIF_STATE_CONNECTED || recovery)) { 14.447 - spin_unlock_irqrestore(&blkif_io_lock, flags); 14.448 - return IRQ_HANDLED; 14.449 - } 14.450 - 14.451 - rp = info->ring.sring->rsp_prod; 14.452 - rmb(); /* Ensure we see queued responses up to 'rp'. */ 14.453 - 14.454 - for ( i = info->ring.rsp_cons; i != rp; i++ ) 14.455 - { 14.456 - unsigned long id; 14.457 - 14.458 - bret = RING_GET_RESPONSE(&info->ring, i); 14.459 - id = bret->id; 14.460 - req = (struct request *)blk_shadow[id].request; 14.461 - 14.462 - blkif_completion(&blk_shadow[id]); 14.463 - 14.464 - ADD_ID_TO_FREELIST(id); 14.465 - 14.466 - switch ( bret->operation ) 14.467 - { 14.468 - case BLKIF_OP_READ: 14.469 - case BLKIF_OP_WRITE: 14.470 - if ( unlikely(bret->status != BLKIF_RSP_OKAY) ) 14.471 - DPRINTK("Bad return from blkdev data request: %x\n", 14.472 - bret->status); 14.473 - 14.474 - if ( unlikely(end_that_request_first 14.475 - (req, 14.476 - (bret->status == BLKIF_RSP_OKAY), 14.477 - req->hard_nr_sectors)) ) 14.478 - BUG(); 14.479 - end_that_request_last(req); 14.480 - 14.481 - break; 14.482 - default: 14.483 - BUG(); 14.484 - } 14.485 - } 14.486 - 14.487 - info->ring.rsp_cons = i; 14.488 - 14.489 - kick_pending_request_queues(info); 14.490 - 14.491 - spin_unlock_irqrestore(&blkif_io_lock, flags); 14.492 - 14.493 - return IRQ_HANDLED; 14.494 -} 14.495 - 14.496 -#else 14.497 -/************************** KERNEL VERSION 2.4 **************************/ 14.498 - 14.499 -static kdev_t sg_dev; 14.500 -static unsigned long sg_next_sect; 14.501 - 14.502 -/* 14.503 - * Request queues with outstanding work, but ring is currently full. 14.504 - * We need no special lock here, as we always access this with the 14.505 - * blkif_io_lock held. We only need a small maximum list. 14.506 - */ 14.507 -#define MAX_PENDING 8 14.508 -static request_queue_t *pending_queues[MAX_PENDING]; 14.509 -static int nr_pending; 14.510 - 14.511 - 14.512 -#define blkif_io_lock io_request_lock 14.513 - 14.514 -/*============================================================================*/ 14.515 -static void kick_pending_request_queues(void) 14.516 -{ 14.517 - /* We kick pending request queues if the ring is reasonably empty. */ 14.518 - if ( (nr_pending != 0) && 14.519 - (RING_PENDING_REQUESTS(&info->ring) < (BLK_RING_SIZE >> 1)) ) 14.520 - { 14.521 - /* Attempt to drain the queue, but bail if the ring becomes full. 
*/ 14.522 - while ( (nr_pending != 0) && !RING_FULL(&info->ring) ) 14.523 - do_blkif_request(pending_queues[--nr_pending]); 14.524 - } 14.525 -} 14.526 - 14.527 -int blkif_open(struct inode *inode, struct file *filep) 14.528 -{ 14.529 - short xldev = inode->i_rdev; 14.530 - struct gendisk *gd = get_gendisk(xldev); 14.531 - xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev); 14.532 - short minor = MINOR(xldev); 14.533 - 14.534 - if ( gd->part[minor].nr_sects == 0 ) 14.535 - { 14.536 - /* 14.537 - * Device either doesn't exist, or has zero capacity; we use a few 14.538 - * cheesy heuristics to return the relevant error code 14.539 - */ 14.540 - if ( (gd->sizes[minor >> gd->minor_shift] != 0) || 14.541 - ((minor & (gd->max_p - 1)) != 0) ) 14.542 - { 14.543 - /* 14.544 - * We have a real device, but no such partition, or we just have a 14.545 - * partition number so guess this is the problem. 14.546 - */ 14.547 - return -ENXIO; /* no such device or address */ 14.548 - } 14.549 - else if ( gd->flags[minor >> gd->minor_shift] & GENHD_FL_REMOVABLE ) 14.550 - { 14.551 - /* This is a removable device => assume that media is missing. */ 14.552 - return -ENOMEDIUM; /* media not present (this is a guess) */ 14.553 - } 14.554 - else 14.555 - { 14.556 - /* Just go for the general 'no such device' error. */ 14.557 - return -ENODEV; /* no such device */ 14.558 - } 14.559 - } 14.560 - 14.561 - /* Update of usage count is protected by per-device semaphore. */ 14.562 - disk->usage++; 14.563 - 14.564 - return 0; 14.565 -} 14.566 - 14.567 - 14.568 -int blkif_release(struct inode *inode, struct file *filep) 14.569 -{ 14.570 - xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev); 14.571 - 14.572 - /* 14.573 - * When usage drops to zero it may allow more VBD updates to occur. 14.574 - * Update of usage count is protected by a per-device semaphore. 14.575 - */ 14.576 - if ( --disk->usage == 0 ) { 14.577 - vbd_update(); 14.578 - } 14.579 - 14.580 - return 0; 14.581 -} 14.582 - 14.583 - 14.584 -int blkif_ioctl(struct inode *inode, struct file *filep, 14.585 - unsigned command, unsigned long argument) 14.586 -{ 14.587 - kdev_t dev = inode->i_rdev; 14.588 - struct hd_geometry *geo = (struct hd_geometry *)argument; 14.589 - struct gendisk *gd; 14.590 - struct hd_struct *part; 14.591 - int i; 14.592 - unsigned short cylinders; 14.593 - byte heads, sectors; 14.594 - 14.595 - /* NB. No need to check permissions. That is done for us. 
*/ 14.596 - 14.597 - DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n", 14.598 - command, (long) argument, dev); 14.599 - 14.600 - gd = get_gendisk(dev); 14.601 - part = &gd->part[MINOR(dev)]; 14.602 - 14.603 - switch ( command ) 14.604 - { 14.605 - case BLKGETSIZE: 14.606 - DPRINTK_IOCTL(" BLKGETSIZE: %x %lx\n", BLKGETSIZE, part->nr_sects); 14.607 - return put_user(part->nr_sects, (unsigned long *) argument); 14.608 - 14.609 - case BLKGETSIZE64: 14.610 - DPRINTK_IOCTL(" BLKGETSIZE64: %x %llx\n", BLKGETSIZE64, 14.611 - (u64)part->nr_sects * 512); 14.612 - return put_user((u64)part->nr_sects * 512, (u64 *) argument); 14.613 - 14.614 - case BLKRRPART: /* re-read partition table */ 14.615 - DPRINTK_IOCTL(" BLKRRPART: %x\n", BLKRRPART); 14.616 - return blkif_revalidate(dev); 14.617 - 14.618 - case BLKSSZGET: 14.619 - return hardsect_size[MAJOR(dev)][MINOR(dev)]; 14.620 - 14.621 - case BLKBSZGET: /* get block size */ 14.622 - DPRINTK_IOCTL(" BLKBSZGET: %x\n", BLKBSZGET); 14.623 - break; 14.624 - 14.625 - case BLKBSZSET: /* set block size */ 14.626 - DPRINTK_IOCTL(" BLKBSZSET: %x\n", BLKBSZSET); 14.627 - break; 14.628 - 14.629 - case BLKRASET: /* set read-ahead */ 14.630 - DPRINTK_IOCTL(" BLKRASET: %x\n", BLKRASET); 14.631 - break; 14.632 - 14.633 - case BLKRAGET: /* get read-ahead */ 14.634 - DPRINTK_IOCTL(" BLKRAFET: %x\n", BLKRAGET); 14.635 - break; 14.636 - 14.637 - case HDIO_GETGEO: 14.638 - DPRINTK_IOCTL(" HDIO_GETGEO: %x\n", HDIO_GETGEO); 14.639 - if (!argument) return -EINVAL; 14.640 - 14.641 - /* We don't have real geometry info, but let's at least return 14.642 - values consistent with the size of the device */ 14.643 - 14.644 - heads = 0xff; 14.645 - sectors = 0x3f; 14.646 - cylinders = part->nr_sects / (heads * sectors); 14.647 - 14.648 - if (put_user(0x00, (unsigned long *) &geo->start)) return -EFAULT; 14.649 - if (put_user(heads, (byte *)&geo->heads)) return -EFAULT; 14.650 - if (put_user(sectors, (byte *)&geo->sectors)) return -EFAULT; 14.651 - if (put_user(cylinders, (unsigned short *)&geo->cylinders)) return -EFAULT; 14.652 - 14.653 - return 0; 14.654 - 14.655 - case HDIO_GETGEO_BIG: 14.656 - DPRINTK_IOCTL(" HDIO_GETGEO_BIG: %x\n", HDIO_GETGEO_BIG); 14.657 - if (!argument) return -EINVAL; 14.658 - 14.659 - /* We don't have real geometry info, but let's at least return 14.660 - values consistent with the size of the device */ 14.661 - 14.662 - heads = 0xff; 14.663 - sectors = 0x3f; 14.664 - cylinders = part->nr_sects / (heads * sectors); 14.665 - 14.666 - if (put_user(0x00, (unsigned long *) &geo->start)) return -EFAULT; 14.667 - if (put_user(heads, (byte *)&geo->heads)) return -EFAULT; 14.668 - if (put_user(sectors, (byte *)&geo->sectors)) return -EFAULT; 14.669 - if (put_user(cylinders, (unsigned int *) &geo->cylinders)) return -EFAULT; 14.670 - 14.671 - return 0; 14.672 - 14.673 - case CDROMMULTISESSION: 14.674 - DPRINTK("FIXME: support multisession CDs later\n"); 14.675 - for ( i = 0; i < sizeof(struct cdrom_multisession); i++ ) 14.676 - if ( put_user(0, (byte *)(argument + i)) ) return -EFAULT; 14.677 - return 0; 14.678 - 14.679 - case SCSI_IOCTL_GET_BUS_NUMBER: 14.680 - DPRINTK("FIXME: SCSI_IOCTL_GET_BUS_NUMBER ioctl in XL blkif"); 14.681 - return -ENOSYS; 14.682 - 14.683 - default: 14.684 - WPRINTK("ioctl %08x not supported by XL blkif\n", command); 14.685 - return -ENOSYS; 14.686 - } 14.687 - 14.688 - return 0; 14.689 -} 14.690 - 14.691 - 14.692 - 14.693 -/* check media change: should probably do something here in some cases :-) */ 14.694 -int 
blkif_check(kdev_t dev) 14.695 -{ 14.696 - DPRINTK("blkif_check\n"); 14.697 - return 0; 14.698 -} 14.699 - 14.700 -int blkif_revalidate(kdev_t dev) 14.701 -{ 14.702 - struct block_device *bd; 14.703 - struct gendisk *gd; 14.704 - xl_disk_t *disk; 14.705 - unsigned long capacity; 14.706 - int i, rc = 0; 14.707 - 14.708 - if ( (bd = bdget(dev)) == NULL ) 14.709 - return -EINVAL; 14.710 - 14.711 - /* 14.712 - * Update of partition info, and check of usage count, is protected 14.713 - * by the per-block-device semaphore. 14.714 - */ 14.715 - down(&bd->bd_sem); 14.716 - 14.717 - if ( ((gd = get_gendisk(dev)) == NULL) || 14.718 - ((disk = xldev_to_xldisk(dev)) == NULL) || 14.719 - ((capacity = gd->part[MINOR(dev)].nr_sects) == 0) ) 14.720 - { 14.721 - rc = -EINVAL; 14.722 - goto out; 14.723 - } 14.724 - 14.725 - if ( disk->usage > 1 ) 14.726 - { 14.727 - rc = -EBUSY; 14.728 - goto out; 14.729 - } 14.730 - 14.731 - /* Only reread partition table if VBDs aren't mapped to partitions. */ 14.732 - if ( !(gd->flags[MINOR(dev) >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) ) 14.733 - { 14.734 - for ( i = gd->max_p - 1; i >= 0; i-- ) 14.735 - { 14.736 - invalidate_device(dev+i, 1); 14.737 - gd->part[MINOR(dev+i)].start_sect = 0; 14.738 - gd->part[MINOR(dev+i)].nr_sects = 0; 14.739 - gd->sizes[MINOR(dev+i)] = 0; 14.740 - } 14.741 + struct request *req; 14.742 + blkif_response_t *bret; 14.743 + RING_IDX i, rp; 14.744 + unsigned long flags; 14.745 + struct blkfront_info *info = (struct blkfront_info *)dev_id; 14.746 14.747 - grok_partitions(gd, MINOR(dev)>>gd->minor_shift, gd->max_p, capacity); 14.748 - } 14.749 - 14.750 - out: 14.751 - up(&bd->bd_sem); 14.752 - bdput(bd); 14.753 - return rc; 14.754 -} 14.755 - 14.756 - 14.757 -/* 14.758 - * blkif_queue_request 14.759 - * 14.760 - * request block io 14.761 - * 14.762 - * id: for guest use only. 14.763 - * operation: BLKIF_OP_{READ,WRITE,PROBE} 14.764 - * buffer: buffer to read/write into. this should be a 14.765 - * virtual address in the guest os. 14.766 - */ 14.767 -static int blkif_queue_request(unsigned long id, 14.768 - int operation, 14.769 - char * buffer, 14.770 - unsigned long sector_number, 14.771 - unsigned short nr_sectors, 14.772 - kdev_t device, 14.773 - blkif_vdev_t handle) 14.774 -{ 14.775 - unsigned long buffer_ma = virt_to_bus(buffer); 14.776 - unsigned long xid; 14.777 - struct gendisk *gd; 14.778 - blkif_request_t *req; 14.779 - struct buffer_head *bh; 14.780 - unsigned int fsect, lsect; 14.781 - int ref; 14.782 - 14.783 - fsect = (buffer_ma & ~PAGE_MASK) >> 9; 14.784 - lsect = fsect + nr_sectors - 1; 14.785 - 14.786 - /* Buffer must be sector-aligned. Extent mustn't cross a page boundary. */ 14.787 - if ( unlikely((buffer_ma & ((1<<9)-1)) != 0) ) 14.788 - BUG(); 14.789 - if ( lsect > ((PAGE_SIZE/512)-1) ) 14.790 - BUG(); 14.791 - 14.792 - buffer_ma &= PAGE_MASK; 14.793 - 14.794 - if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) 14.795 - return 1; 14.796 - 14.797 - switch ( operation ) 14.798 - { 14.799 - 14.800 - case BLKIF_OP_READ: 14.801 - case BLKIF_OP_WRITE: 14.802 - gd = get_gendisk(device); 14.803 - 14.804 - /* 14.805 - * Update the sector_number we'll pass down as appropriate; note that 14.806 - * we could sanity check that resulting sector will be in this 14.807 - * partition, but this will happen in driver backend anyhow. 
14.808 - */ 14.809 - sector_number += gd->part[MINOR(device)].start_sect; 14.810 - 14.811 - /* 14.812 - * If this unit doesn't consist of virtual partitions then we clear 14.813 - * the partn bits from the device number. 14.814 - */ 14.815 - if ( !(gd->flags[MINOR(device)>>gd->minor_shift] & 14.816 - GENHD_FL_VIRT_PARTNS) ) 14.817 - device &= ~(gd->max_p - 1); 14.818 - 14.819 - if ( (sg_operation == operation) && 14.820 - (sg_dev == device) && 14.821 - (sg_next_sect == sector_number) ) 14.822 - { 14.823 - req = RING_GET_REQUEST(&info->ring, 14.824 - info->ring.req_prod_pvt - 1); 14.825 - bh = (struct buffer_head *)id; 14.826 - 14.827 - bh->b_reqnext = (struct buffer_head *)blk_shadow[req->id].request; 14.828 - blk_shadow[req->id].request = (unsigned long)id; 14.829 - 14.830 - /* install a grant reference. */ 14.831 - ref = gnttab_claim_grant_reference(&gref_head); 14.832 - ASSERT( ref != -ENOSPC ); 14.833 - 14.834 - gnttab_grant_foreign_access_ref( 14.835 - ref, 14.836 - info->backend_id, 14.837 - buffer_ma >> PAGE_SHIFT, 14.838 - ( operation == BLKIF_OP_WRITE ? 1 : 0 ) ); 14.839 - 14.840 - blk_shadow[req->id].frame[req->nr_segments] = 14.841 - buffer_ma >> PAGE_SHIFT; 14.842 - 14.843 - req->frame_and_sects[req->nr_segments] = 14.844 - blkif_fas_from_gref(ref, fsect, lsect); 14.845 - if ( ++req->nr_segments < BLKIF_MAX_SEGMENTS_PER_REQUEST ) 14.846 - sg_next_sect += nr_sectors; 14.847 - else 14.848 - DISABLE_SCATTERGATHER(); 14.849 - 14.850 - /* Update the copy of the request in the recovery ring. */ 14.851 - pickle_request(&blk_shadow[req->id], req ); 14.852 - 14.853 - return 0; 14.854 - } 14.855 - else if ( RING_FULL(&info->ring) ) 14.856 - { 14.857 - return 1; 14.858 - } 14.859 - else 14.860 - { 14.861 - sg_operation = operation; 14.862 - sg_dev = device; 14.863 - sg_next_sect = sector_number + nr_sectors; 14.864 - } 14.865 - break; 14.866 - 14.867 - default: 14.868 - panic("unknown op %d\n", operation); 14.869 - } 14.870 - 14.871 - /* Fill out a communications ring structure. */ 14.872 - req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt); 14.873 - 14.874 - xid = GET_ID_FROM_FREELIST(); 14.875 - blk_shadow[xid].request = (unsigned long)id; 14.876 - 14.877 - req->id = xid; 14.878 - req->operation = operation; 14.879 - req->sector_number = (blkif_sector_t)sector_number; 14.880 - req->handle = handle; 14.881 - req->nr_segments = 1; 14.882 - /* install a grant reference. */ 14.883 - ref = gnttab_claim_grant_reference(&gref_head); 14.884 - ASSERT( ref != -ENOSPC ); 14.885 - 14.886 - gnttab_grant_foreign_access_ref( 14.887 - ref, 14.888 - info->backend_id, 14.889 - buffer_ma >> PAGE_SHIFT, 14.890 - ( operation == BLKIF_OP_WRITE ? 1 : 0 ) ); 14.891 - 14.892 - blk_shadow[xid].frame[0] = buffer_ma >> PAGE_SHIFT; 14.893 - 14.894 - req->frame_and_sects[0] = blkif_fas_from_gref(ref, fsect, lsect); 14.895 - 14.896 - /* Keep a private copy so we can reissue requests when recovering. 
*/ 14.897 - pickle_request(&blk_shadow[xid], req); 14.898 + spin_lock_irqsave(&blkif_io_lock, flags); 14.899 14.900 - info->ring.req_prod_pvt++; 14.901 - 14.902 - return 0; 14.903 -} 14.904 - 14.905 - 14.906 -/* 14.907 - * do_blkif_request 14.908 - * read a block; request is in a request queue 14.909 - */ 14.910 -void do_blkif_request(request_queue_t *rq) 14.911 -{ 14.912 - struct request *req; 14.913 - struct buffer_head *bh, *next_bh; 14.914 - int rw, nsect, full, queued = 0; 14.915 - 14.916 - DPRINTK("Entered do_blkif_request\n"); 14.917 - 14.918 - while ( !rq->plugged && !list_empty(&rq->queue_head)) 14.919 - { 14.920 - if ( (req = blkdev_entry_next_request(&rq->queue_head)) == NULL ) 14.921 - goto out; 14.922 - 14.923 - DPRINTK("do_blkif_request %p: cmd %i, sec %lx, (%li/%li) bh:%p\n", 14.924 - req, req->cmd, req->sector, 14.925 - req->current_nr_sectors, req->nr_sectors, req->bh); 14.926 - 14.927 - rw = req->cmd; 14.928 - if ( rw == READA ) 14.929 - rw = READ; 14.930 - if ( unlikely((rw != READ) && (rw != WRITE)) ) 14.931 - panic("XenoLinux Virtual Block Device: bad cmd: %d\n", rw); 14.932 - 14.933 - req->errors = 0; 14.934 - 14.935 - bh = req->bh; 14.936 - while ( bh != NULL ) 14.937 - { 14.938 - next_bh = bh->b_reqnext; 14.939 - bh->b_reqnext = NULL; 14.940 - 14.941 - full = blkif_queue_request( 14.942 - (unsigned long)bh, 14.943 - (rw == READ) ? BLKIF_OP_READ : BLKIF_OP_WRITE, 14.944 - bh->b_data, bh->b_rsector, bh->b_size>>9, bh->b_rdev); 14.945 - 14.946 - if ( full ) 14.947 - { 14.948 - bh->b_reqnext = next_bh; 14.949 - pending_queues[nr_pending++] = rq; 14.950 - if ( unlikely(nr_pending >= MAX_PENDING) ) 14.951 - BUG(); 14.952 - goto out; 14.953 - } 14.954 - 14.955 - queued++; 14.956 - 14.957 - /* Dequeue the buffer head from the request. */ 14.958 - nsect = bh->b_size >> 9; 14.959 - bh = req->bh = next_bh; 14.960 + if (unlikely(info->connected != BLKIF_STATE_CONNECTED || recovery)) { 14.961 + spin_unlock_irqrestore(&blkif_io_lock, flags); 14.962 + return IRQ_HANDLED; 14.963 + } 14.964 14.965 - if ( bh != NULL ) 14.966 - { 14.967 - /* There's another buffer head to do. Update the request. */ 14.968 - req->hard_sector += nsect; 14.969 - req->hard_nr_sectors -= nsect; 14.970 - req->sector = req->hard_sector; 14.971 - req->nr_sectors = req->hard_nr_sectors; 14.972 - req->current_nr_sectors = bh->b_size >> 9; 14.973 - req->buffer = bh->b_data; 14.974 - } 14.975 - else 14.976 - { 14.977 - /* That was the last buffer head. Finalise the request. */ 14.978 - if ( unlikely(end_that_request_first(req, 1, "XenBlk")) ) 14.979 - BUG(); 14.980 - blkdev_dequeue_request(req); 14.981 - end_that_request_last(req); 14.982 - } 14.983 - } 14.984 - } 14.985 - 14.986 - out: 14.987 - if ( queued != 0 ) 14.988 - flush_requests(); 14.989 -} 14.990 - 14.991 - 14.992 -static void blkif_int(int irq, void *dev_id, struct pt_regs *ptregs) 14.993 -{ 14.994 - RING_IDX i, rp; 14.995 - unsigned long flags; 14.996 - struct buffer_head *bh, *next_bh; 14.997 - 14.998 - spin_lock_irqsave(&io_request_lock, flags); 14.999 - 14.1000 - if ( unlikely(info->connected != BLKIF_STATE_CONNECTED || recovery) ) 14.1001 - { 14.1002 - spin_unlock_irqrestore(&io_request_lock, flags); 14.1003 - return; 14.1004 - } 14.1005 - 14.1006 - rp = info->ring.sring->rsp_prod; 14.1007 - rmb(); /* Ensure we see queued responses up to 'rp'. */ 14.1008 + rp = info->ring.sring->rsp_prod; 14.1009 + rmb(); /* Ensure we see queued responses up to 'rp'. 
*/ 14.1010 14.1011 - for ( i = info->ring.rsp_cons; i != rp; i++ ) 14.1012 - { 14.1013 - unsigned long id; 14.1014 - blkif_response_t *bret; 14.1015 - 14.1016 - bret = RING_GET_RESPONSE(&info->ring, i); 14.1017 - id = bret->id; 14.1018 - bh = (struct buffer_head *)blk_shadow[id].request; 14.1019 - 14.1020 - blkif_completion(&blk_shadow[id]); 14.1021 - 14.1022 - ADD_ID_TO_FREELIST(id); 14.1023 + for (i = info->ring.rsp_cons; i != rp; i++) { 14.1024 + unsigned long id; 14.1025 14.1026 - switch ( bret->operation ) 14.1027 - { 14.1028 - case BLKIF_OP_READ: 14.1029 - case BLKIF_OP_WRITE: 14.1030 - if ( unlikely(bret->status != BLKIF_RSP_OKAY) ) 14.1031 - DPRINTK("Bad return from blkdev data request: %lx\n", 14.1032 - bret->status); 14.1033 - for ( ; bh != NULL; bh = next_bh ) 14.1034 - { 14.1035 - next_bh = bh->b_reqnext; 14.1036 - bh->b_reqnext = NULL; 14.1037 - bh->b_end_io(bh, bret->status == BLKIF_RSP_OKAY); 14.1038 - } 14.1039 + bret = RING_GET_RESPONSE(&info->ring, i); 14.1040 + id = bret->id; 14.1041 + req = (struct request *)info->shadow[id].request; 14.1042 14.1043 - break; 14.1044 - case BLKIF_OP_PROBE: 14.1045 - memcpy(&blkif_control_rsp, bret, sizeof(*bret)); 14.1046 - blkif_control_rsp_valid = 1; 14.1047 - break; 14.1048 - default: 14.1049 - BUG(); 14.1050 - } 14.1051 + blkif_completion(&info->shadow[id]); 14.1052 14.1053 - } 14.1054 - info->ring.rsp_cons = i; 14.1055 - 14.1056 - kick_pending_request_queues(); 14.1057 + ADD_ID_TO_FREELIST(info, id); 14.1058 14.1059 - spin_unlock_irqrestore(&io_request_lock, flags); 14.1060 -} 14.1061 + switch (bret->operation) { 14.1062 + case BLKIF_OP_READ: 14.1063 + case BLKIF_OP_WRITE: 14.1064 + if (unlikely(bret->status != BLKIF_RSP_OKAY)) 14.1065 + DPRINTK("Bad return from blkdev data " 14.1066 + "request: %x\n", bret->status); 14.1067 14.1068 -#endif 14.1069 + BUG_ON(end_that_request_first( 14.1070 + req, (bret->status == BLKIF_RSP_OKAY), 14.1071 + req->hard_nr_sectors)); 14.1072 + end_that_request_last(req); 14.1073 + break; 14.1074 + default: 14.1075 + BUG(); 14.1076 + } 14.1077 + } 14.1078 14.1079 -/***************************** COMMON CODE *******************************/ 14.1080 + info->ring.rsp_cons = i; 14.1081 + 14.1082 + kick_pending_request_queues(info); 14.1083 + 14.1084 + spin_unlock_irqrestore(&blkif_io_lock, flags); 14.1085 + 14.1086 + return IRQ_HANDLED; 14.1087 +} 14.1088 14.1089 static void blkif_free(struct blkfront_info *info) 14.1090 { 14.1091 - /* Prevent new requests being issued until we fix things up. */ 14.1092 - spin_lock_irq(&blkif_io_lock); 14.1093 - info->connected = BLKIF_STATE_DISCONNECTED; 14.1094 - spin_unlock_irq(&blkif_io_lock); 14.1095 + /* Prevent new requests being issued until we fix things up. */ 14.1096 + spin_lock_irq(&blkif_io_lock); 14.1097 + info->connected = BLKIF_STATE_DISCONNECTED; 14.1098 + spin_unlock_irq(&blkif_io_lock); 14.1099 14.1100 - /* Free resources associated with old device channel. */ 14.1101 - if ( info->ring.sring != NULL ) 14.1102 - { 14.1103 - free_page((unsigned long)info->ring.sring); 14.1104 - info->ring.sring = NULL; 14.1105 - } 14.1106 - unbind_evtchn_from_irqhandler(info->evtchn, NULL); 14.1107 - info->evtchn = 0; 14.1108 + /* Free resources associated with old device channel. 
*/ 14.1109 + if (info->ring.sring != NULL) { 14.1110 + free_page((unsigned long)info->ring.sring); 14.1111 + info->ring.sring = NULL; 14.1112 + } 14.1113 + unbind_evtchn_from_irqhandler(info->evtchn, NULL); 14.1114 + info->evtchn = 0; 14.1115 } 14.1116 14.1117 static void blkif_recover(struct blkfront_info *info) 14.1118 { 14.1119 - int i; 14.1120 - blkif_request_t *req; 14.1121 - struct blk_shadow *copy; 14.1122 - int j; 14.1123 - 14.1124 - /* Stage 1: Make a safe copy of the shadow state. */ 14.1125 - copy = (struct blk_shadow *)kmalloc(sizeof(blk_shadow), GFP_KERNEL); 14.1126 - BUG_ON(copy == NULL); 14.1127 - memcpy(copy, blk_shadow, sizeof(blk_shadow)); 14.1128 - 14.1129 - /* Stage 2: Set up free list. */ 14.1130 - memset(&blk_shadow, 0, sizeof(blk_shadow)); 14.1131 - for ( i = 0; i < BLK_RING_SIZE; i++ ) 14.1132 - blk_shadow[i].req.id = i+1; 14.1133 - blk_shadow_free = info->ring.req_prod_pvt; 14.1134 - blk_shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff; 14.1135 - 14.1136 - /* Stage 3: Find pending requests and requeue them. */ 14.1137 - for ( i = 0; i < BLK_RING_SIZE; i++ ) 14.1138 - { 14.1139 - /* Not in use? */ 14.1140 - if ( copy[i].request == 0 ) 14.1141 - continue; 14.1142 - 14.1143 - /* Grab a request slot and unpickle shadow state into it. */ 14.1144 - req = RING_GET_REQUEST( 14.1145 - &info->ring, info->ring.req_prod_pvt); 14.1146 - unpickle_request(req, ©[i]); 14.1147 + int i; 14.1148 + blkif_request_t *req; 14.1149 + struct blk_shadow *copy; 14.1150 + int j; 14.1151 14.1152 - /* We get a new request id, and must reset the shadow state. */ 14.1153 - req->id = GET_ID_FROM_FREELIST(); 14.1154 - memcpy(&blk_shadow[req->id], ©[i], sizeof(copy[i])); 14.1155 + /* Stage 1: Make a safe copy of the shadow state. */ 14.1156 + copy = (struct blk_shadow *)kmalloc(sizeof(info->shadow), GFP_KERNEL); 14.1157 + BUG_ON(copy == NULL); 14.1158 + memcpy(copy, info->shadow, sizeof(info->shadow)); 14.1159 14.1160 - /* Rewrite any grant references invalidated by suspend/resume. */ 14.1161 - for ( j = 0; j < req->nr_segments; j++ ) 14.1162 - { 14.1163 - if ( req->frame_and_sects[j] & GRANTREF_INVALID ) 14.1164 - gnttab_grant_foreign_access_ref( 14.1165 - blkif_gref_from_fas(req->frame_and_sects[j]), 14.1166 - info->backend_id, 14.1167 - blk_shadow[req->id].frame[j], 14.1168 - rq_data_dir((struct request *) 14.1169 - blk_shadow[req->id].request)); 14.1170 - req->frame_and_sects[j] &= ~GRANTREF_INVALID; 14.1171 - } 14.1172 - blk_shadow[req->id].req = *req; 14.1173 + /* Stage 2: Set up free list. */ 14.1174 + memset(&info->shadow, 0, sizeof(info->shadow)); 14.1175 + for (i = 0; i < BLK_RING_SIZE; i++) 14.1176 + info->shadow[i].req.id = i+1; 14.1177 + info->shadow_free = info->ring.req_prod_pvt; 14.1178 + info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff; 14.1179 14.1180 - info->ring.req_prod_pvt++; 14.1181 - } 14.1182 - 14.1183 - kfree(copy); 14.1184 - 14.1185 - recovery = 0; 14.1186 + /* Stage 3: Find pending requests and requeue them. */ 14.1187 + for (i = 0; i < BLK_RING_SIZE; i++) { 14.1188 + /* Not in use? */ 14.1189 + if (copy[i].request == 0) 14.1190 + continue; 14.1191 14.1192 - /* info->ring->req_prod will be set when we flush_requests().*/ 14.1193 - wmb(); 14.1194 + /* Grab a request slot and unpickle shadow state into it. */ 14.1195 + req = RING_GET_REQUEST( 14.1196 + &info->ring, info->ring.req_prod_pvt); 14.1197 + unpickle_request(req, ©[i]); 14.1198 14.1199 - /* Kicks things back into life. 
*/ 14.1200 - flush_requests(info); 14.1201 + /* We get a new request id, and must reset the shadow state. */ 14.1202 + req->id = GET_ID_FROM_FREELIST(info); 14.1203 + memcpy(&info->shadow[req->id], &copy[i], sizeof(copy[i])); 14.1204 14.1205 - /* Now safe to left other people use the interface. */ 14.1206 - info->connected = BLKIF_STATE_CONNECTED; 14.1207 + /* Rewrite any grant references invalidated by susp/resume. */ 14.1208 + for (j = 0; j < req->nr_segments; j++) { 14.1209 + if ( req->frame_and_sects[j] & GRANTREF_INVALID ) 14.1210 + gnttab_grant_foreign_access_ref( 14.1211 + blkif_gref_from_fas( 14.1212 + req->frame_and_sects[j]), 14.1213 + info->backend_id, 14.1214 + info->shadow[req->id].frame[j], 14.1215 + rq_data_dir( 14.1216 + (struct request *) 14.1217 + info->shadow[req->id].request)); 14.1218 + req->frame_and_sects[j] &= ~GRANTREF_INVALID; 14.1219 + } 14.1220 + info->shadow[req->id].req = *req; 14.1221 + 14.1222 + info->ring.req_prod_pvt++; 14.1223 + } 14.1224 + 14.1225 + kfree(copy); 14.1226 + 14.1227 + recovery = 0; 14.1228 + 14.1229 + /* info->ring->req_prod will be set when we flush_requests().*/ 14.1230 + wmb(); 14.1231 + 14.1232 + /* Kicks things back into life. */ 14.1233 + flush_requests(info); 14.1234 + 14.1235 + /* Now safe to left other people use the interface. */ 14.1236 + info->connected = BLKIF_STATE_CONNECTED; 14.1237 } 14.1238 14.1239 static void blkif_connect(struct blkfront_info *info, u16 evtchn) 14.1240 { 14.1241 - int err = 0; 14.1242 - 14.1243 - info->evtchn = evtchn; 14.1244 + int err = 0; 14.1245 14.1246 - err = bind_evtchn_to_irqhandler( 14.1247 - info->evtchn, blkif_int, SA_SAMPLE_RANDOM, "blkif", info); 14.1248 - if ( err != 0 ) 14.1249 - { 14.1250 - WPRINTK("bind_evtchn_to_irqhandler failed (err=%d)\n", err); 14.1251 - return; 14.1252 - } 14.1253 + info->evtchn = evtchn; 14.1254 + 14.1255 + err = bind_evtchn_to_irqhandler( 14.1256 + info->evtchn, blkif_int, SA_SAMPLE_RANDOM, "blkif", info); 14.1257 + if (err != 0) { 14.1258 + WPRINTK("bind_evtchn_to_irqhandler failed (err=%d)\n", err); 14.1259 + return; 14.1260 + } 14.1261 } 14.1262 14.1263 14.1264 @@ -1227,9 +621,8 @@ static int talk_to_backend(struct xenbus 14.1265 static int blkfront_probe(struct xenbus_device *dev, 14.1266 const struct xenbus_device_id *id) 14.1267 { 14.1268 - int err; 14.1269 + int err, vdevice, i; 14.1270 struct blkfront_info *info; 14.1271 - int vdevice; 14.1272 14.1273 /* FIXME: Use dynamic device id if this is not set. */ 14.1274 err = xenbus_scanf(dev->nodename, "virtual-device", "%i", &vdevice); 14.1275 @@ -1251,6 +644,12 @@ static int blkfront_probe(struct xenbus_ 14.1276 info->mi = NULL; 14.1277 INIT_WORK(&info->work, blkif_restart_queue, (void *)info); 14.1278 14.1279 + info->shadow_free = 0; 14.1280 + memset(info->shadow, 0, sizeof(info->shadow)); 14.1281 + for (i = 0; i < BLK_RING_SIZE; i++) 14.1282 + info->shadow[i].req.id = i+1; 14.1283 + info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff; 14.1284 + 14.1285 /* Front end dir is a number, which is used as the id. */ 14.1286 info->handle = simple_strtoul(strrchr(dev->nodename,'/')+1, NULL, 0); 14.1287 dev->data = info; 14.1288 @@ -1329,55 +728,57 @@ static void __init init_blk_xenbus(void) 14.1289 14.1290 static int wait_for_blkif(void) 14.1291 { 14.1292 - int err = 0; 14.1293 - int i; 14.1294 + int err = 0; 14.1295 + int i; 14.1296 14.1297 - /* 14.1298 - * We should figure out how many and which devices we need to 14.1299 - * proceed and only wait for those. For now, continue once the 14.1300 - * first device is around. 
14.1301 - */ 14.1302 - for ( i=0; blkif_state != BLKIF_STATE_CONNECTED && (i < 10*HZ); i++ ) 14.1303 - { 14.1304 - set_current_state(TASK_INTERRUPTIBLE); 14.1305 - schedule_timeout(1); 14.1306 - } 14.1307 + /* 14.1308 + * We should figure out how many and which devices we need to 14.1309 + * proceed and only wait for those. For now, continue once the 14.1310 + * first device is around. 14.1311 + */ 14.1312 + for (i = 0; blkif_state != BLKIF_STATE_CONNECTED && (i < 10*HZ); i++) { 14.1313 + set_current_state(TASK_INTERRUPTIBLE); 14.1314 + schedule_timeout(1); 14.1315 + } 14.1316 14.1317 - if ( blkif_state != BLKIF_STATE_CONNECTED ) 14.1318 - { 14.1319 - WPRINTK("Timeout connecting to device!\n"); 14.1320 - err = -ENOSYS; 14.1321 - } 14.1322 - return err; 14.1323 + if (blkif_state != BLKIF_STATE_CONNECTED) { 14.1324 + WPRINTK("Timeout connecting to device!\n"); 14.1325 + err = -ENOSYS; 14.1326 + } 14.1327 + return err; 14.1328 } 14.1329 14.1330 static int __init xlblk_init(void) 14.1331 { 14.1332 - int i; 14.1333 - 14.1334 - if ( (xen_start_info.flags & SIF_INITDOMAIN) || 14.1335 - (xen_start_info.flags & SIF_BLK_BE_DOMAIN) ) 14.1336 - return 0; 14.1337 - 14.1338 - IPRINTK("Initialising virtual block device driver\n"); 14.1339 + if ((xen_start_info.flags & SIF_INITDOMAIN) 14.1340 + || (xen_start_info.flags & SIF_BLK_BE_DOMAIN) ) 14.1341 + return 0; 14.1342 14.1343 - blk_shadow_free = 0; 14.1344 - memset(blk_shadow, 0, sizeof(blk_shadow)); 14.1345 - for ( i = 0; i < BLK_RING_SIZE; i++ ) 14.1346 - blk_shadow[i].req.id = i+1; 14.1347 - blk_shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff; 14.1348 + IPRINTK("Initialising virtual block device driver\n"); 14.1349 14.1350 - init_blk_xenbus(); 14.1351 + init_blk_xenbus(); 14.1352 14.1353 - wait_for_blkif(); 14.1354 + wait_for_blkif(); 14.1355 14.1356 - return 0; 14.1357 + return 0; 14.1358 } 14.1359 14.1360 +module_init(xlblk_init); 14.1361 + 14.1362 static void blkif_completion(struct blk_shadow *s) 14.1363 { 14.1364 - int i; 14.1365 - for ( i = 0; i < s->req.nr_segments; i++ ) 14.1366 - gnttab_free_grant_reference( 14.1367 - blkif_gref_from_fas(s->req.frame_and_sects[i])); 14.1368 + int i; 14.1369 + for (i = 0; i < s->req.nr_segments; i++) 14.1370 + gnttab_free_grant_reference( 14.1371 + blkif_gref_from_fas(s->req.frame_and_sects[i])); 14.1372 } 14.1373 + 14.1374 +/* 14.1375 + * Local variables: 14.1376 + * c-file-style: "linux" 14.1377 + * indent-tabs-mode: t 14.1378 + * c-indent-level: 8 14.1379 + * c-basic-offset: 8 14.1380 + * tab-width: 8 14.1381 + * End: 14.1382 + */
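
The blkfront rework above replaces the global blk_shadow[] array with a per-device shadow ring embedded in struct blkfront_info, and threads a free list through it: each unused entry's req.id holds the index of the next free entry, info->shadow_free points at the head of the chain, and the sentinel 0x0fffffff marks the end (see the initialisation in blkfront_probe and blkif_recover above). The GET_ID_FROM_FREELIST/ADD_ID_TO_FREELIST helpers are not part of these hunks; the code below is only a sketch of how such a pair is typically written against the fields shown here, not the changeset's actual definitions.

	/* Sketch only: free shadow entries are chained through the otherwise
	 * unused req.id field, with info->shadow_free as the list head. */
	static inline unsigned long get_id_from_freelist(struct blkfront_info *info)
	{
		unsigned long id = info->shadow_free;
		BUG_ON(id >= BLK_RING_SIZE);
		info->shadow_free = info->shadow[id].req.id;	/* pop next free slot */
		return id;
	}

	static inline void add_id_to_freelist(struct blkfront_info *info,
					      unsigned long id)
	{
		info->shadow[id].req.id  = info->shadow_free;	/* push back onto chain */
		info->shadow[id].request = 0;			/* entry is now unused */
		info->shadow_free = id;
	}

Interrupt-time completion (blkif_int above) returns ids to this list with ADD_ID_TO_FREELIST(info, id) before ending the corresponding block-layer request.
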
15.1 --- a/linux-2.6-xen-sparse/drivers/xen/blkfront/block.h Fri Aug 26 13:47:16 2005 -0700 15.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/block.h Mon Aug 29 13:19:24 2005 -0700 15.3 @@ -96,6 +96,14 @@ struct xlbd_major_info 15.4 struct xlbd_type_info *type; 15.5 }; 15.6 15.7 +struct blk_shadow { 15.8 + blkif_request_t req; 15.9 + unsigned long request; 15.10 + unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST]; 15.11 +}; 15.12 + 15.13 +#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE) 15.14 + 15.15 /* 15.16 * We have one of these per vbd, whether ide, scsi or 'other'. They 15.17 * hang in private_data off the gendisk structure. We may end up 15.18 @@ -116,11 +124,11 @@ struct blkfront_info 15.19 blkif_front_ring_t ring; 15.20 unsigned int evtchn; 15.21 struct xlbd_major_info *mi; 15.22 -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) 15.23 request_queue_t *rq; 15.24 -#endif 15.25 struct work_struct work; 15.26 struct gnttab_free_callback callback; 15.27 + struct blk_shadow shadow[BLK_RING_SIZE]; 15.28 + unsigned long shadow_free; 15.29 }; 15.30 15.31 extern spinlock_t blkif_io_lock;
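
block.h now carries the shadow state inside struct blkfront_info itself, sized by BLK_RING_SIZE, which __RING_SIZE() derives from the shared-ring page. In rough terms that is the largest power-of-two number of request/response slots that fits in one page after the shared-ring header. The snippet below only illustrates that calculation; the header and slot sizes are made-up example figures, not the real structure layouts.

	/* Illustration only: approximate what __RING_SIZE() computes for a
	 * one-page shared ring.  Header and slot sizes are assumed values. */
	#include <stdio.h>

	static unsigned long ring_slots(unsigned long page_size,
					unsigned long header_size,
					unsigned long slot_size)
	{
		unsigned long n = (page_size - header_size) / slot_size;
		unsigned long p = 1;

		while ((p << 1) <= n)		/* round down to a power of two */
			p <<= 1;
		return p;
	}

	int main(void)
	{
		printf("%lu slots\n", ring_slots(4096, 64, 112));
		return 0;
	}

Because the shadow array is indexed by ring id, one struct blk_shadow per possible in-flight request is enough to replay everything outstanding after a suspend/resume, which is what blkif_recover above relies on.
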
16.1 --- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Fri Aug 26 13:47:16 2005 -0700 16.2 +++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Mon Aug 29 13:19:24 2005 -0700 16.3 @@ -1272,25 +1272,24 @@ static int netfront_remove(struct xenbus 16.4 16.5 static int netfront_suspend(struct xenbus_device *dev) 16.6 { 16.7 - struct net_private *np = dev->data; 16.8 - /* Avoid having tx/rx stuff happen until we're ready. */ 16.9 - unbind_evtchn_from_irqhandler(np->evtchn, np->netdev); 16.10 - return 0; 16.11 + struct netfront_info *info = dev->data; 16.12 + 16.13 + unregister_xenbus_watch(&info->watch); 16.14 + kfree(info->backend); 16.15 + info->backend = NULL; 16.16 + 16.17 + netif_free(info); 16.18 + 16.19 + return 0; 16.20 } 16.21 16.22 static int netfront_resume(struct xenbus_device *dev) 16.23 { 16.24 - struct net_private *np = dev->data; 16.25 - /* 16.26 - * Connect regardless of whether IFF_UP flag set. 16.27 - * Stop bad things from happening until we're back up. 16.28 - */ 16.29 - np->backend_state = BEST_DISCONNECTED; 16.30 - memset(np->tx, 0, PAGE_SIZE); 16.31 - memset(np->rx, 0, PAGE_SIZE); 16.32 - 16.33 - // send_interface_connect(np); 16.34 - return 0; 16.35 + struct net_private *np = dev->data; 16.36 + int err; 16.37 + 16.38 + err = talk_to_backend(dev, np); 16.39 + return err; 16.40 } 16.41 16.42 static struct xenbus_driver netfront = {
17.1 --- a/tools/libxc/xc_linux_save.c Fri Aug 26 13:47:16 2005 -0700 17.2 +++ b/tools/libxc/xc_linux_save.c Mon Aug 29 13:19:24 2005 -0700 17.3 @@ -763,8 +763,6 @@ int xc_linux_save(int xc_handle, int io_ 17.4 batch++; 17.5 } 17.6 17.7 -// DPRINTF("batch %d:%d (n=%d)\n", iter, batch, n); 17.8 - 17.9 if ( batch == 0 ) 17.10 goto skip; /* vanishingly unlikely... */ 17.11 17.12 @@ -915,7 +913,7 @@ int xc_linux_save(int xc_handle, int io_ 17.13 continue; 17.14 } 17.15 17.16 - if ( last_iter ) break; 17.17 + if ( last_iter ) break; 17.18 17.19 if ( live ) 17.20 {
18.1 --- a/tools/python/xen/xend/XendCheckpoint.py Fri Aug 26 13:47:16 2005 -0700 18.2 +++ b/tools/python/xen/xend/XendCheckpoint.py Mon Aug 29 13:19:24 2005 -0700 18.3 @@ -51,7 +51,7 @@ def save(xd, fd, dominfo): 18.4 p = select.poll() 18.5 p.register(child.fromchild.fileno()) 18.6 p.register(child.childerr.fileno()) 18.7 - while True: 18.8 + while True: 18.9 r = p.poll() 18.10 for (fd, event) in r: 18.11 if not event & select.POLLIN: 18.12 @@ -69,8 +69,9 @@ def save(xd, fd, dominfo): 18.13 try: 18.14 dominfo.db.releaseDomain(dominfo.id) 18.15 except Exception, ex: 18.16 - log.warning("error in domain release on xenstore: %s", 18.17 - ex) 18.18 + log.warning( 18.19 + "error in domain release on xenstore: %s", 18.20 + ex) 18.21 pass 18.22 dominfo.state_wait("suspended") 18.23 log.info("suspend %d done" % dominfo.id)
19.1 --- a/tools/python/xen/xend/server/SrvDaemon.py Fri Aug 26 13:47:16 2005 -0700 19.2 +++ b/tools/python/xen/xend/server/SrvDaemon.py Mon Aug 29 13:19:24 2005 -0700 19.3 @@ -42,7 +42,8 @@ class Daemon: 19.4 self.traceon = 0 19.5 self.tracefile = None 19.6 self.traceindent = 0 19.7 - 19.8 + self.child = 0 19.9 + 19.10 def daemon_pids(self): 19.11 pids = [] 19.12 pidex = '(?P<pid>\d+)' 19.13 @@ -140,15 +141,12 @@ class Daemon: 19.14 else: 19.15 return 0 19.16 19.17 - def install_child_reaper(self): 19.18 - #signal.signal(signal.SIGCHLD, self.onSIGCHLD) 19.19 - # Ensure that zombie children are automatically reaped. 19.20 - xu.autoreap() 19.21 - 19.22 def onSIGCHLD(self, signum, frame): 19.23 - code = 1 19.24 - while code > 0: 19.25 - code = os.waitpid(-1, os.WNOHANG) 19.26 + if self.child > 0: 19.27 + try: 19.28 + pid, sts = os.waitpid(self.child, os.WNOHANG) 19.29 + except os.error, ex: 19.30 + pass 19.31 19.32 def fork_pid(self, pidfile): 19.33 """Fork and write the pid of the child to 'pidfile'. 19.34 @@ -156,13 +154,16 @@ class Daemon: 19.35 @param pidfile: pid file 19.36 @return: pid of child in parent, 0 in child 19.37 """ 19.38 - pid = os.fork() 19.39 - if pid: 19.40 + 19.41 + self.child = os.fork() 19.42 + 19.43 + if self.child: 19.44 # Parent 19.45 pidfile = open(pidfile, 'w') 19.46 - pidfile.write(str(pid)) 19.47 + pidfile.write(str(self.child)) 19.48 pidfile.close() 19.49 - return pid 19.50 + 19.51 + return self.child 19.52 19.53 def daemonize(self): 19.54 if not XEND_DAEMONIZE: return 19.55 @@ -203,8 +204,7 @@ class Daemon: 19.56 # Trying to run an already-running service is a success. 19.57 return 0 19.58 19.59 - self.install_child_reaper() 19.60 - 19.61 + signal.signal(signal.SIGCHLD, self.onSIGCHLD) 19.62 if self.fork_pid(XEND_PID_FILE): 19.63 #Parent. Sleep to give child time to start. 19.64 time.sleep(1) 19.65 @@ -309,7 +309,7 @@ class Daemon: 19.66 print >>sys.stderr, 'Exception starting xend:', ex 19.67 if XEND_DEBUG: 19.68 traceback.print_exc() 19.69 - log.exception("Exception starting xend") 19.70 + log.exception("Exception starting xend (%s)" % ex) 19.71 self.exit(1) 19.72 19.73 def createFactories(self):
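
The SrvDaemon change stops installing a reap-everything SIGCHLD handler (the old loop called os.waitpid(-1, os.WNOHANG) until it failed) and instead remembers the pid returned by fork_pid() in self.child and reaps only that child, non-blocking, which avoids consuming the exit status of children spawned elsewhere (the save path in XendCheckpoint.py above polls a popen-style child, for instance). os.waitpid is a thin wrapper over the POSIX call; the sketch below shows the same targeted-reap pattern in C, with names invented purely for illustration.

	/* Sketch only: reap one specific child, non-blocking, from SIGCHLD.
	 * Exit statuses of other children are left for their owners. */
	#include <signal.h>
	#include <string.h>
	#include <sys/types.h>
	#include <sys/wait.h>

	static pid_t tracked_child;	/* recorded by the parent after fork() */

	static void on_sigchld(int signo)
	{
		int status;

		(void)signo;
		if (tracked_child > 0)
			(void)waitpid(tracked_child, &status, WNOHANG);
	}

	static void install_sigchld_handler(void)
	{
		struct sigaction sa;

		memset(&sa, 0, sizeof(sa));
		sa.sa_handler = on_sigchld;
		sigemptyset(&sa.sa_mask);
		sa.sa_flags = SA_RESTART;
		sigaction(SIGCHLD, &sa, NULL);
	}
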
20.1 --- a/tools/python/xen/xm/main.py Fri Aug 26 13:47:16 2005 -0700 20.2 +++ b/tools/python/xen/xm/main.py Mon Aug 29 13:19:24 2005 -0700 20.3 @@ -715,9 +715,9 @@ def main(argv=sys.argv): 20.4 err("Most commands need root access. Please try again as root") 20.5 sys.exit(1) 20.6 except XendError, ex: 20.7 - if args[0] == "bogus": 20.8 - args.remove("bogus") 20.9 if len(args) > 0: 20.10 + if args[0] == "bogus": 20.11 + args.remove("bogus") 20.12 handle_xend_error(argv[1], args[0], ex) 20.13 else: 20.14 print "Unexpected error:", sys.exc_info()[0]