ia64/xen-unstable

changeset 10513:fdf25330e4a6

merge with xen-unstable.hg
author awilliam@xenbuild.aw
date Fri Jun 23 15:33:25 2006 -0600 (2006-06-23)
parents 59d4c1863330 f91cc71173c5
children d4e799a1fa9c
files tools/blktap/Makefile tools/blktap/README tools/blktap/README.sept05 tools/blktap/blkdump.c tools/blktap/blkif.c tools/blktap/blktaplib.c tools/blktap/blktaplib.h tools/blktap/list.h tools/blktap/parallax/Makefile tools/blktap/parallax/README tools/blktap/parallax/block-async.c tools/blktap/parallax/block-async.h tools/blktap/parallax/blockstore.c tools/blktap/parallax/blockstore.h tools/blktap/parallax/blockstored.c tools/blktap/parallax/bstest.c tools/blktap/parallax/parallax.c tools/blktap/parallax/radix.c tools/blktap/parallax/radix.h tools/blktap/parallax/requests-async.c tools/blktap/parallax/requests-async.h tools/blktap/parallax/snaplog.c tools/blktap/parallax/snaplog.h tools/blktap/parallax/vdi.c tools/blktap/parallax/vdi.h tools/blktap/parallax/vdi_create.c tools/blktap/parallax/vdi_fill.c tools/blktap/parallax/vdi_list.c tools/blktap/parallax/vdi_snap.c tools/blktap/parallax/vdi_snap_delete.c tools/blktap/parallax/vdi_snap_list.c tools/blktap/parallax/vdi_tree.c tools/blktap/parallax/vdi_unittest.c tools/blktap/parallax/vdi_validate.c tools/blktap/ublkback/Makefile tools/blktap/ublkback/ublkback.c tools/blktap/ublkback/ublkbacklib.c tools/blktap/ublkback/ublkbacklib.h tools/blktap/xenbus.c xen/arch/ia64/xen/domain.c xen/arch/ia64/xen/xensetup.c xen/include/asm-ia64/vmx.h
line diff
     1.1 --- a/docs/src/user.tex	Fri Jun 23 15:26:01 2006 -0600
     1.2 +++ b/docs/src/user.tex	Fri Jun 23 15:33:25 2006 -0600
     1.3 @@ -1972,7 +1972,8 @@ editing \path{grub.conf}.
     1.4  \item [ console=$<$specifier list$>$ ] Specify the destination for Xen
     1.5    console I/O.  This is a comma-separated list of, for example:
     1.6    \begin{description}
     1.7 -  \item[ vga ] Use VGA console and allow keyboard input.
     1.8 +  \item[ vga ] Use VGA console (only until domain 0 boots, unless {\bf
     1.9 +  vga[keep] } is specified).
    1.10    \item[ com1 ] Use serial port com1.
    1.11    \item[ com2H ] Use serial port com2. Transmitted chars will have the
    1.12      MSB set. Received chars must have MSB set.
     2.1 --- a/linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c	Fri Jun 23 15:26:01 2006 -0600
     2.2 +++ b/linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c	Fri Jun 23 15:33:25 2006 -0600
     2.3 @@ -273,6 +273,49 @@ static void dump_fault_path(unsigned lon
     2.4  }
     2.5  #endif
     2.6  
     2.7 +static int spurious_fault(struct pt_regs *regs,
     2.8 +			  unsigned long address,
     2.9 +			  unsigned long error_code)
    2.10 +{
    2.11 +	pgd_t *pgd;
    2.12 +	pud_t *pud;
    2.13 +	pmd_t *pmd;
    2.14 +	pte_t *pte;
    2.15 +
    2.16 +#ifdef CONFIG_XEN
    2.17 +	/* Faults in hypervisor area are never spurious. */
    2.18 +	if (address >= HYPERVISOR_VIRT_START)
    2.19 +		return 0;
    2.20 +#endif
    2.21 +
    2.22 +	/* Reserved-bit violation or user access to kernel space? */
    2.23 +	if (error_code & 0x0c)
    2.24 +		return 0;
    2.25 +
    2.26 +	pgd = init_mm.pgd + pgd_index(address);
    2.27 +	if (!pgd_present(*pgd))
    2.28 +		return 0;
    2.29 +
    2.30 +	pud = pud_offset(pgd, address);
    2.31 +	if (!pud_present(*pud))
    2.32 +		return 0;
    2.33 +
    2.34 +	pmd = pmd_offset(pud, address);
    2.35 +	if (!pmd_present(*pmd))
    2.36 +		return 0;
    2.37 +
    2.38 +	pte = pte_offset_kernel(pmd, address);
    2.39 +	if (!pte_present(*pte))
    2.40 +		return 0;
    2.41 +	if ((error_code & 0x02) && !pte_write(*pte))
    2.42 +		return 0;
    2.43 +#ifdef CONFIG_X86_PAE
    2.44 +	if ((error_code & 0x10) && (pte_val(*pte) & _PAGE_NX))
    2.45 +		return 0;
    2.46 +#endif
    2.47 +
    2.48 +	return 1;
    2.49 +}
    2.50  
    2.51  /*
    2.52   * This routine handles page faults.  It determines the address,
    2.53 @@ -327,8 +370,16 @@ fastcall void __kprobes do_page_fault(st
    2.54  	 * protection error (error_code & 1) == 0.
    2.55  	 */
    2.56  	if (unlikely(address >= TASK_SIZE)) { 
    2.57 +#ifdef CONFIG_XEN
    2.58 +		/* Faults in hypervisor area can never be patched up. */
    2.59 +		if (address >= HYPERVISOR_VIRT_START)
    2.60 +			goto bad_area_nosemaphore;
    2.61 +#endif
    2.62  		if (!(error_code & 5))
    2.63  			goto vmalloc_fault;
    2.64 +		/* Can take a spurious fault if mapping changes R/O -> R/W. */
    2.65 +		if (spurious_fault(regs, address, error_code))
    2.66 +			return;
    2.67  		/* 
    2.68  		 * Don't take the mm semaphore here. If we fixup a prefetch
    2.69  		 * fault we could otherwise deadlock.
     3.1 --- a/linux-2.6-xen-sparse/arch/i386/mm/hypervisor.c	Fri Jun 23 15:26:01 2006 -0600
     3.2 +++ b/linux-2.6-xen-sparse/arch/i386/mm/hypervisor.c	Fri Jun 23 15:33:25 2006 -0600
     3.3 @@ -263,6 +263,10 @@ static void contiguous_bitmap_clear(
     3.4  	}
     3.5  }
     3.6  
     3.7 +/* Protected by balloon_lock. */
     3.8 +#define MAX_CONTIG_ORDER 7
     3.9 +static unsigned long discontig_frames[1<<MAX_CONTIG_ORDER];
    3.10 +
    3.11  /* Ensure multi-page extents are contiguous in machine memory. */
    3.12  int xen_create_contiguous_region(
    3.13  	unsigned long vstart, unsigned int order, unsigned int address_bits)
    3.14 @@ -271,13 +275,23 @@ int xen_create_contiguous_region(
    3.15  	pud_t         *pud; 
    3.16  	pmd_t         *pmd;
    3.17  	pte_t         *pte;
    3.18 +	unsigned long *in_frames = discontig_frames, out_frame;
    3.19  	unsigned long  frame, i, flags;
    3.20 -	struct xen_memory_reservation reservation = {
    3.21 -		.nr_extents   = 1,
    3.22 -		.extent_order = 0,
    3.23 -		.domid        = DOMID_SELF
    3.24 +	long           rc;
    3.25 +	int            success;
    3.26 +	struct xen_memory_exchange exchange = {
    3.27 +		.in = {
    3.28 +			.nr_extents   = 1UL << order,
    3.29 +			.extent_order = 0,
    3.30 +			.domid        = DOMID_SELF
    3.31 +		},
    3.32 +		.out = {
    3.33 +			.nr_extents   = 1,
    3.34 +			.extent_order = order,
    3.35 +			.address_bits = address_bits,
    3.36 +			.domid        = DOMID_SELF
    3.37 +		}
    3.38  	};
    3.39 -	set_xen_guest_handle(reservation.extent_start, &frame);
    3.40  
    3.41  	/*
    3.42  	 * Currently an auto-translated guest will not perform I/O, nor will
    3.43 @@ -287,68 +301,73 @@ int xen_create_contiguous_region(
    3.44  	if (xen_feature(XENFEAT_auto_translated_physmap))
    3.45  		return 0;
    3.46  
    3.47 +	if (order > MAX_CONTIG_ORDER)
    3.48 +		return -ENOMEM;
    3.49 +
    3.50 +	set_xen_guest_handle(exchange.in.extent_start, in_frames);
    3.51 +	set_xen_guest_handle(exchange.out.extent_start, &out_frame);
    3.52 +
    3.53  	scrub_pages(vstart, 1 << order);
    3.54  
    3.55  	balloon_lock(flags);
    3.56  
    3.57 -	/* 1. Zap current PTEs, giving away the underlying pages. */
    3.58 -	for (i = 0; i < (1<<order); i++) {
    3.59 +	/* 1. Zap current PTEs, remembering MFNs. */
    3.60 +	for (i = 0; i < (1UL<<order); i++) {
    3.61  		pgd = pgd_offset_k(vstart + (i*PAGE_SIZE));
    3.62  		pud = pud_offset(pgd, (vstart + (i*PAGE_SIZE)));
    3.63  		pmd = pmd_offset(pud, (vstart + (i*PAGE_SIZE)));
    3.64  		pte = pte_offset_kernel(pmd, (vstart + (i*PAGE_SIZE)));
    3.65 -		frame = pte_mfn(*pte);
    3.66 -		BUG_ON(HYPERVISOR_update_va_mapping(
    3.67 -			vstart + (i*PAGE_SIZE), __pte_ma(0), 0));
    3.68 +		in_frames[i] = pte_mfn(*pte);
    3.69 +		if (HYPERVISOR_update_va_mapping(vstart + (i*PAGE_SIZE),
    3.70 +						 __pte_ma(0), 0))
    3.71 +			BUG();
    3.72  		set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i,
    3.73  			INVALID_P2M_ENTRY);
    3.74 -		BUG_ON(HYPERVISOR_memory_op(
    3.75 -			XENMEM_decrease_reservation, &reservation) != 1);
    3.76  	}
    3.77  
    3.78  	/* 2. Get a new contiguous memory extent. */
    3.79 -	reservation.extent_order = order;
    3.80 -	reservation.address_bits = address_bits;
    3.81 -	frame = __pa(vstart) >> PAGE_SHIFT;
    3.82 -	if (HYPERVISOR_memory_op(XENMEM_populate_physmap,
    3.83 -				 &reservation) != 1)
    3.84 -		goto fail;
    3.85 -
    3.86 -	/* 3. Map the new extent in place of old pages. */
    3.87 -	for (i = 0; i < (1<<order); i++) {
    3.88 -		BUG_ON(HYPERVISOR_update_va_mapping(
    3.89 -			vstart + (i*PAGE_SIZE),
    3.90 -			pfn_pte_ma(frame+i, PAGE_KERNEL), 0));
    3.91 -		set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i, frame+i);
    3.92 +	out_frame = __pa(vstart) >> PAGE_SHIFT;
    3.93 +	rc = HYPERVISOR_memory_op(XENMEM_exchange, &exchange);
    3.94 +	success = (exchange.nr_exchanged == (1UL << order));
    3.95 +	BUG_ON(!success && ((exchange.nr_exchanged != 0) || (rc == 0)));
    3.96 +	BUG_ON(success && (rc != 0));
    3.97 +	if (unlikely(rc == -ENOSYS)) {
    3.98 +		/* Compatibility when XENMEM_exchange is unsupported. */
    3.99 +		if (HYPERVISOR_memory_op(XENMEM_decrease_reservation,
   3.100 +					 &exchange.in) != (1UL << order))
   3.101 +			BUG();
   3.102 +		success = (HYPERVISOR_memory_op(XENMEM_populate_physmap,
   3.103 +						&exchange.out) == 1);
   3.104 +		if (!success) {
   3.105 +			/* Couldn't get special memory: fall back to normal. */
   3.106 +			for (i = 0; i < (1UL<<order); i++)
   3.107 +				in_frames[i] = (__pa(vstart)>>PAGE_SHIFT) + i;
   3.108 +			if (HYPERVISOR_memory_op(XENMEM_populate_physmap,
   3.109 +						 &exchange.in) != (1UL<<order))
   3.110 +				BUG();
   3.111 +		}
   3.112  	}
   3.113  
   3.114 -	flush_tlb_all();
   3.115 -
   3.116 -	contiguous_bitmap_set(__pa(vstart) >> PAGE_SHIFT, 1UL << order);
   3.117 -
   3.118 -	balloon_unlock(flags);
   3.119 -
   3.120 -	return 0;
   3.121 -
   3.122 - fail:
   3.123 -	reservation.extent_order = 0;
   3.124 -	reservation.address_bits = 0;
   3.125 -
   3.126 -	for (i = 0; i < (1<<order); i++) {
   3.127 -		frame = (__pa(vstart) >> PAGE_SHIFT) + i;
   3.128 -		BUG_ON(HYPERVISOR_memory_op(
   3.129 -			XENMEM_populate_physmap, &reservation) != 1);
   3.130 -		BUG_ON(HYPERVISOR_update_va_mapping(
   3.131 -			vstart + (i*PAGE_SIZE),
   3.132 -			pfn_pte_ma(frame, PAGE_KERNEL), 0));
   3.133 +	/* 3. Map the new extent in place of old pages. */
   3.134 +	for (i = 0; i < (1UL<<order); i++) {
   3.135 +		frame = success ? (out_frame + i) : in_frames[i];
   3.136 +		if (HYPERVISOR_update_va_mapping(vstart + (i*PAGE_SIZE),
   3.137 +						 pfn_pte_ma(frame,
   3.138 +							    PAGE_KERNEL),
   3.139 +						 0))
   3.140 +			BUG();
   3.141  		set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i, frame);
   3.142  	}
   3.143  
   3.144  	flush_tlb_all();
   3.145  
   3.146 +	if (success)
   3.147 +		contiguous_bitmap_set(__pa(vstart) >> PAGE_SHIFT,
   3.148 +				      1UL << order);
   3.149 +
   3.150  	balloon_unlock(flags);
   3.151  
   3.152 -	return -ENOMEM;
   3.153 +	return success ? 0 : -ENOMEM;
   3.154  }
   3.155  
   3.156  void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order)
   3.157 @@ -357,47 +376,79 @@ void xen_destroy_contiguous_region(unsig
   3.158  	pud_t         *pud; 
   3.159  	pmd_t         *pmd;
   3.160  	pte_t         *pte;
   3.161 +	unsigned long *out_frames = discontig_frames, in_frame;
   3.162  	unsigned long  frame, i, flags;
   3.163 -	struct xen_memory_reservation reservation = {
   3.164 -		.nr_extents   = 1,
   3.165 -		.extent_order = 0,
   3.166 -		.domid        = DOMID_SELF
   3.167 +	long           rc;
   3.168 +	int            success;
   3.169 +	struct xen_memory_exchange exchange = {
   3.170 +		.in = {
   3.171 +			.nr_extents   = 1,
   3.172 +			.extent_order = order,
   3.173 +			.domid        = DOMID_SELF
   3.174 +		},
   3.175 +		.out = {
   3.176 +			.nr_extents   = 1UL << order,
   3.177 +			.extent_order = 0,
   3.178 +			.domid        = DOMID_SELF
   3.179 +		}
   3.180  	};
   3.181 -	set_xen_guest_handle(reservation.extent_start, &frame);
   3.182  
   3.183  	if (xen_feature(XENFEAT_auto_translated_physmap) ||
   3.184  	    !test_bit(__pa(vstart) >> PAGE_SHIFT, contiguous_bitmap))
   3.185  		return;
   3.186  
   3.187 +	if (order > MAX_CONTIG_ORDER)
   3.188 +		return;
   3.189 +
   3.190 +	set_xen_guest_handle(exchange.in.extent_start, &in_frame);
   3.191 +	set_xen_guest_handle(exchange.out.extent_start, out_frames);
   3.192 +
   3.193  	scrub_pages(vstart, 1 << order);
   3.194  
   3.195  	balloon_lock(flags);
   3.196  
   3.197  	contiguous_bitmap_clear(__pa(vstart) >> PAGE_SHIFT, 1UL << order);
   3.198  
   3.199 -	/* 1. Zap current PTEs, giving away the underlying pages. */
   3.200 -	for (i = 0; i < (1<<order); i++) {
   3.201 -		pgd = pgd_offset_k(vstart + (i*PAGE_SIZE));
   3.202 -		pud = pud_offset(pgd, (vstart + (i*PAGE_SIZE)));
   3.203 -		pmd = pmd_offset(pud, (vstart + (i*PAGE_SIZE)));
   3.204 -		pte = pte_offset_kernel(pmd, (vstart + (i*PAGE_SIZE)));
   3.205 -		frame = pte_mfn(*pte);
   3.206 -		BUG_ON(HYPERVISOR_update_va_mapping(
   3.207 -			vstart + (i*PAGE_SIZE), __pte_ma(0), 0));
   3.208 +	/* 1. Find start MFN of contiguous extent. */
   3.209 +	pgd = pgd_offset_k(vstart);
   3.210 +	pud = pud_offset(pgd, vstart);
   3.211 +	pmd = pmd_offset(pud, vstart);
   3.212 +	pte = pte_offset_kernel(pmd, vstart);
   3.213 +	in_frame = pte_mfn(*pte);
   3.214 +
   3.215 +	/* 2. Zap current PTEs. */
   3.216 +	for (i = 0; i < (1UL<<order); i++) {
   3.217 +		if (HYPERVISOR_update_va_mapping(vstart + (i*PAGE_SIZE),
   3.218 +						 __pte_ma(0), 0));
   3.219  		set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i,
   3.220  			INVALID_P2M_ENTRY);
   3.221 -		BUG_ON(HYPERVISOR_memory_op(
   3.222 -			XENMEM_decrease_reservation, &reservation) != 1);
   3.223 +		out_frames[i] = (__pa(vstart) >> PAGE_SHIFT) + i;
   3.224  	}
   3.225  
   3.226 -	/* 2. Map new pages in place of old pages. */
   3.227 -	for (i = 0; i < (1<<order); i++) {
   3.228 -		frame = (__pa(vstart) >> PAGE_SHIFT) + i;
   3.229 -		BUG_ON(HYPERVISOR_memory_op(
   3.230 -			XENMEM_populate_physmap, &reservation) != 1);
   3.231 -		BUG_ON(HYPERVISOR_update_va_mapping(
   3.232 -			vstart + (i*PAGE_SIZE),
   3.233 -			pfn_pte_ma(frame, PAGE_KERNEL), 0));
   3.234 +	/* 3. Do the exchange for non-contiguous MFNs. */
   3.235 +	rc = HYPERVISOR_memory_op(XENMEM_exchange, &exchange);
   3.236 +	success = (exchange.nr_exchanged == 1);
   3.237 +	BUG_ON(!success && ((exchange.nr_exchanged != 0) || (rc == 0)));
   3.238 +	BUG_ON(success && (rc != 0));
   3.239 +	if (rc == -ENOSYS) {
   3.240 +		/* Compatibility when XENMEM_exchange is unsupported. */
   3.241 +		if (HYPERVISOR_memory_op(XENMEM_decrease_reservation,
   3.242 +					 &exchange.in) != 1)
   3.243 +			BUG();
   3.244 +		if (HYPERVISOR_memory_op(XENMEM_populate_physmap,
   3.245 +					 &exchange.out) != (1UL << order))
   3.246 +			BUG();
   3.247 +		success = 1;
   3.248 +	}
   3.249 +
   3.250 +	/* 4. Map new pages in place of old pages. */
   3.251 +	for (i = 0; i < (1UL<<order); i++) {
   3.252 +		frame = success ? out_frames[i] : (in_frame + i);
   3.253 +		if (HYPERVISOR_update_va_mapping(vstart + (i*PAGE_SIZE),
   3.254 +						 pfn_pte_ma(frame,
   3.255 +							    PAGE_KERNEL),
   3.256 +						 0))
   3.257 +			BUG();
   3.258  		set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i, frame);
   3.259  	}
   3.260  
     4.1 --- a/linux-2.6-xen-sparse/arch/x86_64/kernel/head64-xen.c	Fri Jun 23 15:26:01 2006 -0600
     4.2 +++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/head64-xen.c	Fri Jun 23 15:33:25 2006 -0600
     4.3 @@ -15,6 +15,7 @@
     4.4  #include <linux/kernel.h>
     4.5  #include <linux/string.h>
     4.6  #include <linux/percpu.h>
     4.7 +#include <linux/module.h>
     4.8  
     4.9  #include <asm/processor.h>
    4.10  #include <asm/proto.h>
    4.11 @@ -92,8 +93,16 @@ static void __init setup_boot_cpu_data(v
    4.12  	boot_cpu_data.x86_mask = eax & 0xf;
    4.13  }
    4.14  
    4.15 +#include <xen/interface/memory.h>
    4.16 +unsigned long *machine_to_phys_mapping;
    4.17 +EXPORT_SYMBOL(machine_to_phys_mapping);
    4.18 +unsigned int machine_to_phys_order;
    4.19 +EXPORT_SYMBOL(machine_to_phys_order);
    4.20 +
    4.21  void __init x86_64_start_kernel(char * real_mode_data)
    4.22  {
    4.23 +	struct xen_machphys_mapping mapping;
    4.24 +	unsigned long machine_to_phys_nr_ents;
    4.25  	char *s;
    4.26  	int i;
    4.27  
    4.28 @@ -105,6 +114,16 @@ void __init x86_64_start_kernel(char * r
    4.29  			xen_start_info->nr_pt_frames;
    4.30  	}
    4.31  
    4.32 +
    4.33 +	machine_to_phys_mapping = (unsigned long *)MACH2PHYS_VIRT_START;
    4.34 +	machine_to_phys_nr_ents = MACH2PHYS_NR_ENTRIES;
    4.35 +	if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) {
    4.36 +		machine_to_phys_mapping = (unsigned long *)mapping.v_start;
    4.37 +		machine_to_phys_nr_ents = mapping.max_mfn + 1;
    4.38 +	}
    4.39 +	while ((1UL << machine_to_phys_order) < machine_to_phys_nr_ents )
    4.40 +		machine_to_phys_order++;
    4.41 +
    4.42  #if 0
    4.43  	for (i = 0; i < 256; i++)
    4.44  		set_intr_gate(i, early_idt_handler);
     5.1 --- a/linux-2.6-xen-sparse/arch/x86_64/mm/fault-xen.c	Fri Jun 23 15:26:01 2006 -0600
     5.2 +++ b/linux-2.6-xen-sparse/arch/x86_64/mm/fault-xen.c	Fri Jun 23 15:33:25 2006 -0600
     5.3 @@ -307,6 +307,49 @@ int exception_trace = 1;
     5.4  #define MEM_LOG(_f, _a...) ((void)0)
     5.5  #endif
     5.6  
     5.7 +static int spurious_fault(struct pt_regs *regs,
     5.8 +			  unsigned long address,
     5.9 +			  unsigned long error_code)
    5.10 +{
    5.11 +	pgd_t *pgd;
    5.12 +	pud_t *pud;
    5.13 +	pmd_t *pmd;
    5.14 +	pte_t *pte;
    5.15 +
    5.16 +#ifdef CONFIG_XEN
    5.17 +	/* Faults in hypervisor area are never spurious. */
    5.18 +	if ((address >= HYPERVISOR_VIRT_START) &&
    5.19 +	    (address < HYPERVISOR_VIRT_END))
    5.20 +		return 0;
    5.21 +#endif
    5.22 +
    5.23 +	/* Reserved-bit violation or user access to kernel space? */
    5.24 +	if (error_code & (PF_RSVD|PF_USER))
    5.25 +		return 0;
    5.26 +
    5.27 +	pgd = init_mm.pgd + pgd_index(address);
    5.28 +	if (!pgd_present(*pgd))
    5.29 +		return 0;
    5.30 +
    5.31 +	pud = pud_offset(pgd, address);
    5.32 +	if (!pud_present(*pud))
    5.33 +		return 0;
    5.34 +
    5.35 +	pmd = pmd_offset(pud, address);
    5.36 +	if (!pmd_present(*pmd))
    5.37 +		return 0;
    5.38 +
    5.39 +	pte = pte_offset_kernel(pmd, address);
    5.40 +	if (!pte_present(*pte))
    5.41 +		return 0;
    5.42 +	if ((error_code & PF_WRITE) && !pte_write(*pte))
    5.43 +		return 0;
    5.44 +	if ((error_code & PF_INSTR) && (pte_val(*pte) & _PAGE_NX))
    5.45 +		return 0;
    5.46 +
    5.47 +	return 1;
    5.48 +}
    5.49 +
    5.50  /*
    5.51   * This routine handles page faults.  It determines the address,
    5.52   * and the problem, and then passes it off to one of the appropriate
    5.53 @@ -361,16 +404,19 @@ asmlinkage void __kprobes do_page_fault(
    5.54  	 */
    5.55  	if (unlikely(address >= TASK_SIZE64)) {
    5.56  		/*
    5.57 -		 * Must check for the entire kernel range here: with writable
    5.58 -		 * page tables the hypervisor may temporarily clear PMD
    5.59 -		 * entries.
    5.60 +		 * Don't check for the module range here: its PML4
    5.61 +		 * is always initialized because it's shared with the main
    5.62 +		 * kernel text. Only vmalloc may need PML4 syncups.
    5.63  		 */
    5.64  		if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
    5.65 -		    address >= PAGE_OFFSET) {
    5.66 +		      ((address >= VMALLOC_START && address < VMALLOC_END))) {
    5.67  			if (vmalloc_fault(address) < 0)
    5.68  				goto bad_area_nosemaphore;
    5.69  			return;
    5.70  		}
    5.71 +		/* Can take a spurious fault if mapping changes R/O -> R/W. */
    5.72 +		if (spurious_fault(regs, address, error_code))
    5.73 +			return;
    5.74  		/*
    5.75  		 * Don't take the mm semaphore here. If we fixup a prefetch
    5.76  		 * fault we could otherwise deadlock.
     6.1 --- a/linux-2.6-xen-sparse/drivers/xen/Kconfig	Fri Jun 23 15:26:01 2006 -0600
     6.2 +++ b/linux-2.6-xen-sparse/drivers/xen/Kconfig	Fri Jun 23 15:33:25 2006 -0600
     6.3 @@ -28,6 +28,11 @@ config XEN_UNPRIVILEGED_GUEST
     6.4  	bool
     6.5  	default !XEN_PRIVILEGED_GUEST
     6.6  
     6.7 +config XEN_PRIVCMD
     6.8 +	bool
     6.9 +	depends on PROC_FS
    6.10 +	default y
    6.11 +
    6.12  config XEN_BACKEND
    6.13          tristate "Backend driver support"
    6.14          default y
    6.15 @@ -84,6 +89,11 @@ config XEN_BLKDEV_BACKEND
    6.16  	  block devices to other guests via a high-performance shared-memory
    6.17  	  interface.
    6.18  
    6.19 +config XEN_XENBUS_DEV
    6.20 +	bool
    6.21 +	depends on PROC_FS
    6.22 +	default y
    6.23 +
    6.24  config XEN_NETDEV_BACKEND
    6.25  	tristate "Network-device backend driver"
    6.26          depends on XEN_BACKEND && NET
     7.1 --- a/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c	Fri Jun 23 15:26:01 2006 -0600
     7.2 +++ b/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c	Fri Jun 23 15:33:25 2006 -0600
     7.3 @@ -58,7 +58,9 @@
     7.4  
     7.5  #define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10))
     7.6  
     7.7 +#ifdef CONFIG_PROC_FS
     7.8  static struct proc_dir_entry *balloon_pde;
     7.9 +#endif
    7.10  
    7.11  static DECLARE_MUTEX(balloon_mutex);
    7.12  
    7.13 @@ -403,6 +405,7 @@ static int balloon_init_watcher(struct n
    7.14  	return NOTIFY_DONE;
    7.15  }
    7.16  
    7.17 +#ifdef CONFIG_PROC_FS
    7.18  static int balloon_write(struct file *file, const char __user *buffer,
    7.19  			 unsigned long count, void *data)
    7.20  {
    7.21 @@ -456,6 +459,7 @@ static int balloon_read(char *page, char
    7.22  	*eof = 1;
    7.23  	return len;
    7.24  }
    7.25 +#endif
    7.26  
    7.27  static struct notifier_block xenstore_notifier;
    7.28  
    7.29 @@ -464,11 +468,11 @@ static int __init balloon_init(void)
    7.30  	unsigned long pfn;
    7.31  	struct page *page;
    7.32  
    7.33 -	IPRINTK("Initialising balloon driver.\n");
    7.34 -
    7.35  	if (!is_running_on_xen())
    7.36  		return -ENODEV;
    7.37  
    7.38 +	IPRINTK("Initialising balloon driver.\n");
    7.39 +
    7.40  	current_pages = min(xen_start_info->nr_pages, max_pfn);
    7.41  	totalram_pages = current_pages;
    7.42  	target_pages  = current_pages;
    7.43 @@ -481,6 +485,7 @@ static int __init balloon_init(void)
    7.44  	balloon_timer.data = 0;
    7.45  	balloon_timer.function = balloon_alarm;
    7.46      
    7.47 +#ifdef CONFIG_PROC_FS
    7.48  	if ((balloon_pde = create_xen_proc_entry("balloon", 0644)) == NULL) {
    7.49  		WPRINTK("Unable to create /proc/xen/balloon.\n");
    7.50  		return -1;
    7.51 @@ -488,6 +493,7 @@ static int __init balloon_init(void)
    7.52  
    7.53  	balloon_pde->read_proc  = balloon_read;
    7.54  	balloon_pde->write_proc = balloon_write;
    7.55 +#endif
    7.56      
    7.57  	/* Initialise the balloon with excess memory space. */
    7.58  	for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
     8.1 --- a/linux-2.6-xen-sparse/drivers/xen/core/cpu_hotplug.c	Fri Jun 23 15:26:01 2006 -0600
     8.2 +++ b/linux-2.6-xen-sparse/drivers/xen/core/cpu_hotplug.c	Fri Jun 23 15:33:25 2006 -0600
     8.3 @@ -109,6 +109,9 @@ static int __init setup_vcpu_hotplug_eve
     8.4  	static struct notifier_block xsn_cpu = {
     8.5  		.notifier_call = setup_cpu_watcher };
     8.6  
     8.7 +	if (!is_running_on_xen())
     8.8 +		return -ENODEV;
     8.9 +
    8.10  	register_cpu_notifier(&hotplug_cpu);
    8.11  	register_xenstore_notifier(&xsn_cpu);
    8.12  
     9.1 --- a/linux-2.6-xen-sparse/drivers/xen/core/evtchn.c	Fri Jun 23 15:26:01 2006 -0600
     9.2 +++ b/linux-2.6-xen-sparse/drivers/xen/core/evtchn.c	Fri Jun 23 15:33:25 2006 -0600
     9.3 @@ -666,6 +666,10 @@ static struct hw_interrupt_type pirq_typ
     9.4  int irq_ignore_unhandled(unsigned int irq)
     9.5  {
     9.6  	struct physdev_irq_status_query irq_status = { .irq = irq };
     9.7 +
     9.8 +	if (!is_running_on_xen())
     9.9 +		return 0;
    9.10 +
    9.11  	(void)HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status);
    9.12  	return !!(irq_status.flags & XENIRQSTAT_shared);
    9.13  }
    10.1 --- a/linux-2.6-xen-sparse/drivers/xen/privcmd/Makefile	Fri Jun 23 15:26:01 2006 -0600
    10.2 +++ b/linux-2.6-xen-sparse/drivers/xen/privcmd/Makefile	Fri Jun 23 15:33:25 2006 -0600
    10.3 @@ -1,2 +1,2 @@
    10.4  
    10.5 -obj-y	:= privcmd.o
    10.6 +obj-$(CONFIG_XEN_PRIVCMD)	:= privcmd.o
    11.1 --- a/linux-2.6-xen-sparse/drivers/xen/tpmback/tpmback.c	Fri Jun 23 15:26:01 2006 -0600
    11.2 +++ b/linux-2.6-xen-sparse/drivers/xen/tpmback/tpmback.c	Fri Jun 23 15:33:25 2006 -0600
    11.3 @@ -71,8 +71,6 @@ static int packet_read_shmem(struct pack
    11.4  			     char *buffer, int isuserbuffer, u32 left);
    11.5  static int vtpm_queue_packet(struct packet *pak);
    11.6  
    11.7 -#define MIN(x,y)  (x) < (y) ? (x) : (y)
    11.8 -
    11.9  /***************************************************************
   11.10   Buffer copying fo user and kernel space buffes.
   11.11  ***************************************************************/
   11.12 @@ -309,7 +307,7 @@ int _packet_write(struct packet *pak,
   11.13  			return 0;
   11.14  		}
   11.15  
   11.16 -		tocopy = MIN(size - offset, PAGE_SIZE);
   11.17 +		tocopy = min_t(size_t, size - offset, PAGE_SIZE);
   11.18  
   11.19  		if (copy_from_buffer((void *)(MMAP_VADDR(tpmif, i) |
   11.20  					      (tx->addr & ~PAGE_MASK)),
   11.21 @@ -365,7 +363,7 @@ static int packet_read(struct packet *pa
   11.22  		u32 instance_no = htonl(pak->tpm_instance);
   11.23  		u32 last_read = pak->last_read;
   11.24  
   11.25 -		to_copy = MIN(4 - last_read, numbytes);
   11.26 +		to_copy = min_t(size_t, 4 - last_read, numbytes);
   11.27  
   11.28  		if (copy_to_buffer(&buffer[0],
   11.29  				   &(((u8 *) & instance_no)[last_read]),
   11.30 @@ -384,7 +382,7 @@ static int packet_read(struct packet *pa
   11.31  
   11.32  	if (room_left > 0) {
   11.33  		if (pak->data_buffer) {
   11.34 -			u32 to_copy = MIN(pak->data_len - offset, room_left);
   11.35 +			u32 to_copy = min_t(u32, pak->data_len - offset, room_left);
   11.36  			u32 last_read = pak->last_read - 4;
   11.37  
   11.38  			if (copy_to_buffer(&buffer[offset],
   11.39 @@ -424,7 +422,7 @@ static int packet_read_shmem(struct pack
   11.40  	 * and within that page at offset 'offset'.
   11.41  	 * Copy a maximum of 'room_left' bytes.
   11.42  	 */
   11.43 -	to_copy = MIN(PAGE_SIZE - pg_offset, room_left);
   11.44 +	to_copy = min_t(u32, PAGE_SIZE - pg_offset, room_left);
   11.45  	while (to_copy > 0) {
   11.46  		void *src;
   11.47  		struct gnttab_map_grant_ref map_op;
   11.48 @@ -451,7 +449,7 @@ static int packet_read_shmem(struct pack
   11.49  			/*
   11.50  			 * User requests more than what's available
   11.51  			 */
   11.52 -			to_copy = MIN(tx->size, to_copy);
   11.53 +			to_copy = min_t(u32, tx->size, to_copy);
   11.54  		}
   11.55  
   11.56  		DPRINTK("Copying from mapped memory at %08lx\n",
   11.57 @@ -483,7 +481,7 @@ static int packet_read_shmem(struct pack
   11.58  		last_read += to_copy;
   11.59  		room_left -= to_copy;
   11.60  
   11.61 -		to_copy = MIN(PAGE_SIZE, room_left);
   11.62 +		to_copy = min_t(u32, PAGE_SIZE, room_left);
   11.63  		i++;
   11.64  	}			/* while (to_copy > 0) */
   11.65  	/*
   11.66 @@ -545,7 +543,7 @@ static ssize_t vtpm_op_read(struct file 
   11.67  
   11.68  		DPRINTK("size given by app: %d, available: %d\n", size, left);
   11.69  
   11.70 -		ret_size = MIN(size, left);
   11.71 +		ret_size = min_t(size_t, size, left);
   11.72  
   11.73  		ret_size = packet_read(pak, ret_size, data, size, 1);
   11.74  
    12.1 --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/Makefile	Fri Jun 23 15:26:01 2006 -0600
    12.2 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/Makefile	Fri Jun 23 15:33:25 2006 -0600
    12.3 @@ -9,4 +9,4 @@ xenbus-objs += xenbus_client.o
    12.4  xenbus-objs += xenbus_comms.o
    12.5  xenbus-objs += xenbus_xs.o
    12.6  xenbus-objs += xenbus_probe.o
    12.7 -xenbus-objs += xenbus_dev.o
    12.8 +obj-$(CONFIG_XEN_XENBUS_DEV) += xenbus_dev.o
    13.1 --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c	Fri Jun 23 15:26:01 2006 -0600
    13.2 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c	Fri Jun 23 15:33:25 2006 -0600
    13.3 @@ -926,6 +926,7 @@ void xenbus_probe(void *unused)
    13.4  }
    13.5  
    13.6  
    13.7 +#ifdef CONFIG_PROC_FS
    13.8  static struct file_operations xsd_kva_fops;
    13.9  static struct proc_dir_entry *xsd_kva_intf;
   13.10  static struct proc_dir_entry *xsd_port_intf;
   13.11 @@ -964,6 +965,7 @@ static int xsd_port_read(char *page, cha
   13.12  	*eof = 1;
   13.13  	return len;
   13.14  }
   13.15 +#endif
   13.16  
   13.17  
   13.18  static int __init xenbus_probe_init(void)
   13.19 @@ -1008,6 +1010,7 @@ static int __init xenbus_probe_init(void
   13.20  		BUG_ON(err);
   13.21  		xen_start_info->store_evtchn = alloc_unbound.port;
   13.22  
   13.23 +#ifdef CONFIG_PROC_FS
   13.24  		/* And finally publish the above info in /proc/xen */
   13.25  		xsd_kva_intf = create_xen_proc_entry("xsd_kva", 0600);
   13.26  		if (xsd_kva_intf) {
   13.27 @@ -1020,6 +1023,7 @@ static int __init xenbus_probe_init(void
   13.28  		xsd_port_intf = create_xen_proc_entry("xsd_port", 0400);
   13.29  		if (xsd_port_intf)
   13.30  			xsd_port_intf->read_proc = xsd_port_read;
   13.31 +#endif
   13.32  	} else
   13.33  		xenstored_ready = 1;
   13.34  
    14.1 --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h	Fri Jun 23 15:26:01 2006 -0600
    14.2 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h	Fri Jun 23 15:33:25 2006 -0600
    14.3 @@ -67,6 +67,10 @@
    14.4  
    14.5  extern unsigned long *phys_to_machine_mapping;
    14.6  
    14.7 +#undef machine_to_phys_mapping
    14.8 +extern unsigned long *machine_to_phys_mapping;
    14.9 +extern unsigned int   machine_to_phys_order;
   14.10 +
   14.11  static inline unsigned long pfn_to_mfn(unsigned long pfn)
   14.12  {
   14.13  	if (xen_feature(XENFEAT_auto_translated_physmap))
   14.14 @@ -84,24 +88,29 @@ static inline int phys_to_machine_mappin
   14.15  
   14.16  static inline unsigned long mfn_to_pfn(unsigned long mfn)
   14.17  {
   14.18 +	extern unsigned long max_mapnr;
   14.19  	unsigned long pfn;
   14.20  
   14.21  	if (xen_feature(XENFEAT_auto_translated_physmap))
   14.22  		return mfn;
   14.23  
   14.24 -	/*
   14.25 -	 * The array access can fail (e.g., device space beyond end of RAM).
   14.26 -	 * In such cases it doesn't matter what we return (we return garbage),
   14.27 -	 * but we must handle the fault without crashing!
   14.28 -	 */
   14.29 +	if (unlikely((mfn >> machine_to_phys_order) != 0))
   14.30 +		return max_mapnr;
   14.31 +
   14.32 +	/* The array access can fail (e.g., device space beyond end of RAM). */
   14.33  	asm (
   14.34  		"1:	movl %1,%0\n"
   14.35  		"2:\n"
   14.36 +		".section .fixup,\"ax\"\n"
   14.37 +		"3:	movl %2,%0\n"
   14.38 +		"	jmp  2b\n"
   14.39 +		".previous\n"
   14.40  		".section __ex_table,\"a\"\n"
   14.41  		"	.align 4\n"
   14.42 -		"	.long 1b,2b\n"
   14.43 +		"	.long 1b,3b\n"
   14.44  		".previous"
   14.45 -		: "=r" (pfn) : "m" (machine_to_phys_mapping[mfn]) );
   14.46 +		: "=r" (pfn)
   14.47 +		: "m" (machine_to_phys_mapping[mfn]), "m" (max_mapnr) );
   14.48  
   14.49  	return pfn;
   14.50  }
    15.1 --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h	Fri Jun 23 15:26:01 2006 -0600
    15.2 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h	Fri Jun 23 15:33:25 2006 -0600
    15.3 @@ -7,6 +7,7 @@
    15.4   **/
    15.5  
    15.6  #include <xen/interface/callback.h>
    15.7 +#include <xen/interface/memory.h>
    15.8  
    15.9  static char * __init machine_specific_memory_setup(void)
   15.10  {
   15.11 @@ -44,9 +45,16 @@ extern void hypervisor_callback(void);
   15.12  extern void failsafe_callback(void);
   15.13  extern void nmi(void);
   15.14  
   15.15 +unsigned long *machine_to_phys_mapping;
   15.16 +EXPORT_SYMBOL(machine_to_phys_mapping);
   15.17 +unsigned int machine_to_phys_order;
   15.18 +EXPORT_SYMBOL(machine_to_phys_order);
   15.19 +
   15.20  static void __init machine_specific_arch_setup(void)
   15.21  {
   15.22  	int ret;
   15.23 +	struct xen_machphys_mapping mapping;
   15.24 +	unsigned long machine_to_phys_nr_ents;
   15.25  	struct xen_platform_parameters pp;
   15.26  	struct callback_register event = {
   15.27  		.type = CALLBACKTYPE_event,
   15.28 @@ -81,4 +89,13 @@ static void __init machine_specific_arch
   15.29  	if (HYPERVISOR_xen_version(XENVER_platform_parameters,
   15.30  				   &pp) == 0)
   15.31  		set_fixaddr_top(pp.virt_start - PAGE_SIZE);
   15.32 +
   15.33 +	machine_to_phys_mapping = (unsigned long *)MACH2PHYS_VIRT_START;
   15.34 +	machine_to_phys_nr_ents = MACH2PHYS_NR_ENTRIES;
   15.35 +	if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) {
   15.36 +		machine_to_phys_mapping = (unsigned long *)mapping.v_start;
   15.37 +		machine_to_phys_nr_ents = mapping.max_mfn + 1;
   15.38 +	}
   15.39 +	while ((1UL << machine_to_phys_order) < machine_to_phys_nr_ents )
   15.40 +		machine_to_phys_order++;
   15.41  }
    16.1 --- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/page.h	Fri Jun 23 15:26:01 2006 -0600
    16.2 +++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/page.h	Fri Jun 23 15:33:25 2006 -0600
    16.3 @@ -85,6 +85,10 @@ void copy_page(void *, void *);
    16.4  
    16.5  extern unsigned long *phys_to_machine_mapping;
    16.6  
    16.7 +#undef machine_to_phys_mapping
    16.8 +extern unsigned long *machine_to_phys_mapping;
    16.9 +extern unsigned int   machine_to_phys_order;
   16.10 +
   16.11  static inline unsigned long pfn_to_mfn(unsigned long pfn)
   16.12  {
   16.13  	if (xen_feature(XENFEAT_auto_translated_physmap))
   16.14 @@ -107,19 +111,23 @@ static inline unsigned long mfn_to_pfn(u
   16.15  	if (xen_feature(XENFEAT_auto_translated_physmap))
   16.16  		return mfn;
   16.17  
   16.18 -	/*
   16.19 -	 * The array access can fail (e.g., device space beyond end of RAM).
   16.20 -	 * In such cases it doesn't matter what we return (we return garbage),
   16.21 -	 * but we must handle the fault without crashing!
   16.22 -	 */
   16.23 +	if (unlikely((mfn >> machine_to_phys_order) != 0))
   16.24 +		return end_pfn;
   16.25 +
   16.26 +	/* The array access can fail (e.g., device space beyond end of RAM). */
   16.27  	asm (
   16.28  		"1:	movq %1,%0\n"
   16.29  		"2:\n"
   16.30 +		".section .fixup,\"ax\"\n"
   16.31 +		"3:	movq %2,%0\n"
   16.32 +		"	jmp  2b\n"
   16.33 +		".previous\n"
   16.34  		".section __ex_table,\"a\"\n"
   16.35  		"	.align 8\n"
   16.36 -		"	.quad 1b,2b\n"
   16.37 +		"	.quad 1b,3b\n"
   16.38  		".previous"
   16.39 -		: "=r" (pfn) : "m" (machine_to_phys_mapping[mfn]) );
   16.40 +		: "=r" (pfn)
   16.41 +		: "m" (machine_to_phys_mapping[mfn]), "m" (end_pfn) );
   16.42  
   16.43  	return pfn;
   16.44  }
    17.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    17.2 +++ b/patches/linux-2.6.16.13/ipv6-no-autoconf.patch	Fri Jun 23 15:33:25 2006 -0600
    17.3 @@ -0,0 +1,23 @@
    17.4 + net/ipv6/addrconf.c |    2 ++
    17.5 + 1 files changed, 2 insertions(+)
    17.6 +
    17.7 +Index: build/net/ipv6/addrconf.c
    17.8 +===================================================================
    17.9 +--- build.orig/net/ipv6/addrconf.c
   17.10 ++++ build/net/ipv6/addrconf.c
   17.11 +@@ -2462,6 +2462,7 @@ static void addrconf_dad_start(struct in
   17.12 + 	spin_lock_bh(&ifp->lock);
   17.13 + 
   17.14 + 	if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) ||
   17.15 ++	    !(dev->flags&IFF_MULTICAST) ||
   17.16 + 	    !(ifp->flags&IFA_F_TENTATIVE)) {
   17.17 + 		ifp->flags &= ~IFA_F_TENTATIVE;
   17.18 + 		spin_unlock_bh(&ifp->lock);
   17.19 +@@ -2546,6 +2547,7 @@ static void addrconf_dad_completed(struc
   17.20 + 	if (ifp->idev->cnf.forwarding == 0 &&
   17.21 + 	    ifp->idev->cnf.rtr_solicits > 0 &&
   17.22 + 	    (dev->flags&IFF_LOOPBACK) == 0 &&
   17.23 ++	    (dev->flags & IFF_MULTICAST) &&
   17.24 + 	    (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)) {
   17.25 + 		struct in6_addr all_routers;
   17.26 + 
    18.1 --- a/tools/blktap/Makefile	Fri Jun 23 15:26:01 2006 -0600
    18.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    18.3 @@ -1,94 +0,0 @@
    18.4 -MAJOR    = 3.0
    18.5 -MINOR    = 0
    18.6 -SONAME   = libblktap.so.$(MAJOR)
    18.7 -
    18.8 -XEN_ROOT = ../..
    18.9 -include $(XEN_ROOT)/tools/Rules.mk
   18.10 -
   18.11 -SUBDIRS :=
   18.12 -SUBDIRS += ublkback
   18.13 -#SUBDIRS += parallax
   18.14 -
   18.15 -BLKTAP_INSTALL_DIR = /usr/sbin
   18.16 -
   18.17 -INSTALL            = install
   18.18 -INSTALL_PROG       = $(INSTALL) -m0755
   18.19 -INSTALL_DIR        = $(INSTALL) -d -m0755
   18.20 -
   18.21 -INCLUDES += -I. -I $(XEN_LIBXC) -I $(XEN_XENSTORE)
   18.22 -
   18.23 -LIBS     := -lpthread -lz
   18.24 -
   18.25 -SRCS     :=
   18.26 -SRCS     += blktaplib.c xenbus.c blkif.c
   18.27 -
   18.28 -CFLAGS   += -Werror
   18.29 -CFLAGS   += -Wno-unused
   18.30 -CFLAGS   += -fno-strict-aliasing
   18.31 -CFLAGS   += -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE
   18.32 -# get asprintf():
   18.33 -CFLAGS   += -D _GNU_SOURCE
   18.34 -# Get gcc to generate the dependencies for us.
   18.35 -CFLAGS   += -Wp,-MD,.$(@F).d
   18.36 -CFLAGS   += $(INCLUDES) 
   18.37 -DEPS     = .*.d
   18.38 -
   18.39 -OBJS     = $(patsubst %.c,%.o,$(SRCS))
   18.40 -IBINS   :=
   18.41 -#IBINS   += blkdump
   18.42 -
   18.43 -LIB      = libblktap.so libblktap.so.$(MAJOR) libblktap.so.$(MAJOR).$(MINOR)
   18.44 -
   18.45 -.PHONY: all
   18.46 -all: mk-symlinks libblktap.so #blkdump
   18.47 -	@set -e; for subdir in $(SUBDIRS); do \
   18.48 -		$(MAKE) -C $$subdir $@;       \
   18.49 -	done
   18.50 -
   18.51 -.PHONY: install
   18.52 -install: all
   18.53 -	$(INSTALL_DIR) -p $(DESTDIR)/usr/$(LIBDIR)
   18.54 -	$(INSTALL_DIR) -p $(DESTDIR)/usr/include
   18.55 -	$(INSTALL_PROG) $(LIB) $(DESTDIR)/usr/$(LIBDIR)
   18.56 -	$(INSTALL_PROG) blktaplib.h $(DESTDIR)/usr/include
   18.57 -	#$(INSTALL_PROG) $(IBINS) $(DESTDIR)$(BLKTAP_INSTALL_DIR)
   18.58 -	@set -e; for subdir in $(SUBDIRS); do \
   18.59 -		$(MAKE) -C $$subdir $@;       \
   18.60 -	done
   18.61 -
   18.62 -.PHONY: clean
   18.63 -clean:
   18.64 -	rm -rf *.a *.so *.o *.rpm $(LIB) *~ $(DEPS) xen TAGS blkdump
   18.65 -	@set -e; for subdir in $(SUBDIRS); do \
   18.66 -		$(MAKE) -C $$subdir $@;       \
   18.67 -	done
   18.68 -
   18.69 -.PHONY: rpm
   18.70 -rpm: all
   18.71 -	rm -rf staging
   18.72 -	mkdir staging
   18.73 -	mkdir staging/i386
   18.74 -	rpmbuild --define "staging$$PWD/staging" --define '_builddir.' \
   18.75 -		--define "_rpmdir$$PWD/staging" -bb rpm.spec
   18.76 -	mv staging/i386/*.rpm .
   18.77 -	rm -rf staging
   18.78 -
   18.79 -libblktap.so: $(OBJS) 
   18.80 -	$(CC) $(CFLAGS) -Wl,-soname -Wl,$(SONAME) -shared         \
   18.81 -	      -L$(XEN_XENSTORE) -l xenstore                       \
   18.82 -	      -o libblktap.so.$(MAJOR).$(MINOR) $^ $(LIBS)
   18.83 -	ln -sf libblktap.so.$(MAJOR).$(MINOR) libblktap.so.$(MAJOR)
   18.84 -	ln -sf libblktap.so.$(MAJOR) $@
   18.85 -
   18.86 -blkdump: libblktap.so
   18.87 -	$(CC) $(CFLAGS) -o blkdump -L$(XEN_LIBXC) -L. \
   18.88 -	      -l blktap blkdump.c
   18.89 -
   18.90 -.PHONY: TAGS clean install mk-symlinks rpm
   18.91 -
   18.92 -.PHONY: TAGS
   18.93 -TAGS:
   18.94 -	etags -t $(SRCS) *.h
   18.95 -
   18.96 --include $(DEPS)
   18.97 -
    19.1 --- a/tools/blktap/README	Fri Jun 23 15:26:01 2006 -0600
    19.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    19.3 @@ -1,149 +0,0 @@
    19.4 -Block Tap User-level Interfaces
    19.5 -Andrew Warfield
    19.6 -andrew.warfield@cl.cam.ac.uk
    19.7 -February 8, 2005
    19.8 -
    19.9 -NOTE #1: The blktap is _experimental_ code.  It works for me.  Your
   19.10 -mileage may vary.  Don't use it for anything important.  Please. ;)
   19.11 -
   19.12 -NOTE #2: All of the interfaces here are likely to change.  This is all
   19.13 -early code, and I am checking it in because others want to play with
   19.14 -it.  If you use it for anything, please let me know!
   19.15 -
   19.16 -Overview:
   19.17 ----------
   19.18 -
   19.19 -This directory contains a library and set of example applications for
   19.20 -the block tap device.  The block tap hooks into the split block device
   19.21 -interfaces above Xen allowing them to be extended.  This extension can
   19.22 -be done in userspace with the help of a library.
   19.23 -
   19.24 -The tap can be installed either as an interposition domain in between
   19.25 -a frontend and backend driver pair, or as a terminating backend, in
   19.26 -which case it is responsible for serving all requests itself.
   19.27 -
   19.28 -There are two reasons that you might want to use the tap,
   19.29 -corresponding to these configurations:
   19.30 -
   19.31 - 1. To examine or modify a stream of block requests while they are
   19.32 -    in-flight (e.g. to encrypt data, or add data-driven watchpoints)
   19.33 -
   19.34 - 2. To prototype a new backend driver, serving requests from the tap
   19.35 -    rather than passing them along to the XenLinux blkback driver.
   19.36 -    (e.g. to forward block requests to a remote host)
   19.37 -
   19.38 -
   19.39 -Interface:
   19.40 -----------
   19.41 -
   19.42 -At the moment, the tap interface is similar in spirit to that of the
   19.43 -Linux netfilter.  Requests are messages from a client (frontend)
   19.44 -domain to a disk (backend) domain.  Responses are messages travelling
   19.45 -back, acknowledging the completion of a request.  the library allows
   19.46 -chains of functions to be attached to these events.  In addition,
   19.47 -hooks may be attached to handle control messages, which signify things
   19.48 -like connections from new domains.
   19.49 -
   19.50 -At present the control messages especially expose a lot of the
   19.51 -underlying driver interfaces.  This may change in the future in order
   19.52 -to simplify writing hooks.
   19.53 -
   19.54 -Here are the public interfaces:
   19.55 -
   19.56 -These allow hook functions to be chained:
   19.57 -
   19.58 - void blktap_register_ctrl_hook(char *name, int (*ch)(control_msg_t *));
   19.59 - void blktap_register_request_hook(char *name, int (*rh)(blkif_request_t *));
   19.60 - void blktap_register_response_hook(char *name, int (*rh)(blkif_response_t *));
   19.61 -
   19.62 -This allows a response to be injected, in the case where a request has
   19.63 -been removed using BLKTAP_STOLEN.
   19.64 -
   19.65 - void blktap_inject_response(blkif_response_t *);
   19.66 -
   19.67 -These let you add file descriptors and handlers to the main poll loop:
   19.68 -
   19.69 - int  blktap_attach_poll(int fd, short events, int (*func)(int));
   19.70 - void blktap_detach_poll(int fd);
   19.71 -
   19.72 -This starts the main poll loop:
   19.73 -
   19.74 - int  blktap_listen(void);
   19.75 -
   19.76 -Example:
   19.77 ---------
   19.78 -
   19.79 -blkimage.c uses an image on the local file system to serve requests to
   19.80 -a domain.  Here's what it looks like:
   19.81 -
   19.82 ----[blkimg.c]---
   19.83 -
   19.84 -/* blkimg.c
   19.85 - *
   19.86 - * file-backed disk.
   19.87 - */
   19.88 -
   19.89 -#include "blktaplib.h"
   19.90 -#include "blkimglib.h"
   19.91 -
   19.92 -
   19.93 -int main(int argc, char *argv[])
   19.94 -{
   19.95 -    image_init();
   19.96 -    
   19.97 -    blktap_register_ctrl_hook("image_control", image_control);
   19.98 -    blktap_register_request_hook("image_request", image_request);
   19.99 -    blktap_listen();
  19.100 -    
  19.101 -    return 0;
  19.102 -}
  19.103 -
  19.104 -----------------
  19.105 -
  19.106 -All of the real work is in blkimglib.c, but this illustrates the
  19.107 -actual tap interface well enough.  image_control() will be called with
  19.108 -all control messages.  image_request() handles requests.  As it reads
  19.109 -from an on-disk image file, no requests are ever passed on to a
  19.110 -backend, and so there will be no responses to process -- so there is
  19.111 -nothing registered as a response hook.
  19.112 -
  19.113 -Other examples:
  19.114 ----------------
  19.115 -
  19.116 -Here is a list of other examples in the directory:
  19.117 -
  19.118 -Things that terminate a block request stream:
  19.119 -
  19.120 -  blkimg    - Use a image file/device to serve requests
  19.121 -  blkgnbd   - Use a remote gnbd server to serve requests
  19.122 -  blkaio    - Use libaio... (DOES NOT WORK)
  19.123 -  
  19.124 -Things that don't:
  19.125 -
  19.126 -  blkdump   - Print in-flight requests.
  19.127 -  blkcow    - Really inefficient copy-on-write disks using libdb to store
  19.128 -              writes.
  19.129 -
  19.130 -There are examples of plugging these things together, for instance
  19.131 -blkcowgnbd is a read-only gnbd device with copy-on-write to a local
  19.132 -file.
  19.133 -
  19.134 -TODO:
  19.135 ------
  19.136 -
  19.137 -- Make session tracking work.  At the moment these generally just handle a 
  19.138 -  single front-end client at a time.
  19.139 -
  19.140 -- Integrate with Xend.  Need to cleanly pass a image identifier in the connect
  19.141 -  message.
  19.142 -
  19.143 -- Make an asynchronous file-io terminator.  The libaio attempt is
  19.144 -  tragically stalled because mapped foreign pages make pfn_valid fail
  19.145 -  (they are VM_IO), and so cannot be passed to aio as targets.  A
  19.146 -  better solution may be to tear the disk interfaces out of the real
  19.147 -  backend and expose them somehow.
  19.148 -
  19.149 -- Make CoW suck less.
  19.150 -
  19.151 -- Do something more along the lines of dynamic linking for the
  19.152 -  plugins, so thatthey don't all need a new main().
    20.1 --- a/tools/blktap/README.sept05	Fri Jun 23 15:26:01 2006 -0600
    20.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    20.3 @@ -1,33 +0,0 @@
    20.4 -The blktap has been rewritten substantially based on the current
    20.5 -blkback driver.  I've removed passthrough support, as this is broken
    20.6 -by the move to grant tables and the lack of transitive grants.  A
    20.7 -blktap VM is now only capable of terminating block requests in
    20.8 -userspace.
    20.9 -
   20.10 -ublkback/ contains a _very_ initial cut at a user-level version of the block
   20.11 -backend driver.  It gives a working example of how the current tap
   20.12 -interfaces are used, in particular w.r.t. the vbd directories in
   20.13 -xenstore.
   20.14 -
   20.15 -parallax/ contains fairly recent parallax code.  This does not run on
   20.16 -the changed blktap interface, but should only be a couple of hours
   20.17 -work to get going again.
   20.18 -
   20.19 -All of the tricky bits are done, but there is plenty of cleaning to
   20.20 -do, and the top-level functionality is not here yet.  At the moment,
   20.21 -the daemon ignores the pdev requested by the tools and opens the file 
   20.22 -or device specified by TMP_IMAGE_FILE_NAME in ublkback.c.
   20.23 -
   20.24 -TODO:
   20.25 -1. Fix to allow pdev in the store to specify the device to open.
   20.26 -2. Add support (to tools as well) to mount arbitrary files...
   20.27 -   just write the filename to mount into the store, instead of pdev.
   20.28 -3. Reeximine blkif refcounting, it is almost certainly broken at the moment.
   20.29 -   - creating a blkif should take a reference.
   20.30 -   - each inflight request should take a reference on dequeue in blktaplib
   20.31 -   - sending responses should drop refs.
   20.32 -   - blkif should be implicitly freed when refcounts fall to 0.
   20.33 -4. Modify the parallax req/rsp code as per ublkback to use the new tap 
   20.34 -   interfaces. 
   20.35 -5. Write a front end that allows parallax and normal mounts to coexist
   20.36 -6. Allow blkback and blktap to run at the same time.
    21.1 --- a/tools/blktap/blkdump.c	Fri Jun 23 15:26:01 2006 -0600
    21.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    21.3 @@ -1,62 +0,0 @@
    21.4 -/* blkdump.c
    21.5 - *
    21.6 - * show a running trace of block requests as they fly by.
    21.7 - * 
    21.8 - * (c) 2004 Andrew Warfield.
    21.9 - */
   21.10 - 
   21.11 -#include <stdio.h>
   21.12 -#include "blktaplib.h"
   21.13 - 
   21.14 -int request_print(blkif_request_t *req)
   21.15 -{
   21.16 -    int i;
   21.17 -    
   21.18 -    if ( (req->operation == BLKIF_OP_READ) ||
   21.19 -         (req->operation == BLKIF_OP_WRITE) )
   21.20 -    {
   21.21 -        printf("[%2u:%2u<%5s] (nr_segs: %03u, dev: %03u, %010llu)\n", 
   21.22 -                ID_TO_DOM(req->id), ID_TO_IDX(req->id), 
   21.23 -                blkif_op_name[req->operation], 
   21.24 -                req->nr_segments, req->handle, 
   21.25 -                req->sector_number);
   21.26 -        
   21.27 -        
   21.28 -        for (i=0; i < req->nr_segments; i++) {
   21.29 -            printf("              (gref: 0x%8x start: %u stop: %u)\n",
   21.30 -                   req->seg[i].gref,
   21.31 -                   req->seg[i].first_sect,
   21.32 -                   req->seg[i].last_sect);
   21.33 -        }
   21.34 -            
   21.35 -    } else {
   21.36 -        printf("Unknown request message type.\n");
   21.37 -    }
   21.38 -    
   21.39 -    return BLKTAP_PASS;
   21.40 -}
   21.41 -
   21.42 -int response_print(blkif_response_t *rsp)
   21.43 -{   
   21.44 -    if ( (rsp->operation == BLKIF_OP_READ) ||
   21.45 -         (rsp->operation == BLKIF_OP_WRITE) )
   21.46 -    {
   21.47 -        printf("[%2u:%2u>%5s] (status: %d)\n", 
   21.48 -                ID_TO_DOM(rsp->id), ID_TO_IDX(rsp->id), 
   21.49 -                blkif_op_name[rsp->operation], 
   21.50 -                rsp->status);
   21.51 -            
   21.52 -    } else {
   21.53 -        printf("Unknown request message type.\n");
   21.54 -    }
   21.55 -    return BLKTAP_PASS;
   21.56 -}
   21.57 -
   21.58 -int main(int argc, char *argv[])
   21.59 -{
   21.60 -    blktap_register_request_hook("request_print", request_print);
   21.61 -    blktap_register_response_hook("response_print", response_print);
   21.62 -    blktap_listen();
   21.63 -    
   21.64 -    return 0;
   21.65 -}
    22.1 --- a/tools/blktap/blkif.c	Fri Jun 23 15:26:01 2006 -0600
    22.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    22.3 @@ -1,212 +0,0 @@
    22.4 -/*
    22.5 - * blkif.c
    22.6 - * 
    22.7 - * The blkif interface for blktap.  A blkif describes an in-use virtual disk.
    22.8 - */
    22.9 -
   22.10 -#include <stdio.h>
   22.11 -#include <stdlib.h>
   22.12 -#include <errno.h>
   22.13 -#include <string.h>
   22.14 -#include <err.h>
   22.15 -
   22.16 -#include "blktaplib.h"
   22.17 -
   22.18 -#if 1
   22.19 -#define DPRINTF(_f, _a...) printf ( _f , ## _a )
   22.20 -#else
   22.21 -#define DPRINTF(_f, _a...) ((void)0)
   22.22 -#endif
   22.23 -
   22.24 -#define BLKIF_HASHSZ 1024
   22.25 -#define BLKIF_HASH(_d,_h) (((int)(_d)^(int)(_h))&(BLKIF_HASHSZ-1))
   22.26 -
   22.27 -static blkif_t      *blkif_hash[BLKIF_HASHSZ];
   22.28 -
   22.29 -blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle)
   22.30 -{
   22.31 -    blkif_t *blkif = blkif_hash[BLKIF_HASH(domid, handle)];
   22.32 -    while ( (blkif != NULL) && 
   22.33 -            ((blkif->domid != domid) || (blkif->handle != handle)) )
   22.34 -        blkif = blkif->hash_next;
   22.35 -    return blkif;
   22.36 -}
   22.37 -
   22.38 -blkif_t *alloc_blkif(domid_t domid)
   22.39 -{
   22.40 -    blkif_t *blkif;
   22.41 -
   22.42 -    blkif = (blkif_t *)malloc(sizeof(blkif_t));
   22.43 -    if (!blkif)
   22.44 -        return NULL;
   22.45 -
   22.46 -    memset(blkif, 0, sizeof(*blkif));
   22.47 -    blkif->domid = domid;
   22.48 -
   22.49 -    return blkif;
   22.50 -}
   22.51 -
   22.52 -static int (*new_blkif_hook)(blkif_t *blkif) = NULL;
   22.53 -void register_new_blkif_hook(int (*fn)(blkif_t *blkif))
   22.54 -{
   22.55 -    new_blkif_hook = fn;
   22.56 -}
   22.57 -
   22.58 -int blkif_init(blkif_t *blkif, long int handle, long int pdev, 
   22.59 -               long int readonly)
   22.60 -{
   22.61 -    domid_t domid;
   22.62 -    blkif_t **pblkif;
   22.63 -    
   22.64 -    if (blkif == NULL)
   22.65 -        return -EINVAL;
   22.66 -
   22.67 -    domid = blkif->domid;
   22.68 -    blkif->handle   = handle;
   22.69 -    blkif->pdev     = pdev;
   22.70 -    blkif->readonly = readonly;
   22.71 -
   22.72 -    /*
   22.73 -     * Call out to the new_blkif_hook. The tap application should define this,
   22.74 -     * and it should return having set blkif->ops
   22.75 -     * 
   22.76 -     */
   22.77 -    if (new_blkif_hook == NULL)
   22.78 -    {
   22.79 -        warn("Probe detected a new blkif, but no new_blkif_hook!");
   22.80 -        return -1;
   22.81 -    }
   22.82 -    new_blkif_hook(blkif);
   22.83 -
   22.84 -    /* Now wire it in. */
   22.85 -    pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
   22.86 -    while ( *pblkif != NULL )
   22.87 -    {
   22.88 -        if ( ((*pblkif)->domid == domid) && ((*pblkif)->handle == handle) )
   22.89 -        {
   22.90 -            DPRINTF("Could not create blkif: already exists\n");
   22.91 -            return -1;
   22.92 -        }
   22.93 -        pblkif = &(*pblkif)->hash_next;
   22.94 -    }
   22.95 -    blkif->hash_next = NULL;
   22.96 -    *pblkif = blkif;
   22.97 -
   22.98 -    return 0;
   22.99 -}
  22.100 -
  22.101 -void free_blkif(blkif_t *blkif)
  22.102 -{
  22.103 -    blkif_t **pblkif, *curs;
  22.104 -    
  22.105 -    pblkif = &blkif_hash[BLKIF_HASH(blkif->domid, blkif->handle)];
  22.106 -    while ( (curs = *pblkif) != NULL )
  22.107 -    {
  22.108 -        if ( blkif == curs )
  22.109 -        {
  22.110 -            *pblkif = curs->hash_next;
  22.111 -        }
  22.112 -        pblkif = &curs->hash_next;
  22.113 -    }
  22.114 -    free(blkif);
  22.115 -}
  22.116 -
  22.117 -void blkif_register_request_hook(blkif_t *blkif, char *name, 
  22.118 -                                 int (*rh)(blkif_t *, blkif_request_t *, int)) 
  22.119 -{
  22.120 -    request_hook_t *rh_ent, **c;
  22.121 -    
  22.122 -    rh_ent = (request_hook_t *)malloc(sizeof(request_hook_t));
  22.123 -    if (!rh_ent) 
  22.124 -    {
  22.125 -        warn("couldn't allocate a new hook");
  22.126 -        return;
  22.127 -    }
  22.128 -    
  22.129 -    rh_ent->func  = rh;
  22.130 -    rh_ent->next = NULL;
  22.131 -    if (asprintf(&rh_ent->name, "%s", name) == -1)
  22.132 -    {
  22.133 -        free(rh_ent);
  22.134 -        warn("couldn't allocate a new hook name");
  22.135 -        return;
  22.136 -    }
  22.137 -    
  22.138 -    c = &blkif->request_hook_chain;
  22.139 -    while (*c != NULL) {
  22.140 -        c = &(*c)->next;
  22.141 -    }
  22.142 -    *c = rh_ent;
  22.143 -}
  22.144 -
  22.145 -void blkif_register_response_hook(blkif_t *blkif, char *name, 
  22.146 -                                  int (*rh)(blkif_t *, blkif_response_t *, int)) 
  22.147 -{
  22.148 -    response_hook_t *rh_ent, **c;
  22.149 -    
  22.150 -    rh_ent = (response_hook_t *)malloc(sizeof(response_hook_t));
  22.151 -    if (!rh_ent) 
  22.152 -    { 
  22.153 -        warn("couldn't allocate a new hook");
  22.154 -        return;
  22.155 -    }
  22.156 -    
  22.157 -    rh_ent->func  = rh;
  22.158 -    rh_ent->next = NULL;
  22.159 -    if (asprintf(&rh_ent->name, "%s", name) == -1)
  22.160 -    {
  22.161 -        free(rh_ent);
  22.162 -        warn("couldn't allocate a new hook name");
  22.163 -        return;
  22.164 -    }
  22.165 -    
  22.166 -    c = &blkif->response_hook_chain;
  22.167 -    while (*c != NULL) {
  22.168 -        c = &(*c)->next;
  22.169 -    }
  22.170 -    *c = rh_ent;
  22.171 -}
  22.172 -
  22.173 -void blkif_print_hooks(blkif_t *blkif)
  22.174 -{
  22.175 -    request_hook_t  *req_hook;
  22.176 -    response_hook_t *rsp_hook;
  22.177 -    
  22.178 -    DPRINTF("Request Hooks:\n");
  22.179 -    req_hook = blkif->request_hook_chain;
  22.180 -    while (req_hook != NULL)
  22.181 -    {
  22.182 -        DPRINTF("  [0x%p] %s\n", req_hook->func, req_hook->name);
  22.183 -        req_hook = req_hook->next;
  22.184 -    }
  22.185 -    
  22.186 -    DPRINTF("Response Hooks:\n");
  22.187 -    rsp_hook = blkif->response_hook_chain;
  22.188 -    while (rsp_hook != NULL)
  22.189 -    {
  22.190 -        DPRINTF("  [0x%p] %s\n", rsp_hook->func, rsp_hook->name);
  22.191 -        rsp_hook = rsp_hook->next;
  22.192 -    }
  22.193 -}
  22.194 -
  22.195 -
  22.196 -long int vbd_size(blkif_t *blkif)
  22.197 -{
  22.198 -    return 1000000000;
  22.199 -}
  22.200 -
  22.201 -long int vbd_secsize(blkif_t *blkif)
  22.202 -{
  22.203 -    return 512;
  22.204 -}
  22.205 -
  22.206 -unsigned vbd_info(blkif_t *blkif)
  22.207 -{
  22.208 -    return 0;
  22.209 -}
  22.210 -
  22.211 -
  22.212 -void __init_blkif(void)
  22.213 -{    
  22.214 -    memset(blkif_hash, 0, sizeof(blkif_hash));
  22.215 -}
    23.1 --- a/tools/blktap/blktaplib.c	Fri Jun 23 15:26:01 2006 -0600
    23.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    23.3 @@ -1,453 +0,0 @@
    23.4 -/*
    23.5 - * blktaplib.c
    23.6 - * 
    23.7 - * userspace interface routines for the blktap driver.
    23.8 - *
    23.9 - * (threadsafe(r) version) 
   23.10 - *
   23.11 - * (c) 2004 Andrew Warfield.
   23.12 - */
   23.13 -
   23.14 -#include <stdio.h>
   23.15 -#include <stdlib.h>
   23.16 -#include <sys/mman.h>
   23.17 -#include <sys/user.h>
   23.18 -#include <err.h>
   23.19 -#include <errno.h>
   23.20 -#include <sys/types.h>
   23.21 -#include <linux/types.h>
   23.22 -#include <sys/stat.h>
   23.23 -#include <fcntl.h>
   23.24 -#include <signal.h>
   23.25 -#include <sys/poll.h>
   23.26 -#include <sys/ioctl.h>
   23.27 -#include <string.h>
   23.28 -#include <unistd.h>
   23.29 -#include <pthread.h>
   23.30 -#include <xs.h>
   23.31 -                                                                     
   23.32 -#define __COMPILING_BLKTAP_LIB
   23.33 -#include "blktaplib.h"
   23.34 -
   23.35 -#if 0
   23.36 -#define DPRINTF(_f, _a...) printf ( _f , ## _a )
   23.37 -#else
   23.38 -#define DPRINTF(_f, _a...) ((void)0)
   23.39 -#endif
   23.40 -#define DEBUG_RING_IDXS 0
   23.41 -
   23.42 -#define POLLRDNORM     0x040 
   23.43 -
   23.44 -#define BLKTAP_IOCTL_KICK 1
   23.45 -
   23.46 -
   23.47 -void got_sig_bus();
   23.48 -void got_sig_int();
   23.49 -
   23.50 -/* in kernel these are opposite, but we are a consumer now. */
   23.51 -blkif_back_ring_t  fe_ring; /* slightly counterintuitive ;) */
   23.52 -blkif_front_ring_t be_ring; 
   23.53 -
   23.54 -unsigned long mmap_vstart = 0;
   23.55 -char *blktap_mem;
   23.56 -int fd = 0;
   23.57 -
   23.58 -#define BLKTAP_RING_PAGES       1 /* Front */
   23.59 -#define BLKTAP_MMAP_REGION_SIZE (BLKTAP_RING_PAGES + MMAP_PAGES)
   23.60 -    
   23.61 -int bad_count = 0;
   23.62 -void bad(void)
   23.63 -{
   23.64 -    bad_count ++;
   23.65 -    if (bad_count > 50) exit(0);
   23.66 -}
   23.67 -/*-----[ ID Manipulation from tap driver code ]--------------------------*/
   23.68 -
   23.69 -#define ACTIVE_RING_IDX unsigned short
   23.70 -
   23.71 -inline unsigned long MAKE_ID(domid_t fe_dom, ACTIVE_RING_IDX idx)
   23.72 -{
   23.73 -    return ( (fe_dom << 16) | idx );
   23.74 -}
   23.75 -
   23.76 -inline unsigned int ID_TO_IDX(unsigned long id) 
   23.77 -{ 
   23.78 -        return ( id & 0x0000ffff );
   23.79 -}
   23.80 -
   23.81 -inline domid_t ID_TO_DOM(unsigned long id) { return (id >> 16); }
   23.82 -
   23.83 -static int (*request_hook)(blkif_request_t *req) = NULL;
   23.84 -static int (*response_hook)(blkif_response_t *req) = NULL;
   23.85 -        
   23.86 -/*-----[ Data to/from Backend (server) VM ]------------------------------*/
   23.87 -
   23.88 -/*
   23.89 -
   23.90 -inline int write_req_to_be_ring(blkif_request_t *req)
   23.91 -{
   23.92 -    blkif_request_t *req_d;
   23.93 -    static pthread_mutex_t be_prod_mutex = PTHREAD_MUTEX_INITIALIZER;
   23.94 -
   23.95 -    pthread_mutex_lock(&be_prod_mutex);
   23.96 -    req_d = RING_GET_REQUEST(&be_ring, be_ring.req_prod_pvt);
   23.97 -    memcpy(req_d, req, sizeof(blkif_request_t));
   23.98 -    wmb();
   23.99 -    be_ring.req_prod_pvt++;
  23.100 -    pthread_mutex_unlock(&be_prod_mutex);
  23.101 -    
  23.102 -    return 0;
  23.103 -}
  23.104 -*/
  23.105 -
  23.106 -inline int write_rsp_to_fe_ring(blkif_response_t *rsp)
  23.107 -{
  23.108 -    blkif_response_t *rsp_d;
  23.109 -    static pthread_mutex_t fe_prod_mutex = PTHREAD_MUTEX_INITIALIZER;
  23.110 -
  23.111 -    pthread_mutex_lock(&fe_prod_mutex);
  23.112 -    rsp_d = RING_GET_RESPONSE(&fe_ring, fe_ring.rsp_prod_pvt);
  23.113 -    memcpy(rsp_d, rsp, sizeof(blkif_response_t));
  23.114 -    wmb();
  23.115 -    fe_ring.rsp_prod_pvt++;
  23.116 -    pthread_mutex_unlock(&fe_prod_mutex);
  23.117 -
  23.118 -    return 0;
  23.119 -}
  23.120 -
  23.121 -static void apply_rsp_hooks(blkif_t *blkif, blkif_response_t *rsp)
  23.122 -{
  23.123 -    response_hook_t  *rsp_hook;
  23.124 -    
  23.125 -    rsp_hook = blkif->response_hook_chain;
  23.126 -    while (rsp_hook != NULL)
  23.127 -    {
  23.128 -        switch(rsp_hook->func(blkif, rsp, 1))
  23.129 -        {
  23.130 -        case BLKTAP_PASS:
  23.131 -            break;
  23.132 -        default:
  23.133 -            printf("Only PASS is supported for resp hooks!\n");
  23.134 -        }
  23.135 -        rsp_hook = rsp_hook->next;
  23.136 -    }
  23.137 -}
  23.138 -
  23.139 -
  23.140 -static pthread_mutex_t push_mutex = PTHREAD_MUTEX_INITIALIZER;
  23.141 -
  23.142 -void blkif_inject_response(blkif_t *blkif, blkif_response_t *rsp)
  23.143 -{
  23.144 -    
  23.145 -    apply_rsp_hooks(blkif, rsp);
  23.146 -  
  23.147 -    write_rsp_to_fe_ring(rsp);
  23.148 -}
  23.149 -
  23.150 -void blktap_kick_responses(void)
  23.151 -{
  23.152 -    pthread_mutex_lock(&push_mutex);
  23.153 -    
  23.154 -    RING_PUSH_RESPONSES(&fe_ring);
  23.155 -    ioctl(fd, BLKTAP_IOCTL_KICK_FE);
  23.156 -    
  23.157 -    pthread_mutex_unlock(&push_mutex);
  23.158 -}
  23.159 -
  23.160 -/*-----[ Polling fd listeners ]------------------------------------------*/
  23.161 -
  23.162 -#define MAX_POLLFDS 64
  23.163 -
  23.164 -typedef struct {
  23.165 -    int (*func)(int fd);
  23.166 -    struct pollfd *pfd;
  23.167 -    int fd;
  23.168 -    short events;
  23.169 -    int active;
  23.170 -} pollhook_t;
  23.171 -
  23.172 -static struct pollfd  pfd[MAX_POLLFDS+2]; /* tap and store are extra */
  23.173 -static pollhook_t     pollhooks[MAX_POLLFDS];
  23.174 -static unsigned int   ph_freelist[MAX_POLLFDS];
  23.175 -static unsigned int   ph_cons, ph_prod;
  23.176 -#define nr_pollhooks() (MAX_POLLFDS - (ph_prod - ph_cons))
  23.177 -#define PH_IDX(x) (x % MAX_POLLFDS)
  23.178 -
  23.179 -int blktap_attach_poll(int fd, short events, int (*func)(int fd))
  23.180 -{
  23.181 -    pollhook_t *ph;
  23.182 -    
  23.183 -    if (nr_pollhooks() == MAX_POLLFDS) {
  23.184 -        printf("Too many pollhooks!\n");
  23.185 -        return -1;
  23.186 -    }
  23.187 -    
  23.188 -    ph = &pollhooks[ph_freelist[PH_IDX(ph_cons++)]];
  23.189 -    
  23.190 -    ph->func        = func;
  23.191 -    ph->fd          = fd;
  23.192 -    ph->events      = events;
  23.193 -    ph->active      = 1;
  23.194 -    
  23.195 -    DPRINTF("Added fd %d at ph index %d, now %d phs.\n", fd, ph_cons-1, 
  23.196 -            nr_pollhooks());
  23.197 -    
  23.198 -    return 0;
  23.199 -}
  23.200 -
  23.201 -void blktap_detach_poll(int fd)
  23.202 -{
  23.203 -    int i;
  23.204 -    
  23.205 -    for (i=0; i<MAX_POLLFDS; i++)
  23.206 -        if ((pollhooks[i].active) && (pollhooks[i].pfd->fd == fd)) {
  23.207 -            ph_freelist[PH_IDX(ph_prod++)] = i;
  23.208 -            pollhooks[i].pfd->fd = -1;
  23.209 -            pollhooks[i].active = 0;
  23.210 -            break;
  23.211 -        }
  23.212 -        
  23.213 -    DPRINTF("Removed fd %d at ph index %d, now %d phs.\n", fd, i, 
  23.214 -            nr_pollhooks());
  23.215 -}
  23.216 -
  23.217 -void pollhook_init(void)
  23.218 -{
  23.219 -    int i;
  23.220 -    
  23.221 -    for (i=0; i < MAX_POLLFDS; i++) {
  23.222 -        ph_freelist[i] = (i+1) % MAX_POLLFDS;
  23.223 -        pollhooks[i].active = 0;
  23.224 -    }
  23.225 -    
  23.226 -    ph_cons = 0;
  23.227 -    ph_prod = MAX_POLLFDS;
  23.228 -}
  23.229 -
  23.230 -void __attribute__ ((constructor)) blktaplib_init(void)
  23.231 -{
  23.232 -    pollhook_init();
  23.233 -}
  23.234 -
  23.235 -/*-----[ The main listen loop ]------------------------------------------*/
  23.236 -
  23.237 -int blktap_listen(void)
  23.238 -{
  23.239 -    int notify_be, notify_fe, tap_pfd, store_pfd, xs_fd, ret;
  23.240 -    struct xs_handle *h;
  23.241 -    blkif_t *blkif;
  23.242 -
  23.243 -    /* comms rings: */
  23.244 -    blkif_request_t  *req;
  23.245 -    blkif_response_t *rsp;
  23.246 -    blkif_sring_t    *sring;
  23.247 -    RING_IDX          rp, i, pfd_count; 
  23.248 -    
  23.249 -    /* pending rings */
  23.250 -    blkif_request_t req_pending[BLK_RING_SIZE];
  23.251 -    /* blkif_response_t rsp_pending[BLK_RING_SIZE] */;
  23.252 -    
  23.253 -    /* handler hooks: */
  23.254 -    request_hook_t   *req_hook;
  23.255 -    response_hook_t  *rsp_hook;
  23.256 -    
  23.257 -    signal (SIGBUS, got_sig_bus);
  23.258 -    signal (SIGINT, got_sig_int);
  23.259 -    
  23.260 -    __init_blkif();
  23.261 -
  23.262 -    fd = open("/dev/blktap", O_RDWR);
  23.263 -    if (fd == -1)
  23.264 -        err(-1, "open failed!");
  23.265 -
  23.266 -    blktap_mem = mmap(0, PAGE_SIZE * BLKTAP_MMAP_REGION_SIZE, 
  23.267 -             PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
  23.268 -
  23.269 -    if ((int)blktap_mem == -1) 
  23.270 -        err(-1, "mmap failed!");
  23.271 -
  23.272 -    /* assign the rings to the mapped memory */
  23.273 -/*
  23.274 -    sring = (blkif_sring_t *)((unsigned long)blktap_mem + PAGE_SIZE);
  23.275 -    FRONT_RING_INIT(&be_ring, sring, PAGE_SIZE);
  23.276 -*/  
  23.277 -    sring = (blkif_sring_t *)((unsigned long)blktap_mem);
  23.278 -    BACK_RING_INIT(&fe_ring, sring, PAGE_SIZE);
  23.279 -
  23.280 -    mmap_vstart = (unsigned long)blktap_mem +(BLKTAP_RING_PAGES << PAGE_SHIFT);
  23.281 -
  23.282 -
  23.283 -    /* Set up store connection and watch. */
  23.284 -    h = xs_daemon_open();
  23.285 -    if (h == NULL) 
  23.286 -        err(-1, "xs_daemon_open");
  23.287 -    
  23.288 -    ret = add_blockdevice_probe_watch(h, "Domain-0");
  23.289 -    if (ret != 0)
  23.290 -        err(0, "adding device probewatch");
  23.291 -    
  23.292 -    ioctl(fd, BLKTAP_IOCTL_SETMODE, BLKTAP_MODE_INTERPOSE );
  23.293 -
  23.294 -    while(1) {
  23.295 -        int ret;
  23.296 -        
  23.297 -        /* build the poll list */
  23.298 -        pfd_count = 0;
  23.299 -        for ( i=0; i < MAX_POLLFDS; i++ ) {
  23.300 -            pollhook_t *ph = &pollhooks[i];
  23.301 -            
  23.302 -            if (ph->active) {
  23.303 -                pfd[pfd_count].fd     = ph->fd;
  23.304 -                pfd[pfd_count].events = ph->events;
  23.305 -                ph->pfd               = &pfd[pfd_count];
  23.306 -                pfd_count++;
  23.307 -            }
  23.308 -        }
  23.309 -
  23.310 -        tap_pfd = pfd_count++;
  23.311 -        pfd[tap_pfd].fd = fd;
  23.312 -        pfd[tap_pfd].events = POLLIN;
  23.313 -
  23.314 -        store_pfd = pfd_count++;
  23.315 -        pfd[store_pfd].fd = xs_fileno(h);
  23.316 -        pfd[store_pfd].events = POLLIN;
  23.317 -        
  23.318 -        if ( (ret = (poll(pfd, pfd_count, 10000)) == 0) ) {
  23.319 -            if (DEBUG_RING_IDXS)
  23.320 -                ioctl(fd, BLKTAP_IOCTL_PRINT_IDXS);
  23.321 -            continue;
  23.322 -        }
  23.323 -
  23.324 -        for (i=0; i < MAX_POLLFDS; i++) {
  23.325 -            if ( (pollhooks[i].active ) && (pollhooks[i].pfd->revents ) )
  23.326 -                pollhooks[i].func(pollhooks[i].pfd->fd);
  23.327 -        }
  23.328 -        
  23.329 -        if (pfd[store_pfd].revents) {
  23.330 -            ret = xs_fire_next_watch(h);
  23.331 -        }
  23.332 -
  23.333 -        if (pfd[tap_pfd].revents) 
  23.334 -        {    
  23.335 -            /* empty the fe_ring */
  23.336 -            notify_fe = 0;
  23.337 -            notify_be = RING_HAS_UNCONSUMED_REQUESTS(&fe_ring);
  23.338 -            rp = fe_ring.sring->req_prod;
  23.339 -            rmb();
  23.340 -            for (i = fe_ring.req_cons; i != rp; i++)
  23.341 -            {
  23.342 -                int done = 0; 
  23.343 -
  23.344 -                req = RING_GET_REQUEST(&fe_ring, i);
  23.345 -                memcpy(&req_pending[ID_TO_IDX(req->id)], req, sizeof(*req));
  23.346 -                req = &req_pending[ID_TO_IDX(req->id)];
  23.347 -
  23.348 -                blkif = blkif_find_by_handle(ID_TO_DOM(req->id), req->handle);
  23.349 -
  23.350 -                if (blkif != NULL)
  23.351 -                {
  23.352 -                    req_hook = blkif->request_hook_chain;
  23.353 -                    while (req_hook != NULL)
  23.354 -                    {
  23.355 -                        switch(req_hook->func(blkif, req, ((i+1) == rp)))
  23.356 -                        {
  23.357 -                        case BLKTAP_RESPOND:
  23.358 -                            apply_rsp_hooks(blkif, (blkif_response_t *)req);
  23.359 -                            write_rsp_to_fe_ring((blkif_response_t *)req);
  23.360 -                            notify_fe = 1;
  23.361 -                            done = 1;
  23.362 -                            break;
  23.363 -                        case BLKTAP_STOLEN:
  23.364 -                            done = 1;
  23.365 -                            break;
  23.366 -                        case BLKTAP_PASS:
  23.367 -                            break;
  23.368 -                        default:
  23.369 -                            printf("Unknown request hook return value!\n");
  23.370 -                        }
  23.371 -                        if (done) break;
  23.372 -                        req_hook = req_hook->next;
  23.373 -                    }
  23.374 -                }
  23.375 -
  23.376 -                if (done == 0) 
  23.377 -                {
  23.378 -                    /* this was:  */
  23.379 -                    /* write_req_to_be_ring(req); */
  23.380 -
  23.381 -                    unsigned long id = req->id;
  23.382 -                    unsigned short operation = req->operation;
  23.383 -                    printf("Unterminated request!\n");
  23.384 -                    rsp = (blkif_response_t *)req;
  23.385 -                    rsp->id = id;
  23.386 -                    rsp->operation = operation;
  23.387 -                    rsp->status = BLKIF_RSP_ERROR;
  23.388 -                    write_rsp_to_fe_ring(rsp);
  23.389 -                    notify_fe = 1;
  23.390 -                    done = 1;
  23.391 -                }
  23.392 -
  23.393 -            }
  23.394 -            fe_ring.req_cons = i;
  23.395 -
  23.396 -            /* empty the be_ring */
  23.397 -/*
  23.398 -            notify_fe |= RING_HAS_UNCONSUMED_RESPONSES(&be_ring);
  23.399 -            rp = be_ring.sring->rsp_prod;
  23.400 -            rmb();
  23.401 -            for (i = be_ring.rsp_cons; i != rp; i++)
  23.402 -            {
  23.403 -
  23.404 -                rsp = RING_GET_RESPONSE(&be_ring, i);
  23.405 -                memcpy(&rsp_pending[ID_TO_IDX(rsp->id)], rsp, sizeof(*rsp));
  23.406 -                rsp = &rsp_pending[ID_TO_IDX(rsp->id)];
  23.407 -
  23.408 -                DPRINTF("copying a be request\n");
  23.409 -
  23.410 -                apply_rsp_hooks(rsp);
  23.411 -                write_rsp_to_fe_ring(rsp);
  23.412 -            }
  23.413 -            be_ring.rsp_cons = i;
  23.414 -*/
  23.415 -            /* notify the domains */
  23.416 -/*
  23.417 -            if (notify_be) {
  23.418 -                DPRINTF("notifying be\n");
  23.419 -pthread_mutex_lock(&push_mutex);
  23.420 -                RING_PUSH_REQUESTS(&be_ring);
  23.421 -                ioctl(fd, BLKTAP_IOCTL_KICK_BE);
  23.422 -pthread_mutex_unlock(&push_mutex);
  23.423 -            }
  23.424 -*/
  23.425 -            if (notify_fe) {
  23.426 -                DPRINTF("notifying fe\n");
  23.427 -                pthread_mutex_lock(&push_mutex);
  23.428 -                RING_PUSH_RESPONSES(&fe_ring);
  23.429 -                ioctl(fd, BLKTAP_IOCTL_KICK_FE);
  23.430 -                pthread_mutex_unlock(&push_mutex);
  23.431 -            }
  23.432 -        }        
  23.433 -    }
  23.434 -
  23.435 -
  23.436 -    munmap(blktap_mem, PAGE_SIZE);
  23.437 -
  23.438 - mmap_failed:
  23.439 -    close(fd);
  23.440 -
  23.441 - open_failed:
  23.442 -    return 0;
  23.443 -}
  23.444 -
  23.445 -void got_sig_bus() {
  23.446 -    printf("Attempted to access a page that isn't.\n");
  23.447 -    exit(-1);
  23.448 -}
  23.449 -
  23.450 -void got_sig_int() {
  23.451 -    DPRINTF("quitting -- returning to passthrough mode.\n");
  23.452 -    if (fd > 0) ioctl(fd, BLKTAP_IOCTL_SETMODE, BLKTAP_MODE_PASSTHROUGH );
  23.453 -    close(fd);
  23.454 -    fd = 0;
  23.455 -    exit(0);
  23.456 -} 
    24.1 --- a/tools/blktap/blktaplib.h	Fri Jun 23 15:26:01 2006 -0600
    24.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    24.3 @@ -1,171 +0,0 @@
    24.4 -/* blktaplib.h
    24.5 - *
    24.6 - * userland accessors to the block tap.
    24.7 - *
    24.8 - * Sept 2/05 -- I'm scaling this back to only support block remappings
    24.9 - * to user in a backend domain.  Passthrough and interposition can be readded
   24.10 - * once transitive grants are available.
   24.11 - */
   24.12 - 
   24.13 -#ifndef __BLKTAPLIB_H__
   24.14 -#define __BLKTAPLIB_H__
   24.15 -
   24.16 -#include <xenctrl.h>
   24.17 -#include <sys/user.h>
   24.18 -#include <xen/xen.h>
   24.19 -#include <xen/io/blkif.h>
   24.20 -#include <xen/io/ring.h>
   24.21 -#include <xen/io/domain_controller.h>
   24.22 -#include <xs.h>
   24.23 -
   24.24 -#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE)
   24.25 -
   24.26 -/* /dev/xen/blktap resides at device number major=10, minor=202        */ 
   24.27 -#define BLKTAP_MINOR 202
   24.28 -
   24.29 -/* size of the extra VMA area to map in attached pages. */
   24.30 -#define BLKTAP_VMA_PAGES BLK_RING_SIZE
   24.31 -
   24.32 -/* blktap IOCTLs:                                                      */
   24.33 -#define BLKTAP_IOCTL_KICK_FE         1
   24.34 -#define BLKTAP_IOCTL_KICK_BE         2
   24.35 -#define BLKTAP_IOCTL_SETMODE         3
   24.36 -#define BLKTAP_IOCTL_PRINT_IDXS      100   
   24.37 -
   24.38 -/* blktap switching modes: (Set with BLKTAP_IOCTL_SETMODE)             */
   24.39 -#define BLKTAP_MODE_PASSTHROUGH      0x00000000  /* default            */
   24.40 -#define BLKTAP_MODE_INTERCEPT_FE     0x00000001
   24.41 -#define BLKTAP_MODE_INTERCEPT_BE     0x00000002
   24.42 -#define BLKTAP_MODE_COPY_FE          0x00000004
   24.43 -#define BLKTAP_MODE_COPY_BE          0x00000008
   24.44 -#define BLKTAP_MODE_COPY_FE_PAGES    0x00000010
   24.45 -#define BLKTAP_MODE_COPY_BE_PAGES    0x00000020
   24.46 -
   24.47 -#define BLKTAP_MODE_INTERPOSE \
   24.48 -           (BLKTAP_MODE_INTERCEPT_FE | BLKTAP_MODE_INTERCEPT_BE)
   24.49 -
   24.50 -#define BLKTAP_MODE_COPY_BOTH \
   24.51 -           (BLKTAP_MODE_COPY_FE | BLKTAP_MODE_COPY_BE)
   24.52 -
   24.53 -#define BLKTAP_MODE_COPY_BOTH_PAGES \
   24.54 -           (BLKTAP_MODE_COPY_FE_PAGES | BLKTAP_MODE_COPY_BE_PAGES)
   24.55 -
   24.56 -static inline int BLKTAP_MODE_VALID(unsigned long arg)
   24.57 -{
   24.58 -    return (
   24.59 -        ( arg == BLKTAP_MODE_PASSTHROUGH  ) ||
   24.60 -        ( arg == BLKTAP_MODE_INTERCEPT_FE ) ||
   24.61 -        ( arg == BLKTAP_MODE_INTERPOSE    ) );
   24.62 -/*
   24.63 -    return (
   24.64 -        ( arg == BLKTAP_MODE_PASSTHROUGH  ) ||
   24.65 -        ( arg == BLKTAP_MODE_INTERCEPT_FE ) ||
   24.66 -        ( arg == BLKTAP_MODE_INTERCEPT_BE ) ||
   24.67 -        ( arg == BLKTAP_MODE_INTERPOSE    ) ||
   24.68 -        ( (arg & ~BLKTAP_MODE_COPY_FE_PAGES) == BLKTAP_MODE_COPY_FE ) ||
   24.69 -        ( (arg & ~BLKTAP_MODE_COPY_BE_PAGES) == BLKTAP_MODE_COPY_BE ) ||
   24.70 -        ( (arg & ~BLKTAP_MODE_COPY_BOTH_PAGES) == BLKTAP_MODE_COPY_BOTH )
   24.71 -        );
   24.72 -*/
   24.73 -}
   24.74 -
   24.75 -/* Return values for handling messages in hooks. */
   24.76 -#define BLKTAP_PASS     0 /* Keep passing this request as normal. */
   24.77 -#define BLKTAP_RESPOND  1 /* Request is now a reply.  Return it.  */
   24.78 -#define BLKTAP_STOLEN   2 /* Hook has stolen request.             */
   24.79 -
   24.80 -//#define domid_t unsigned short
   24.81 -
   24.82 -inline unsigned int ID_TO_IDX(unsigned long id);
   24.83 -inline domid_t ID_TO_DOM(unsigned long id);
   24.84 -
   24.85 -int  blktap_attach_poll(int fd, short events, int (*func)(int));
   24.86 -void blktap_detach_poll(int fd);
   24.87 -int  blktap_listen(void);
   24.88 -
   24.89 -struct blkif;
   24.90 -
   24.91 -typedef struct request_hook_st {
   24.92 -    char *name;
   24.93 -    int (*func)(struct blkif *, blkif_request_t *, int);
   24.94 -    struct request_hook_st *next;
   24.95 -} request_hook_t;
   24.96 -
   24.97 -typedef struct response_hook_st {
   24.98 -    char *name;
   24.99 -    int (*func)(struct blkif *, blkif_response_t *, int);
  24.100 -    struct response_hook_st *next;
  24.101 -} response_hook_t;
  24.102 -
  24.103 -struct blkif_ops {
  24.104 -    long int (*get_size)(struct blkif *blkif);
  24.105 -    long int (*get_secsize)(struct blkif *blkif);
  24.106 -    unsigned (*get_info)(struct blkif *blkif);
  24.107 -};
  24.108 -
  24.109 -typedef struct blkif {
  24.110 -    domid_t domid;
  24.111 -    long int handle;
  24.112 -
  24.113 -    long int pdev;
  24.114 -    long int readonly;
  24.115 -
  24.116 -    enum { DISCONNECTED, CONNECTED } state;
  24.117 -
  24.118 -    struct blkif_ops *ops;
  24.119 -    request_hook_t *request_hook_chain;
  24.120 -    response_hook_t *response_hook_chain;
  24.121 -
  24.122 -    struct blkif *hash_next;
  24.123 -
  24.124 -    void *prv;  /* device-specific data */
  24.125 -} blkif_t;
  24.126 -
  24.127 -void register_new_blkif_hook(int (*fn)(blkif_t *blkif));
  24.128 -blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle);
  24.129 -blkif_t *alloc_blkif(domid_t domid);
  24.130 -int blkif_init(blkif_t *blkif, long int handle, long int pdev, 
  24.131 -               long int readonly);
  24.132 -void free_blkif(blkif_t *blkif);
  24.133 -void __init_blkif(void);
  24.134 -
  24.135 -
  24.136 -/* xenstore/xenbus: */
  24.137 -extern int add_blockdevice_probe_watch(struct xs_handle *h, 
  24.138 -                                       const char *domname);
  24.139 -int xs_fire_next_watch(struct xs_handle *h);
  24.140 -
  24.141 -
  24.142 -void blkif_print_hooks(blkif_t *blkif);
  24.143 -void blkif_register_request_hook(blkif_t *blkif, char *name, 
  24.144 -                             int (*rh)(blkif_t *, blkif_request_t *, int));
  24.145 -void blkif_register_response_hook(blkif_t *blkif, char *name, 
  24.146 -                             int (*rh)(blkif_t *, blkif_response_t *, int));
  24.147 -void blkif_inject_response(blkif_t *blkif, blkif_response_t *);
  24.148 -void blktap_kick_responses(void);
  24.149 -
  24.150 -/* this must match the underlying driver... */
  24.151 -#define MAX_PENDING_REQS 64
  24.152 -
  24.153 -/* Accessing attached data page mappings */
  24.154 -#define MMAP_PAGES                                              \
  24.155 -    (MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST)
  24.156 -#define MMAP_VADDR(_req,_seg)                                   \
  24.157 -    (mmap_vstart +                                              \
  24.158 -     ((_req) * BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE) +    \
  24.159 -     ((_seg) * PAGE_SIZE))
  24.160 -
  24.161 -extern unsigned long mmap_vstart;
  24.162 -
  24.163 -/* Defines that are only used by library clients */
  24.164 -
  24.165 -#ifndef __COMPILING_BLKTAP_LIB
  24.166 -
  24.167 -static char *blkif_op_name[] = {
  24.168 -    [BLKIF_OP_READ]       = "READ",
  24.169 -    [BLKIF_OP_WRITE]      = "WRITE",
  24.170 -};
  24.171 -
  24.172 -#endif /* __COMPILING_BLKTAP_LIB */
  24.173 -    
  24.174 -#endif /* __BLKTAPLIB_H__ */
    25.1 --- a/tools/blktap/list.h	Fri Jun 23 15:26:01 2006 -0600
    25.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    25.3 @@ -1,55 +0,0 @@
    25.4 -/*
    25.5 - * list.h
    25.6 - * 
    25.7 - * This is a subset of linux's list.h intended to be used in user-space.
    25.8 - * 
    25.9 - */
   25.10 -
   25.11 -#ifndef __LIST_H__
   25.12 -#define __LIST_H__
   25.13 -
   25.14 -#define LIST_POISON1  ((void *) 0x00100100)
   25.15 -#define LIST_POISON2  ((void *) 0x00200200)
   25.16 -
   25.17 -struct list_head {
   25.18 -        struct list_head *next, *prev;
   25.19 -};
   25.20 - 
   25.21 -#define LIST_HEAD_INIT(name) { &(name), &(name) }
   25.22 - 
   25.23 -#define LIST_HEAD(name) \
   25.24 -        struct list_head name = LIST_HEAD_INIT(name)
   25.25 -
   25.26 -static inline void __list_add(struct list_head *new,
   25.27 -                              struct list_head *prev,
   25.28 -                              struct list_head *next)
   25.29 -{
   25.30 -        next->prev = new;
   25.31 -        new->next = next;
   25.32 -        new->prev = prev;
   25.33 -        prev->next = new;
   25.34 -}
   25.35 -
   25.36 -static inline void list_add(struct list_head *new, struct list_head *head)
   25.37 -{
   25.38 -        __list_add(new, head, head->next);
   25.39 -}
   25.40 -static inline void __list_del(struct list_head * prev, struct list_head * next)
   25.41 -{
   25.42 -        next->prev = prev;
   25.43 -        prev->next = next;
   25.44 -}
   25.45 -static inline void list_del(struct list_head *entry)
   25.46 -{
   25.47 -        __list_del(entry->prev, entry->next);
   25.48 -        entry->next = LIST_POISON1;
   25.49 -        entry->prev = LIST_POISON2;
   25.50 -}
   25.51 -#define list_entry(ptr, type, member)                                   \
   25.52 -        ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))
   25.53 -#define list_for_each_entry(pos, head, member)                          \
   25.54 -        for (pos = list_entry((head)->next, typeof(*pos), member);      \
   25.55 -             &pos->member != (head);                                    \
   25.56 -             pos = list_entry(pos->member.next, typeof(*pos), member))
   25.57 -
   25.58 -#endif /* __LIST_H__ */
    26.1 --- a/tools/blktap/parallax/Makefile	Fri Jun 23 15:26:01 2006 -0600
    26.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    26.3 @@ -1,63 +0,0 @@
    26.4 -XEN_ROOT = ../../..
    26.5 -include $(XEN_ROOT)/tools/Rules.mk
    26.6 -
    26.7 -PARALLAX_INSTALL_DIR	= /usr/sbin
    26.8 -
    26.9 -INSTALL         = install
   26.10 -INSTALL_PROG    = $(INSTALL) -m0755
   26.11 -INSTALL_DIR     = $(INSTALL) -d -m0755
   26.12 -
   26.13 -INCLUDES += -I.. -I/usr/include -I $(XEN_LIBXC)
   26.14 -
   26.15 -LDFLAGS = -L.. -lpthread -lz -lblktap
   26.16 -
   26.17 -#PLX_SRCS := 
   26.18 -PLX_SRCS := vdi.c 
   26.19 -PLX_SRCS += radix.c 
   26.20 -PLX_SRCS += snaplog.c
   26.21 -PLX_SRCS += blockstore.c 
   26.22 -PLX_SRCS += block-async.c
   26.23 -PLX_SRCS += requests-async.c
   26.24 -VDI_SRCS := $(PLX_SRCS)
   26.25 -PLX_SRCS += parallax.c
   26.26 -
   26.27 -#VDI_TOOLS :=
   26.28 -VDI_TOOLS := vdi_create
   26.29 -VDI_TOOLS += vdi_list
   26.30 -VDI_TOOLS += vdi_snap
   26.31 -VDI_TOOLS += vdi_snap_list
   26.32 -VDI_TOOLS += vdi_snap_delete
   26.33 -VDI_TOOLS += vdi_fill
   26.34 -VDI_TOOLS += vdi_tree
   26.35 -VDI_TOOLS += vdi_validate
   26.36 -
   26.37 -CFLAGS   += -Werror
   26.38 -CFLAGS   += -Wno-unused
   26.39 -CFLAGS   += -fno-strict-aliasing
   26.40 -CFLAGS   += $(INCLUDES)
   26.41 -CFLAGS   += -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE
   26.42 -# Get gcc to generate the dependencies for us.
   26.43 -CFLAGS   += -Wp,-MD,.$(@F).d
   26.44 -DEPS     = .*.d
   26.45 -
   26.46 -OBJS     = $(patsubst %.c,%.o,$(SRCS))
   26.47 -IBINS    = parallax $(VDI_TOOLS)
   26.48 -
   26.49 -.PHONY: all
   26.50 -all: $(VDI_TOOLS) parallax blockstored
   26.51 -
   26.52 -.PHONY: install
   26.53 -install: all
   26.54 -	$(INSTALL_PROG) $(IBINS) $(DESTDIR)$(PARALLAX_INSTALL_DIR)
   26.55 -
   26.56 -.PHONY: clean
   26.57 -clean:
   26.58 -	rm -rf *.o *~ $(DEPS) xen TAGS $(VDI_TOOLS) parallax vdi_unittest
   26.59 -
   26.60 -parallax: $(PLX_SRCS)
   26.61 -	$(CC) $(CFLAGS) -o parallax -L.. $(LDFLAGS) $(PLX_SRCS)
   26.62 -
   26.63 -${VDI_TOOLS}: %: %.c $(VDI_SRCS)
   26.64 -	$(CC) $(CFLAGS) -o $@ $@.c $(LDFLAGS) $(VDI_SRCS)
   26.65 -
   26.66 --include $(DEPS)
    27.1 --- a/tools/blktap/parallax/README	Fri Jun 23 15:26:01 2006 -0600
    27.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    27.3 @@ -1,177 +0,0 @@
    27.4 -Parallax Quick Overview
    27.5 -March 3, 2005
    27.6 -
    27.7 -This is intended to provide a quick set of instructions to let you
    27.8 -guys play with the current parallax source.  In its current form, the
    27.9 -code will let you run an arbitrary number of VMs off of a single disk
   27.10 -image, doing copy-on-write as they make updates.  Each domain is
   27.11 -assigned a virtual disk image (VDI), which may be based on a snapshot
   27.12 -of an existing image.  All of the VDI and snapshot management should
   27.13 -currently work.
   27.14 -
   27.15 -The current implementation uses a single file as a blockstore for
   27.16 -_everything_; this will soon be replaced by the fancier backend code
   27.17 -and the local cache.  As it stands, Parallax will create
   27.18 -"blockstore.dat" in the directory that you run it from, and use
   27.19 -largefile support to make this grow to unfathomable girth.  So, you
   27.20 -probably want to run the daemon off of a local disk, with a lot of
   27.21 -free space.
   27.22 -
   27.23 -Here's how to get going:
   27.24 -
   27.25 -0. Setup:
   27.26 ----------
   27.27 -
   27.28 -Pick a local directory on a disk with lots of room.  You should be
   27.29 -running from a privileged domain (e.g. dom0) with the blocktap
   27.30 -configured in and block backend NOT.
   27.31 -
   27.32 -For convenience (for the moment) copy all of the vdi tools (vdi_*) and
   27.33 -the parallax daemon from tools/blktap into this directory.
   27.34 -
   27.35 -1. Populate the blockstore:
   27.36 ----------------------------
   27.37 -
   27.38 -First you need to put at least one image into the blockstore.  You
   27.39 -will need a disk image, either as a file or local partition.  My
   27.40 -general approach has been to
   27.41 -
   27.42 -(a) make a really big sparse file with 
   27.43 -
   27.44 -        dd if=/dev/zero of=./image bs=4K count=1 seek=[big value]
   27.45 -
   27.46 -(b) put a filesystem into it
   27.47 -
   27.48 -        mkfs.ext3 ./image
   27.49 -
   27.50 -(c) mount it using loopback
   27.51 -
   27.52 -        mkdir ./mnt
   27.53 -        mount -o loop ./image ./mnt
   27.54 -
   27.55 -(d) cd into it and untar one of the image files from srg-roots.
   27.56 -
   27.57 -        cd mnt
   27.58 -        tar ...
   27.59 -
   27.60 -NOTE: Beware if your system is FC3.  mkfs is not compatible with old
   27.61 -versions of fedora, and so you don't have much choice but to install
   27.62 -further fc3 images if you have used the fc3 version of mkfs.
   27.63 -
   27.64 -(e) unmount the image
   27.65 -
   27.66 -        cd ..
   27.67 -        umount mnt
   27.68 -
   27.69 -(f) now, create a new VDI to hold the image 
   27.70 -
   27.71 -        ./vdi_create "My new FC3 VDI"
   27.72 -
   27.73 -(g) get the id of the new VDI.
   27.74 -
   27.75 -        ./vdi_list
   27.76 -
   27.77 -        |      0                     My new FC3 VDI
   27.78 -
   27.79 -(0 is the VDI id... create a few more if you want.)
   27.80 -
   27.81 -(h) hoover your image into the new VDI.
   27.82 -
   27.83 -        ./vdi_fill 0 ./image
   27.84 -
   27.85 -This will pull the entire image into the blockstore and set up a
   27.86 -mapping tree for it for VDI 0.  Passing a device (i.e. /dev/sda3)
   27.87 -should also work, but vdi_fill has NO notion of sparseness yet, so you
   27.88 -are going to pump a block into the store for each block you read.
   27.89 -
   27.90 -vdi_fill will count up until it is done, and you should be ready to
   27.91 -go.  If you want to be anal, you can use vdi_validate to test the VDI
   27.92 -against the original image.
   27.93 -
   27.94 -2. Create some extra VDIs
   27.95 --------------------------
   27.96 -
   27.97 -VDIs are actually a list of snapshots, and each snapshot is a full
   27.98 -image of mappings.  So, to preserve an immutable copy of a current
   27.99 -VDI, do this:
  27.100 -
  27.101 -(a) Snapshot your new VDI.
  27.102 -
  27.103 -        ./vdi_snap 0
  27.104 -
  27.105 -Snapshotting writes the current radix root to the VDI's snapshot log,
  27.106 -and assigns it a new writable root.
  27.107 -
  27.108 -(b) look at the VDI's snapshot log.
  27.109 -
  27.110 -        ./vdi_snap_list 0
  27.111 -
  27.112 -        | 16   0      Thu Mar  3 19:27:48 2005 565111           31
  27.113 -
  27.114 -The first two columns constitute a snapshot id and represent the
  27.115 -(block, offset) of the snapshot record.  The Date tells you when the
  27.116 -snapshot was made, and 31 is the radix root node of the snapshot.
  27.117 -
  27.118 -(c) Create a new VDI, based on that snapshot, and look at the list.
  27.119 -
  27.120 -        ./vdi_create "FC3 - Copy 1" 16 0
  27.121 -        ./vdi_list
  27.122 -
  27.123 -        |      0                     My new FC3 VDI
  27.124 -        |      1                       FC3 - Copy 1
  27.125 -
  27.126 -NOTE: If you have Graphviz installed on your system, you can use
  27.127 -vdi_tree to generate a postscript of your current set of VDIs and
  27.128 -snapshots.
  27.129 -
  27.130 -
  27.131 -Create as many VDIs as you need for the VMs that you want to run.
  27.132 -
  27.133 -3. Boot some VMs:
  27.134 ------------------
  27.135 -
  27.136 -Parallax currently uses a hack in xend to pass the VDI id; you need to
  27.137 -modify the disk line of the VM config that is going to mount it.
  27.138 -
  27.139 -(a) set up your vm config, by using the following disk line:
  27.140 -
  27.141 -        disk = ['parallax:1,sda1,w,0' ]
  27.142 -
  27.143 -This example uses VDI 1 (from vdi_list above), presents it as sda1
  27.144 -(writable), and uses dom 0 as the backend.  If you were running the
  27.145 -daemon (and tap driver) in some domain other than 0, you would change
  27.146 -this last parameter.
  27.147 -
  27.148 -NOTE: You'll need to have reinstalled xend/tools prior to booting the vm, so that it knows what to do with "parallax:".
  27.149 -
  27.150 -(b) Run parallax in the backend domain.
  27.151 -
  27.152 -        ./parallax
  27.153 -
  27.154 -(c) create your new domain.
  27.155 -
  27.156 -        xm create ...
  27.157 -
  27.158 ----
  27.159 -
  27.160 -That's pretty much all there is to it at the moment.  Hope this is
  27.161 -clear enough to get you going.  Now, a few serious caveats that will
  27.162 -be sorted out in the almost immediate future:
  27.163 -
  27.164 -WARNINGS:
  27.165 ----------
  27.166 -
  27.167 -1. There is NO locking in the VDI tools at the moment, so I'd avoid
  27.168 -running them in parallel, or more importantly, running them while the
  27.169 -daemon is running.
  27.170 -
  27.171 -2. I doubt that xend will be very happy about restarting if you have
  27.172 -parallax-using domains.  So if it dies while there are active parallax
  27.173 -doms, you may need to reboot.
  27.174 -
  27.175 -3. I've turned off write-in-place.  So at the moment, EVERY block
  27.176 -write is a log append on the blockstore.  I've been having some probs
  27.177 -with the radix tree's marking of writable blocks after snapshots and
  27.178 -will sort this out very soon.
  27.179 -
  27.180 -
    28.1 --- a/tools/blktap/parallax/block-async.c	Fri Jun 23 15:26:01 2006 -0600
    28.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    28.3 @@ -1,393 +0,0 @@
    28.4 -/* block-async.c
    28.5 - * 
    28.6 - * Asynchronous block wrappers for parallax.
    28.7 - */
    28.8 - 
    28.9 - 
   28.10 -#include <stdio.h>
   28.11 -#include <stdlib.h>
   28.12 -#include <string.h>
   28.13 -#include <pthread.h>
   28.14 -#include "block-async.h"
   28.15 -#include "blockstore.h"
   28.16 -#include "vdi.h"
   28.17 -
   28.18 -
   28.19 -#if 0
   28.20 -#define DPRINTF(_f, _a...) printf ( _f , ## _a )
   28.21 -#else
   28.22 -#define DPRINTF(_f, _a...) ((void)0)
   28.23 -#endif
   28.24 -
   28.25 -/* We have a queue of outstanding I/O requests implemented as a 
   28.26 - * circular producer-consumer ring with free-running buffers.
   28.27 - * To allow reordering, this ring indirects to indexes in a
   28.28 - * ring of io_structs.
   28.29 - * 
   28.30 - * the block_* calls may either add an entry to this ring and return, 
   28.31 - * or satisfy the request immediately and call the callback directly.
   28.32 - * None of the io calls in parallax should be nested enough to worry 
   28.33 - * about stack problems with this approach.
   28.34 - */
   28.35 -
   28.36 -struct read_args {
   28.37 -    uint64_t addr;
   28.38 -};
   28.39 -
   28.40 -struct write_args {
   28.41 -    uint64_t   addr;
   28.42 -    char *block;
   28.43 -};
   28.44 -
   28.45 -struct alloc_args {
   28.46 -    char *block;
   28.47 -};
   28.48 - 
   28.49 -struct pending_io_req {
   28.50 -    enum {IO_READ, IO_WRITE, IO_ALLOC, IO_RWAKE, IO_WWAKE} op;
   28.51 -    union {
   28.52 -        struct read_args  r;
   28.53 -        struct write_args w;
   28.54 -        struct alloc_args a;
   28.55 -    } u;
   28.56 -    io_cb_t cb;
   28.57 -    void *param;
   28.58 -};
   28.59 -
   28.60 -void radix_lock_init(struct radix_lock *r)
   28.61 -{
   28.62 -    int i;
   28.63 -    
   28.64 -    pthread_mutex_init(&r->lock, NULL);
   28.65 -    for (i=0; i < 1024; i++) {
   28.66 -        r->lines[i] = 0;
   28.67 -        r->waiters[i] = NULL;
   28.68 -        r->state[i] = ANY;
   28.69 -    }
   28.70 -}
   28.71 -
   28.72 -/* maximum outstanding I/O requests issued asynchronously */
   28.73 -/* must be a power of 2.*/
   28.74 -#define MAX_PENDING_IO 1024
   28.75 -
   28.76 -/* how many threads to concurrently issue I/O to the disk. */
   28.77 -#define IO_POOL_SIZE   10
   28.78 -
   28.79 -static struct pending_io_req pending_io_reqs[MAX_PENDING_IO];
   28.80 -static int pending_io_list[MAX_PENDING_IO];
   28.81 -static unsigned long io_prod = 0, io_cons = 0, io_free = 0;
   28.82 -#define PENDING_IO_MASK(_x) ((_x) & (MAX_PENDING_IO - 1))
   28.83 -#define PENDING_IO_IDX(_x) ((_x) - pending_io_reqs)
   28.84 -#define PENDING_IO_ENT(_x) \
   28.85 -	(&pending_io_reqs[pending_io_list[PENDING_IO_MASK(_x)]])
   28.86 -#define CAN_PRODUCE_PENDING_IO ((io_free + MAX_PENDING_IO) != io_prod)
   28.87 -#define CAN_CONSUME_PENDING_IO (io_cons != io_prod)
   28.88 -static pthread_mutex_t pending_io_lock = PTHREAD_MUTEX_INITIALIZER;
   28.89 -static pthread_cond_t  pending_io_cond = PTHREAD_COND_INITIALIZER;
   28.90 -
   28.91 -static void init_pending_io(void)
   28.92 -{
   28.93 -    int i;
   28.94 -	
   28.95 -    for (i=0; i<MAX_PENDING_IO; i++)
   28.96 -        pending_io_list[i] = i;
   28.97 -		
   28.98 -} 
   28.99 -
  28.100 -void block_read(uint64_t addr, io_cb_t cb, void *param)
  28.101 -{
  28.102 -    struct pending_io_req *req;
  28.103 -    
  28.104 -    pthread_mutex_lock(&pending_io_lock);
  28.105 -    assert(CAN_PRODUCE_PENDING_IO);
  28.106 -    
  28.107 -    req = PENDING_IO_ENT(io_prod++);
  28.108 -    DPRINTF("Produce (R) %lu (%p)\n", io_prod - 1, req);
  28.109 -    req->op = IO_READ;
  28.110 -    req->u.r.addr = addr;
  28.111 -    req->cb = cb;
  28.112 -    req->param = param;
  28.113 -    
  28.114 -    pthread_cond_signal(&pending_io_cond);
  28.115 -    pthread_mutex_unlock(&pending_io_lock);	
  28.116 -}
  28.117 -
  28.118 -
  28.119 -void block_write(uint64_t addr, char *block, io_cb_t cb, void *param)
  28.120 -{
  28.121 -    struct pending_io_req *req;
  28.122 -    
  28.123 -    pthread_mutex_lock(&pending_io_lock);
  28.124 -    assert(CAN_PRODUCE_PENDING_IO);
  28.125 -    
  28.126 -    req = PENDING_IO_ENT(io_prod++);
  28.127 -    DPRINTF("Produce (W) %lu (%p)\n", io_prod - 1, req);
  28.128 -    req->op = IO_WRITE;
  28.129 -    req->u.w.addr  = addr;
  28.130 -    req->u.w.block = block;
  28.131 -    req->cb = cb;
  28.132 -    req->param = param;
  28.133 -    
  28.134 -    pthread_cond_signal(&pending_io_cond);
  28.135 -    pthread_mutex_unlock(&pending_io_lock);	
  28.136 -}
  28.137 -
  28.138 -
  28.139 -void block_alloc(char *block, io_cb_t cb, void *param)
  28.140 -{
  28.141 -    struct pending_io_req *req;
  28.142 -	
  28.143 -    pthread_mutex_lock(&pending_io_lock);
  28.144 -    assert(CAN_PRODUCE_PENDING_IO);
  28.145 -    
  28.146 -    req = PENDING_IO_ENT(io_prod++);
  28.147 -    req->op = IO_ALLOC;
  28.148 -    req->u.a.block = block;
  28.149 -    req->cb = cb;
  28.150 -    req->param = param;
  28.151 -    
  28.152 -    pthread_cond_signal(&pending_io_cond);
  28.153 -    pthread_mutex_unlock(&pending_io_lock);	
  28.154 -}
  28.155 -
  28.156 -void block_rlock(struct radix_lock *r, int row, io_cb_t cb, void *param)
  28.157 -{
  28.158 -    struct io_ret ret;
  28.159 -    pthread_mutex_lock(&r->lock);
  28.160 -    
  28.161 -    if (( r->lines[row] >= 0 ) && (r->state[row] != STOP)) {
  28.162 -        r->lines[row]++;
  28.163 -        r->state[row] = READ;
  28.164 -        DPRINTF("RLOCK  : %3d (row: %d)\n", r->lines[row], row);
  28.165 -        pthread_mutex_unlock(&r->lock);
  28.166 -        ret.type = IO_INT_T;
  28.167 -        ret.u.i = 0;
  28.168 -        cb(ret, param);
  28.169 -    } else {
  28.170 -        struct radix_wait **rwc;
  28.171 -        struct radix_wait *rw = 
  28.172 -            (struct radix_wait *) malloc (sizeof(struct radix_wait));
  28.173 -        DPRINTF("RLOCK  : %3d (row: %d) -- DEFERRED!\n", r->lines[row], row);
  28.174 -        rw->type  = RLOCK;
  28.175 -        rw->param = param;
  28.176 -        rw->cb    = cb;
  28.177 -        rw->next  = NULL;
  28.178 -        /* append to waiters list. */
  28.179 -        rwc = &r->waiters[row];
  28.180 -        while (*rwc != NULL) rwc = &(*rwc)->next;
  28.181 -        *rwc = rw;
  28.182 -        pthread_mutex_unlock(&r->lock);
  28.183 -        return;
  28.184 -    }
  28.185 -}
  28.186 -
  28.187 -
  28.188 -void block_wlock(struct radix_lock *r, int row, io_cb_t cb, void *param)
  28.189 -{
  28.190 -    struct io_ret ret;
  28.191 -    pthread_mutex_lock(&r->lock);
  28.192 -    
  28.193 -    /* the second check here is redundant -- just here for debugging now. */
  28.194 -    if ((r->state[row] == ANY) && ( r->lines[row] == 0 )) {
  28.195 -        r->state[row] = STOP;
  28.196 -        r->lines[row] = -1;
  28.197 -        DPRINTF("WLOCK  : %3d (row: %d)\n", r->lines[row], row);
  28.198 -        pthread_mutex_unlock(&r->lock);
  28.199 -        ret.type = IO_INT_T;
  28.200 -        ret.u.i = 0;
  28.201 -        cb(ret, param);
  28.202 -    } else {
  28.203 -        struct radix_wait **rwc;
  28.204 -        struct radix_wait *rw = 
  28.205 -            (struct radix_wait *) malloc (sizeof(struct radix_wait));
  28.206 -        DPRINTF("WLOCK  : %3d (row: %d) -- DEFERRED!\n", r->lines[row], row);
  28.207 -        rw->type  = WLOCK;
  28.208 -        rw->param = param;
  28.209 -        rw->cb    = cb;
  28.210 -        rw->next  = NULL;
  28.211 -        /* append to waiters list. */
  28.212 -        rwc = &r->waiters[row];
  28.213 -        while (*rwc != NULL) rwc = &(*rwc)->next;
  28.214 -        *rwc = rw;
  28.215 -        pthread_mutex_unlock(&r->lock);
  28.216 -        return;
  28.217 -    }
  28.218 -	
  28.219 -}
  28.220 -
  28.221 -/* called with radix_lock locked and lock count of zero. */
  28.222 -static void wake_waiters(struct radix_lock *r, int row)
  28.223 -{
  28.224 -    struct pending_io_req *req;
  28.225 -    struct radix_wait *rw;
  28.226 -    
  28.227 -    if (r->lines[row] != 0) return;
  28.228 -    if (r->waiters[row] == NULL) return; 
  28.229 -    
  28.230 -    if (r->waiters[row]->type == WLOCK) {
  28.231 -
  28.232 -        rw = r->waiters[row];
  28.233 -        pthread_mutex_lock(&pending_io_lock);
  28.234 -        assert(CAN_PRODUCE_PENDING_IO);
  28.235 -        
  28.236 -        req = PENDING_IO_ENT(io_prod++);
  28.237 -        req->op    = IO_WWAKE;
  28.238 -        req->cb    = rw->cb;
  28.239 -        req->param = rw->param;
  28.240 -        r->lines[row] = -1; /* write lock the row. */
  28.241 -        r->state[row] = STOP;
  28.242 -        r->waiters[row] = rw->next;
  28.243 -        free(rw);
  28.244 -        pthread_mutex_unlock(&pending_io_lock);
  28.245 -    
  28.246 -    } else /* RLOCK */ {
  28.247 -
  28.248 -        while ((r->waiters[row] != NULL) && (r->waiters[row]->type == RLOCK)) {
  28.249 -            rw = r->waiters[row];
  28.250 -            pthread_mutex_lock(&pending_io_lock);
  28.251 -            assert(CAN_PRODUCE_PENDING_IO);
  28.252 -            
  28.253 -            req = PENDING_IO_ENT(io_prod++);
  28.254 -            req->op    = IO_RWAKE;
  28.255 -            req->cb    = rw->cb;
  28.256 -            req->param = rw->param;
  28.257 -            r->lines[row]++; /* read lock the row. */
  28.258 -            r->state[row] = READ; 
  28.259 -            r->waiters[row] = rw->next;
  28.260 -            free(rw);
  28.261 -            pthread_mutex_unlock(&pending_io_lock);
  28.262 -        }
  28.263 -
  28.264 -        if (r->waiters[row] != NULL) /* There is a write queued still */
  28.265 -            r->state[row] = STOP;
  28.266 -    }	
  28.267 -    
  28.268 -    pthread_mutex_lock(&pending_io_lock);
  28.269 -    pthread_cond_signal(&pending_io_cond);
  28.270 -    pthread_mutex_unlock(&pending_io_lock);
  28.271 -}
  28.272 -
  28.273 -void block_runlock(struct radix_lock *r, int row, io_cb_t cb, void *param)
  28.274 -{
  28.275 -    struct io_ret ret;
  28.276 -	
  28.277 -    pthread_mutex_lock(&r->lock);
  28.278 -    assert(r->lines[row] > 0); /* try to catch misuse. */
  28.279 -    r->lines[row]--;
  28.280 -    if (r->lines[row] == 0) {
  28.281 -        r->state[row] = ANY;
  28.282 -        wake_waiters(r, row);
  28.283 -    }
  28.284 -    pthread_mutex_unlock(&r->lock);
  28.285 -    cb(ret, param);
  28.286 -}
  28.287 -
  28.288 -void block_wunlock(struct radix_lock *r, int row, io_cb_t cb, void *param)
  28.289 -{
  28.290 -    struct io_ret ret;
  28.291 -    
  28.292 -    pthread_mutex_lock(&r->lock);
  28.293 -    assert(r->lines[row] == -1); /* try to catch misuse. */
  28.294 -    r->lines[row] = 0;
  28.295 -    r->state[row] = ANY;
  28.296 -    wake_waiters(r, row);
  28.297 -    pthread_mutex_unlock(&r->lock);
  28.298 -    cb(ret, param);
  28.299 -}
  28.300 -
  28.301 -/* consumer calls */
  28.302 -static void do_next_io_req(struct pending_io_req *req)
  28.303 -{
  28.304 -    struct io_ret          ret;
  28.305 -    void  *param;
  28.306 -    
  28.307 -    switch (req->op) {
  28.308 -    case IO_READ:
  28.309 -        ret.type = IO_BLOCK_T;
  28.310 -        ret.u.b  = readblock(req->u.r.addr);
  28.311 -        break;
  28.312 -    case IO_WRITE:
  28.313 -        ret.type = IO_INT_T;
  28.314 -        ret.u.i  = writeblock(req->u.w.addr, req->u.w.block);
  28.315 -        DPRINTF("wrote %d at %Lu\n", *(int *)(req->u.w.block), req->u.w.addr);
  28.316 -        break;
  28.317 -    case IO_ALLOC:
  28.318 -        ret.type = IO_ADDR_T;
  28.319 -        ret.u.a  = allocblock(req->u.a.block);
  28.320 -        break;
  28.321 -    case IO_RWAKE:
  28.322 -        DPRINTF("WAKE DEFERRED RLOCK!\n");
  28.323 -        ret.type = IO_INT_T;
  28.324 -        ret.u.i  = 0;
  28.325 -        break;
  28.326 -    case IO_WWAKE:
  28.327 -        DPRINTF("WAKE DEFERRED WLOCK!\n");
  28.328 -        ret.type = IO_INT_T;
  28.329 -        ret.u.i  = 0;
  28.330 -        break;
  28.331 -    default:
  28.332 -        DPRINTF("Unknown IO operation on pending list!\n");
  28.333 -        return;
  28.334 -    }
  28.335 -    
  28.336 -    param = req->param;
  28.337 -    pthread_mutex_lock(&pending_io_lock);
  28.338 -    pending_io_list[PENDING_IO_MASK(io_free++)] = PENDING_IO_IDX(req);
  28.339 -    pthread_mutex_unlock(&pending_io_lock);
  28.340 -	
  28.341 -    assert(req->cb != NULL);
  28.342 -    req->cb(ret, param);
  28.343 -    
  28.344 -}
  28.345 -
  28.346 -void *io_thread(void *param) 
  28.347 -{
  28.348 -    int tid;
  28.349 -    struct pending_io_req *req;
  28.350 -    
  28.351 -    /* Set this thread's tid. */
  28.352 -    tid = *(int *)param;
  28.353 -    free(param);
  28.354 -    
  28.355 -start:
  28.356 -    pthread_mutex_lock(&pending_io_lock);
  28.357 -    while (io_prod == io_cons) {
  28.358 -        pthread_cond_wait(&pending_io_cond, &pending_io_lock);
  28.359 -    }
  28.360 -    
  28.361 -    if (io_prod == io_cons) {
  28.362 -        /* unnecessary wakeup. */
  28.363 -        pthread_mutex_unlock(&pending_io_lock);
  28.364 -        goto start;
  28.365 -    }
  28.366 -    
  28.367 -    req = PENDING_IO_ENT(io_cons++);
  28.368 -    pthread_mutex_unlock(&pending_io_lock);
  28.369 -	
  28.370 -    do_next_io_req(req);
  28.371 -    
  28.372 -    goto start;
  28.373 -	
  28.374 -}
  28.375 -
  28.376 -static pthread_t io_pool[IO_POOL_SIZE];
  28.377 -void start_io_threads(void)
  28.378 -
  28.379 -{	
  28.380 -    int i, tid=0;
  28.381 -    
  28.382 -    for (i=0; i < IO_POOL_SIZE; i++) {
  28.383 -        int ret, *t;
  28.384 -        t = (int *)malloc(sizeof(int));
  28.385 -        *t = tid++;
  28.386 -        ret = pthread_create(&io_pool[i], NULL, io_thread, t);
  28.387 -        if (ret != 0) printf("Error starting thread %d\n", i);
  28.388 -    }
  28.389 -	
  28.390 -}
  28.391 -
  28.392 -void init_block_async(void)
  28.393 -{
  28.394 -    init_pending_io();
  28.395 -    start_io_threads();
  28.396 -}
    29.1 --- a/tools/blktap/parallax/block-async.h	Fri Jun 23 15:26:01 2006 -0600
    29.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    29.3 @@ -1,69 +0,0 @@
    29.4 -/* block-async.h
    29.5 - * 
    29.6 - * Asynchronous block wrappers for parallax.
    29.7 - */
    29.8 - 
    29.9 -#ifndef _BLOCKASYNC_H_
   29.10 -#define _BLOCKASYNC_H_
   29.11 -
   29.12 -#include <assert.h>
   29.13 -#include <xenctrl.h>
   29.14 -#include "vdi.h"
   29.15 -
   29.16 -struct io_ret
   29.17 -{
   29.18 -    enum {IO_ADDR_T, IO_BLOCK_T, IO_INT_T} type;
   29.19 -    union {
   29.20 -        uint64_t   a;
   29.21 -        char *b;
   29.22 -        int   i;
   29.23 -    } u;
   29.24 -};
   29.25 -
   29.26 -typedef void (*io_cb_t)(struct io_ret r, void *param);
   29.27 -
   29.28 -/* per-vdi lock structures to make sure requests run in a safe order. */
   29.29 -struct radix_wait {
   29.30 -    enum {RLOCK, WLOCK} type;
   29.31 -    io_cb_t  cb;
   29.32 -    void    *param;
   29.33 -    struct radix_wait *next;
   29.34 -};
   29.35 -
   29.36 -struct radix_lock {
   29.37 -    pthread_mutex_t lock;
   29.38 -    int                    lines[1024];
   29.39 -    struct radix_wait     *waiters[1024];
   29.40 -    enum {ANY, READ, STOP} state[1024];
   29.41 -};
   29.42 -void radix_lock_init(struct radix_lock *r);
   29.43 -
   29.44 -void block_read(uint64_t addr, io_cb_t cb, void *param);
   29.45 -void block_write(uint64_t addr, char *block, io_cb_t cb, void *param);
   29.46 -void block_alloc(char *block, io_cb_t cb, void *param);
   29.47 -void block_rlock(struct radix_lock *r, int row, io_cb_t cb, void *param);
   29.48 -void block_wlock(struct radix_lock *r, int row, io_cb_t cb, void *param);
   29.49 -void block_runlock(struct radix_lock *r, int row, io_cb_t cb, void *param);
   29.50 -void block_wunlock(struct radix_lock *r, int row, io_cb_t cb, void *param);
   29.51 -void init_block_async(void);
   29.52 -
   29.53 -static inline uint64_t IO_ADDR(struct io_ret r)
   29.54 -{
   29.55 -    assert(r.type == IO_ADDR_T);
   29.56 -    return r.u.a;
   29.57 -}
   29.58 -
   29.59 -static inline char *IO_BLOCK(struct io_ret r)
   29.60 -{
   29.61 -    assert(r.type == IO_BLOCK_T);
   29.62 -    return r.u.b;
   29.63 -}
   29.64 -
   29.65 -static inline int IO_INT(struct io_ret r)
   29.66 -{
   29.67 -    assert(r.type == IO_INT_T);
   29.68 -    return r.u.i;
   29.69 -}
   29.70 -
   29.71 -
   29.72 -#endif //_BLOCKASYNC_H_
    30.1 --- a/tools/blktap/parallax/blockstore.c	Fri Jun 23 15:26:01 2006 -0600
    30.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    30.3 @@ -1,1348 +0,0 @@
    30.4 -/**************************************************************************
    30.5 - * 
    30.6 - * blockstore.c
    30.7 - *
    30.8 - * Simple block store interface
    30.9 - *
   30.10 - */
   30.11 - 
   30.12 -#include <fcntl.h>
   30.13 -#include <unistd.h>
   30.14 -#include <stdio.h>
   30.15 -#include <stdlib.h>
   30.16 -#include <string.h>
   30.17 -#include <sys/types.h>
   30.18 -#include <sys/stat.h>
   30.19 -#include <sys/time.h>
   30.20 -#include <stdarg.h>
   30.21 -#include "blockstore.h"
   30.22 -#include <pthread.h>
   30.23 -
   30.24 -//#define BLOCKSTORE_REMOTE
   30.25 -//#define BSDEBUG
   30.26 -
   30.27 -#define RETRY_TIMEOUT 1000000 /* microseconds */
   30.28 -
   30.29 -/*****************************************************************************
   30.30 - * Debugging
   30.31 - */
   30.32 -#ifdef BSDEBUG
   30.33 -void DB(char *format, ...)
   30.34 -{
   30.35 -    va_list args;
   30.36 -    fprintf(stderr, "[%05u] ", (int)pthread_getspecific(tid_key));
   30.37 -    va_start(args, format);
   30.38 -    vfprintf(stderr, format, args);
   30.39 -    va_end(args);
   30.40 -}
   30.41 -#else
   30.42 -#define DB(format, ...) (void)0
   30.43 -#endif
   30.44 -
   30.45 -#ifdef BLOCKSTORE_REMOTE
   30.46 -
   30.47 -#include <sys/socket.h>
   30.48 -#include <sys/ioctl.h>
   30.49 -#include <netinet/in.h>
   30.50 -#include <netdb.h>
   30.51 -
   30.52 -/*****************************************************************************
   30.53 - * Network state                                                             *
   30.54 - *****************************************************************************/
   30.55 -
   30.56 -/* The individual disk servers we talks to. These will be referenced by
   30.57 - * an integer index into bsservers[].
   30.58 - */
   30.59 -bsserver_t bsservers[MAX_SERVERS];
   30.60 -
   30.61 -/* The cluster map. This is indexed by an integer cluster number.
   30.62 - */
   30.63 -bscluster_t bsclusters[MAX_CLUSTERS];
   30.64 -
   30.65 -/* Local socket.
   30.66 - */
   30.67 -struct sockaddr_in sin_local;
   30.68 -int bssock = 0;
   30.69 -
   30.70 -/*****************************************************************************
   30.71 - * Notification                                                              *
   30.72 - *****************************************************************************/
   30.73 -
   30.74 -typedef struct pool_thread_t_struct {
   30.75 -    pthread_mutex_t ptmutex;
   30.76 -    pthread_cond_t ptcv;
   30.77 -    int newdata;
   30.78 -} pool_thread_t;
   30.79 -
   30.80 -pool_thread_t pool_thread[READ_POOL_SIZE+1];
   30.81 -
   30.82 -#define RECV_NOTIFY(tid) { \
   30.83 -    pthread_mutex_lock(&(pool_thread[tid].ptmutex)); \
   30.84 -    pool_thread[tid].newdata = 1; \
   30.85 -    DB("CV Waking %u", tid); \
   30.86 -    pthread_cond_signal(&(pool_thread[tid].ptcv)); \
   30.87 -    pthread_mutex_unlock(&(pool_thread[tid].ptmutex)); }
   30.88 -#define RECV_AWAIT(tid) { \
   30.89 -    pthread_mutex_lock(&(pool_thread[tid].ptmutex)); \
   30.90 -    if (pool_thread[tid].newdata) { \
   30.91 -        pool_thread[tid].newdata = 0; \
   30.92 -        DB("CV Woken %u", tid); \
   30.93 -    } \
   30.94 -    else { \
   30.95 -        DB("CV Waiting %u", tid); \
   30.96 -        pthread_cond_wait(&(pool_thread[tid].ptcv), \
   30.97 -                          &(pool_thread[tid].ptmutex)); \
   30.98 -    } \
   30.99 -    pthread_mutex_unlock(&(pool_thread[tid].ptmutex)); }
  30.100 -
  30.101 -/*****************************************************************************
  30.102 - * Message queue management                                                  *
  30.103 - *****************************************************************************/
  30.104 -
  30.105 -/* Protects the queue manipulation critcal regions.
  30.106 - */
  30.107 -pthread_mutex_t ptmutex_queue;
  30.108 -#define ENTER_QUEUE_CR pthread_mutex_lock(&ptmutex_queue)
  30.109 -#define LEAVE_QUEUE_CR pthread_mutex_unlock(&ptmutex_queue)
  30.110 -
  30.111 -pthread_mutex_t ptmutex_recv;
  30.112 -#define ENTER_RECV_CR pthread_mutex_lock(&ptmutex_recv)
  30.113 -#define LEAVE_RECV_CR pthread_mutex_unlock(&ptmutex_recv)
  30.114 -
  30.115 -/* A message queue entry. We allocate one of these for every request we send.
  30.116 - * Asynchronous reply reception also used one of these.
  30.117 - */
  30.118 -typedef struct bsq_t_struct {
  30.119 -    struct bsq_t_struct *prev;
  30.120 -    struct bsq_t_struct *next;
  30.121 -    int status;
  30.122 -    int server;
  30.123 -    int length;
  30.124 -    struct msghdr msghdr;
  30.125 -    struct iovec iov[2];
  30.126 -    int tid;
  30.127 -    struct timeval tv_sent;
  30.128 -    bshdr_t message;
  30.129 -    void *block;
  30.130 -} bsq_t;
  30.131 -
  30.132 -#define BSQ_STATUS_MATCHED 1
  30.133 -
  30.134 -pthread_mutex_t ptmutex_luid;
  30.135 -#define ENTER_LUID_CR pthread_mutex_lock(&ptmutex_luid)
  30.136 -#define LEAVE_LUID_CR pthread_mutex_unlock(&ptmutex_luid)
  30.137 -
  30.138 -static uint64_t luid_cnt = 0x1000ULL;
  30.139 -uint64_t new_luid(void) {
  30.140 -    uint64_t luid;
  30.141 -    ENTER_LUID_CR;
  30.142 -    luid = luid_cnt++;
  30.143 -    LEAVE_LUID_CR;
  30.144 -    return luid;
  30.145 -}
  30.146 -
  30.147 -/* Queue of outstanding requests.
  30.148 - */
  30.149 -bsq_t *bs_head = NULL;
  30.150 -bsq_t *bs_tail = NULL;
  30.151 -int bs_qlen = 0;
  30.152 -
  30.153 -/*
  30.154 - */
  30.155 -void queuedebug(char *msg) {
  30.156 -    bsq_t *q;
  30.157 -    ENTER_QUEUE_CR;
  30.158 -    fprintf(stderr, "Q: %s len=%u\n", msg, bs_qlen);
  30.159 -    for (q = bs_head; q; q = q->next) {
  30.160 -        fprintf(stderr, "  luid=%016llx server=%u\n",
  30.161 -                q->message.luid, q->server);
  30.162 -    }
  30.163 -    LEAVE_QUEUE_CR;
  30.164 -}
  30.165 -
  30.166 -int enqueue(bsq_t *qe) {
  30.167 -    ENTER_QUEUE_CR;
  30.168 -    qe->next = NULL;
  30.169 -    qe->prev = bs_tail;
  30.170 -    if (!bs_head)
  30.171 -        bs_head = qe;
  30.172 -    else
  30.173 -        bs_tail->next = qe;
  30.174 -    bs_tail = qe;
  30.175 -    bs_qlen++;
  30.176 -    LEAVE_QUEUE_CR;
  30.177 -#ifdef BSDEBUG
  30.178 -    queuedebug("enqueue");
  30.179 -#endif
  30.180 -    return 0;
  30.181 -}
  30.182 -
  30.183 -int dequeue(bsq_t *qe) {
  30.184 -    bsq_t *q;
  30.185 -    ENTER_QUEUE_CR;
  30.186 -    for (q = bs_head; q; q = q->next) {
  30.187 -        if (q == qe) {
  30.188 -            if (q->prev)
  30.189 -                q->prev->next = q->next;
  30.190 -            else 
  30.191 -                bs_head = q->next;
  30.192 -            if (q->next)
  30.193 -                q->next->prev = q->prev;
  30.194 -            else
  30.195 -                bs_tail = q->prev;
  30.196 -            bs_qlen--;
  30.197 -            goto found;
  30.198 -        }
  30.199 -    }
  30.200 -
  30.201 -    LEAVE_QUEUE_CR;
  30.202 -#ifdef BSDEBUG
  30.203 -    queuedebug("dequeue not found");
  30.204 -#endif
  30.205 -    return 0;
  30.206 -
  30.207 -    found:
  30.208 -    LEAVE_QUEUE_CR;
  30.209 -#ifdef BSDEBUG
  30.210 -    queuedebug("dequeue not found");
  30.211 -#endif
  30.212 -    return 1;
  30.213 -}
  30.214 -
  30.215 -bsq_t *queuesearch(bsq_t *qe) {
  30.216 -    bsq_t *q;
  30.217 -    ENTER_QUEUE_CR;
  30.218 -    for (q = bs_head; q; q = q->next) {
  30.219 -        if ((qe->server == q->server) &&
  30.220 -            (qe->message.operation == q->message.operation) &&
  30.221 -            (qe->message.luid == q->message.luid)) {
  30.222 -
  30.223 -            if ((q->message.operation == BSOP_READBLOCK) &&
  30.224 -                ((q->message.flags & BSOP_FLAG_ERROR) == 0)) {
  30.225 -                q->block = qe->block;
  30.226 -                qe->block = NULL;
  30.227 -            }
  30.228 -            q->length = qe->length;
  30.229 -            q->message.flags = qe->message.flags;
  30.230 -            q->message.id = qe->message.id;
  30.231 -            q->status |= BSQ_STATUS_MATCHED;
  30.232 -
  30.233 -            if (q->prev)
  30.234 -                q->prev->next = q->next;
  30.235 -            else 
  30.236 -                bs_head = q->next;
  30.237 -            if (q->next)
  30.238 -                q->next->prev = q->prev;
  30.239 -            else
  30.240 -                bs_tail = q->prev;
  30.241 -            q->next = NULL;
  30.242 -            q->prev = NULL;
  30.243 -            bs_qlen--;
  30.244 -            goto found;
  30.245 -        }
  30.246 -    }
  30.247 -
  30.248 -    LEAVE_QUEUE_CR;
  30.249 -#ifdef BSDEBUG
  30.250 -    queuedebug("queuesearch not found");
  30.251 -#endif
  30.252 -    return NULL;
  30.253 -
  30.254 -    found:
  30.255 -    LEAVE_QUEUE_CR;
  30.256 -#ifdef BSDEBUG
  30.257 -    queuedebug("queuesearch found");
  30.258 -#endif
  30.259 -    return q;
  30.260 -}
  30.261 -
  30.262 -/*****************************************************************************
  30.263 - * Network communication                                                     *
  30.264 - *****************************************************************************/
  30.265 -
  30.266 -int send_message(bsq_t *qe) {
  30.267 -    int rc;
  30.268 -
  30.269 -    qe->msghdr.msg_name = (void *)&(bsservers[qe->server].sin);
  30.270 -    qe->msghdr.msg_namelen = sizeof(struct sockaddr_in);
  30.271 -    qe->msghdr.msg_iov = qe->iov;
  30.272 -    if (qe->block)
  30.273 -        qe->msghdr.msg_iovlen = 2;
  30.274 -    else
  30.275 -        qe->msghdr.msg_iovlen = 1;
  30.276 -    qe->msghdr.msg_control = NULL;
  30.277 -    qe->msghdr.msg_controllen = 0;
  30.278 -    qe->msghdr.msg_flags = 0;
  30.279 -
  30.280 -    qe->iov[0].iov_base = (void *)&(qe->message);
  30.281 -    qe->iov[0].iov_len = MSGBUFSIZE_ID;
  30.282 -
  30.283 -    if (qe->block) {
  30.284 -        qe->iov[1].iov_base = qe->block;
  30.285 -        qe->iov[1].iov_len = BLOCK_SIZE;
  30.286 -    }
  30.287 -
  30.288 -    qe->message.luid = new_luid();
  30.289 -
  30.290 -    qe->status = 0;
  30.291 -    qe->tid = (int)pthread_getspecific(tid_key);
  30.292 -    if (enqueue(qe) < 0) {
  30.293 -        fprintf(stderr, "Error enqueuing request.\n");
  30.294 -        return -1;
  30.295 -    }
  30.296 -
  30.297 -    gettimeofday(&(qe->tv_sent), NULL);
  30.298 -    DB("send_message to %d luid=%016llx\n", qe->server, qe->message.luid);
  30.299 -    rc = sendmsg(bssock, &(qe->msghdr), MSG_DONTWAIT);
  30.300 -    //rc = sendto(bssock, (void *)&(qe->message), qe->length, 0,
  30.301 -    //           (struct sockaddr *)&(bsservers[qe->server].sin),
  30.302 -    //           sizeof(struct sockaddr_in));
  30.303 -    if (rc < 0)
  30.304 -        return rc;
  30.305 -
  30.306 -    return rc;
  30.307 -}
  30.308 -
  30.309 -int recv_message(bsq_t *qe) {
  30.310 -    struct sockaddr_in from;
  30.311 -    //int flen = sizeof(from);
  30.312 -    int rc;
  30.313 -
  30.314 -    qe->msghdr.msg_name = &from;
  30.315 -    qe->msghdr.msg_namelen = sizeof(struct sockaddr_in);
  30.316 -    qe->msghdr.msg_iov = qe->iov;
  30.317 -    if (qe->block)
  30.318 -        qe->msghdr.msg_iovlen = 2;
  30.319 -    else
  30.320 -        qe->msghdr.msg_iovlen = 1;
  30.321 -    qe->msghdr.msg_control = NULL;
  30.322 -    qe->msghdr.msg_controllen = 0;
  30.323 -    qe->msghdr.msg_flags = 0;
  30.324 -
  30.325 -    qe->iov[0].iov_base = (void *)&(qe->message);
  30.326 -    qe->iov[0].iov_len = MSGBUFSIZE_ID;
  30.327 -    if (qe->block) {
  30.328 -        qe->iov[1].iov_base = qe->block;
  30.329 -        qe->iov[1].iov_len = BLOCK_SIZE;
  30.330 -    }
  30.331 -
  30.332 -    rc = recvmsg(bssock, &(qe->msghdr), 0);
  30.333 -
  30.334 -    //return recvfrom(bssock, (void *)&(qe->message), sizeof(bsmsg_t), 0,
  30.335 -    //               (struct sockaddr *)&from, &flen);
  30.336 -    return rc;
  30.337 -}
  30.338 -
  30.339 -int get_server_number(struct sockaddr_in *sin) {
  30.340 -    int i;
  30.341 -
  30.342 -#ifdef BSDEBUG2
  30.343 -    fprintf(stderr,
  30.344 -            "get_server_number(%u.%u.%u.%u/%u)\n",
  30.345 -            (unsigned int)sin->sin_addr.s_addr & 0xff,
  30.346 -            ((unsigned int)sin->sin_addr.s_addr >> 8) & 0xff,
  30.347 -            ((unsigned int)sin->sin_addr.s_addr >> 16) & 0xff,
  30.348 -            ((unsigned int)sin->sin_addr.s_addr >> 24) & 0xff,
  30.349 -            (unsigned int)sin->sin_port);
  30.350 -#endif
  30.351 -
  30.352 -    for (i = 0; i < MAX_SERVERS; i++) {
  30.353 -        if (bsservers[i].hostname) {
  30.354 -#ifdef BSDEBUG2
  30.355 -            fprintf(stderr,
  30.356 -                    "get_server_number check %u.%u.%u.%u/%u\n",
  30.357 -                    (unsigned int)bsservers[i].sin.sin_addr.s_addr&0xff,
  30.358 -                    ((unsigned int)bsservers[i].sin.sin_addr.s_addr >> 8)&0xff,
  30.359 -                    ((unsigned int)bsservers[i].sin.sin_addr.s_addr >> 16)&0xff,
  30.360 -                    ((unsigned int)bsservers[i].sin.sin_addr.s_addr >> 24)&0xff,
  30.361 -                    (unsigned int)bsservers[i].sin.sin_port);
  30.362 -#endif
  30.363 -            if ((sin->sin_family == bsservers[i].sin.sin_family) &&
  30.364 -                (sin->sin_port == bsservers[i].sin.sin_port) &&
  30.365 -                (memcmp((void *)&(sin->sin_addr),
  30.366 -                        (void *)&(bsservers[i].sin.sin_addr),
  30.367 -                        sizeof(struct in_addr)) == 0)) {
  30.368 -                return i;
  30.369 -            }
  30.370 -        }        
  30.371 -    }
  30.372 -
  30.373 -    return -1;
  30.374 -}
  30.375 -
  30.376 -void *rx_buffer = NULL;
  30.377 -bsq_t rx_qe;
  30.378 -bsq_t *recv_any(void) {
  30.379 -    struct sockaddr_in from;
  30.380 -    int rc;
  30.381 -    
  30.382 -    DB("ENTER recv_any\n");
  30.383 -
  30.384 -    rx_qe.msghdr.msg_name = &from;
  30.385 -    rx_qe.msghdr.msg_namelen = sizeof(struct sockaddr_in);
  30.386 -    rx_qe.msghdr.msg_iov = rx_qe.iov;
  30.387 -    if (!rx_buffer) {
  30.388 -        rx_buffer = malloc(BLOCK_SIZE);
  30.389 -        if (!rx_buffer) {
  30.390 -            perror("recv_any malloc");
  30.391 -            return NULL;
  30.392 -        }
  30.393 -    }
  30.394 -    rx_qe.block = rx_buffer;
  30.395 -    rx_buffer = NULL;
  30.396 -    rx_qe.msghdr.msg_iovlen = 2;
  30.397 -    rx_qe.msghdr.msg_control = NULL;
  30.398 -    rx_qe.msghdr.msg_controllen = 0;
  30.399 -    rx_qe.msghdr.msg_flags = 0;
  30.400 -    
  30.401 -    rx_qe.iov[0].iov_base = (void *)&(rx_qe.message);
  30.402 -    rx_qe.iov[0].iov_len = MSGBUFSIZE_ID;
  30.403 -    rx_qe.iov[1].iov_base = rx_qe.block;
  30.404 -    rx_qe.iov[1].iov_len = BLOCK_SIZE;
  30.405 -
  30.406 -    rc = recvmsg(bssock, &(rx_qe.msghdr), 0);
  30.407 -    if (rc < 0) {
  30.408 -        perror("recv_any");
  30.409 -        return NULL;
  30.410 -    }
  30.411 -
  30.412 -    rx_qe.length = rc;    
  30.413 -    rx_qe.server = get_server_number(&from);
  30.414 -
  30.415 -    DB("recv_any from %d luid=%016llx len=%u\n",
  30.416 -       rx_qe.server, rx_qe.message.luid, rx_qe.length);
  30.417 -
  30.418 -    return &rx_qe;
  30.419 -}
  30.420 -
  30.421 -void recv_recycle_buffer(bsq_t *q) {
  30.422 -    if (q->block) {
  30.423 -        rx_buffer = q->block;
  30.424 -        q->block = NULL;
  30.425 -    }
  30.426 -}
  30.427 -
  30.428 -// cycle through reading any incoming, searching for a match in the
  30.429 -// queue, until we have all we need.
  30.430 -int wait_recv(bsq_t **reqs, int numreqs) {
  30.431 -    bsq_t *q, *m;
  30.432 -    unsigned int x, i;
  30.433 -    int tid = (int)pthread_getspecific(tid_key);
  30.434 -
  30.435 -    DB("ENTER wait_recv %u\n", numreqs);
  30.436 -
  30.437 -    checkmatch:
  30.438 -    x = 0xffffffff;
  30.439 -    for (i = 0; i < numreqs; i++) {
  30.440 -        x &= reqs[i]->status;
  30.441 -    }
  30.442 -    if ((x & BSQ_STATUS_MATCHED)) {
  30.443 -        DB("LEAVE wait_recv\n");
  30.444 -        return numreqs;
  30.445 -    }
  30.446 -
  30.447 -    RECV_AWAIT(tid);
  30.448 -
  30.449 -    /*
  30.450 -    rxagain:
  30.451 -    ENTER_RECV_CR;
  30.452 -    q = recv_any();
  30.453 -    LEAVE_RECV_CR;
  30.454 -    if (!q)
  30.455 -        return -1;
  30.456 -
  30.457 -    m = queuesearch(q);
  30.458 -    recv_recycle_buffer(q);
  30.459 -    if (!m) {
  30.460 -        fprintf(stderr, "Unmatched RX\n");
  30.461 -        goto rxagain;
  30.462 -    }
  30.463 -    */
  30.464 -
  30.465 -    goto checkmatch;
  30.466 -
  30.467 -}
  30.468 -
  30.469 -/* retry
  30.470 - */
  30.471 -static int retry_count = 0;
  30.472 -int retry(bsq_t *qe)
  30.473 -{
  30.474 -    int rc;
  30.475 -    gettimeofday(&(qe->tv_sent), NULL);
  30.476 -    DB("retry to %d luid=%016llx\n", qe->server, qe->message.luid);
  30.477 -    retry_count++;
  30.478 -    rc = sendmsg(bssock, &(qe->msghdr), MSG_DONTWAIT);
  30.479 -    if (rc < 0)
  30.480 -        return rc;
  30.481 -    return 0;
  30.482 -}
  30.483 -
  30.484 -/* queue runner
  30.485 - */
  30.486 -void *queue_runner(void *arg)
  30.487 -{
  30.488 -    for (;;) {
  30.489 -        struct timeval now;
  30.490 -        long long nowus, sus;
  30.491 -        bsq_t *q;
  30.492 -        int r;
  30.493 -
  30.494 -        sleep(1);
  30.495 -
  30.496 -        gettimeofday(&now, NULL);
  30.497 -        nowus = now.tv_usec + now.tv_sec * 1000000;
  30.498 -        ENTER_QUEUE_CR;
  30.499 -        r = retry_count;
  30.500 -        for (q = bs_head; q; q = q->next) {
  30.501 -            sus = q->tv_sent.tv_usec + q->tv_sent.tv_sec * 1000000;
  30.502 -            if ((nowus - sus) > RETRY_TIMEOUT) {
  30.503 -                if (retry(q) < 0) {
  30.504 -                    fprintf(stderr, "Error on sendmsg retry.\n");
  30.505 -                }
  30.506 -            }
  30.507 -        }
  30.508 -        if (r != retry_count) {
  30.509 -            fprintf(stderr, "RETRIES: %u %u\n", retry_count - r, retry_count);
  30.510 -        }
  30.511 -        LEAVE_QUEUE_CR;
  30.512 -    }
  30.513 -}
  30.514 -
  30.515 -/* receive loop
  30.516 - */
  30.517 -void *receive_loop(void *arg)
  30.518 -{
  30.519 -    bsq_t *q, *m;
  30.520 -
  30.521 -    for(;;) {
  30.522 -        q = recv_any();
  30.523 -        if (!q) {
  30.524 -            fprintf(stderr, "recv_any error\n");
  30.525 -        }
  30.526 -        else {
  30.527 -            m = queuesearch(q);
  30.528 -            recv_recycle_buffer(q);
  30.529 -            if (!m) {
  30.530 -                fprintf(stderr, "Unmatched RX\n");
  30.531 -            }
  30.532 -            else {
  30.533 -                DB("RX MATCH");
  30.534 -                RECV_NOTIFY(m->tid);
  30.535 -            }
  30.536 -        }
  30.537 -    }
  30.538 -}
  30.539 -pthread_t pthread_recv;
  30.540 -
  30.541 -/*****************************************************************************
  30.542 - * Reading                                                                   *
  30.543 - *****************************************************************************/
  30.544 -
  30.545 -void *readblock_indiv(int server, uint64_t id) {
  30.546 -    void *block;
  30.547 -    bsq_t *qe;
  30.548 -    int len, rc;
  30.549 -
  30.550 -    qe = (bsq_t *)malloc(sizeof(bsq_t));
  30.551 -    if (!qe) {
  30.552 -        perror("readblock qe malloc");
  30.553 -        return NULL;
  30.554 -    }
  30.555 -    qe->block = NULL;
  30.556 -    
  30.557 -    /*
  30.558 -    qe->block = malloc(BLOCK_SIZE);
  30.559 -    if (!qe->block) {
  30.560 -        perror("readblock qe malloc");
  30.561 -        free((void *)qe);
  30.562 -        return NULL;
  30.563 -    }
  30.564 -    */
  30.565 -
  30.566 -    qe->server = server;
  30.567 -
  30.568 -    qe->message.operation = BSOP_READBLOCK;
  30.569 -    qe->message.flags = 0;
  30.570 -    qe->message.id = id;
  30.571 -    qe->length = MSGBUFSIZE_ID;
  30.572 -
  30.573 -    if (send_message(qe) < 0) {
  30.574 -        perror("readblock sendto");
  30.575 -        goto err;
  30.576 -    }
  30.577 -    
  30.578 -    /*len = recv_message(qe);
  30.579 -    if (len < 0) {
  30.580 -        perror("readblock recv");
  30.581 -        goto err;
  30.582 -    }*/
  30.583 -
  30.584 -    rc = wait_recv(&qe, 1);
  30.585 -    if (rc < 0) {
  30.586 -        perror("readblock recv");
  30.587 -        goto err;
  30.588 -    }
  30.589 -
  30.590 -    if ((qe->message.flags & BSOP_FLAG_ERROR)) {
  30.591 -        fprintf(stderr, "readblock server error\n");
  30.592 -        goto err;
  30.593 -    }
  30.594 -    if (qe->length < MSGBUFSIZE_BLOCK) {
  30.595 -        fprintf(stderr, "readblock recv short (%u)\n", len);
  30.596 -        goto err;
  30.597 -    }
  30.598 -    /* if ((block = malloc(BLOCK_SIZE)) == NULL) {
  30.599 -        perror("readblock malloc");
  30.600 -        goto err;
  30.601 -    }
  30.602 -    memcpy(block, qe->message.block, BLOCK_SIZE);
  30.603 -    */    
  30.604 -    block = qe->block;
  30.605 -
  30.606 -    free((void *)qe);
  30.607 -    return block;
  30.608 -
  30.609 -    err:
  30.610 -    free(qe->block);
  30.611 -    free((void *)qe);
  30.612 -    return NULL;
  30.613 -}
  30.614 -
  30.615 -/**
  30.616 - * readblock: read a block from disk
  30.617 - *   @id: block id to read
  30.618 - *
  30.619 - *   @return: pointer to block, NULL on error
  30.620 - */
  30.621 -void *readblock(uint64_t id) {
  30.622 -    int map = (int)BSID_MAP(id);
  30.623 -    uint64_t xid;
  30.624 -    static int i = CLUSTER_MAX_REPLICAS - 1;
  30.625 -    void *block = NULL;
  30.626 -
   30.627 -    /* Special case for the "superblock": just use the first block on the
   30.628 -     * first replica (extended to all blocks < 6 for the vdi bug).
  30.629 -     */
  30.630 -    if (id < 6) {
  30.631 -        block = readblock_indiv(bsclusters[map].servers[0], id);
  30.632 -        goto out;
  30.633 -    }
  30.634 -
  30.635 -    i++;
  30.636 -    if (i >= CLUSTER_MAX_REPLICAS)
  30.637 -        i = 0;
  30.638 -    switch (i) {
  30.639 -    case 0:
  30.640 -        xid = BSID_REPLICA0(id);
  30.641 -        break;
  30.642 -    case 1:
  30.643 -        xid = BSID_REPLICA1(id);
  30.644 -        break;
  30.645 -    case 2:
  30.646 -        xid = BSID_REPLICA2(id);
  30.647 -        break;
  30.648 -    }
  30.649 -    
  30.650 -    block = readblock_indiv(bsclusters[map].servers[i], xid);
  30.651 -
  30.652 -    out:
  30.653 -#ifdef BSDEBUG
  30.654 -    if (block)
  30.655 -        fprintf(stderr, "READ:  %016llx %02x%02x %02x%02x %02x%02x %02x%02x\n",
  30.656 -                id,
  30.657 -                (unsigned int)((unsigned char *)block)[0],
  30.658 -                (unsigned int)((unsigned char *)block)[1],
  30.659 -                (unsigned int)((unsigned char *)block)[2],
  30.660 -                (unsigned int)((unsigned char *)block)[3],
  30.661 -                (unsigned int)((unsigned char *)block)[4],
  30.662 -                (unsigned int)((unsigned char *)block)[5],
  30.663 -                (unsigned int)((unsigned char *)block)[6],
  30.664 -                (unsigned int)((unsigned char *)block)[7]);
  30.665 -    else
  30.666 -        fprintf(stderr, "READ:  %016llx NULL\n", id);
  30.667 -#endif
  30.668 -    return block;
  30.669 -}
  30.670 -
  30.671 -/*****************************************************************************
  30.672 - * Writing                                                                   *
  30.673 - *****************************************************************************/
  30.674 -
  30.675 -bsq_t *writeblock_indiv(int server, uint64_t id, void *block) {
  30.676 -
  30.677 -    bsq_t *qe;
  30.678 -    int len;
  30.679 -
  30.680 -    qe = (bsq_t *)malloc(sizeof(bsq_t));
  30.681 -    if (!qe) {
  30.682 -        perror("writeblock qe malloc");
  30.683 -        goto err;
  30.684 -    }
  30.685 -    qe->server = server;
  30.686 -
  30.687 -    qe->message.operation = BSOP_WRITEBLOCK;
  30.688 -    qe->message.flags = 0;
  30.689 -    qe->message.id = id;
  30.690 -    //memcpy(qe->message.block, block, BLOCK_SIZE);
  30.691 -    qe->block = block;
  30.692 -    qe->length = MSGBUFSIZE_BLOCK;
  30.693 -
  30.694 -    if (send_message(qe) < 0) {
  30.695 -        perror("writeblock sendto");
  30.696 -        goto err;
  30.697 -    }
  30.698 -
  30.699 -    return qe;
  30.700 -
  30.701 -    err:
  30.702 -    free((void *)qe);
  30.703 -    return NULL;
  30.704 -}
  30.705 -    
  30.706 -
  30.707 -/**
  30.708 - * writeblock: write an existing block to disk
  30.709 - *   @id: block id
  30.710 - *   @block: pointer to block
  30.711 - *
  30.712 - *   @return: zero on success, -1 on failure
  30.713 - */
  30.714 -int writeblock(uint64_t id, void *block) {
  30.715 -    
  30.716 -    int map = (int)BSID_MAP(id);
  30.717 -    int rep0 = bsclusters[map].servers[0];
  30.718 -    int rep1 = bsclusters[map].servers[1];
  30.719 -    int rep2 = bsclusters[map].servers[2];
  30.720 -    bsq_t *reqs[3];
  30.721 -    int rc;
  30.722 -
  30.723 -    reqs[0] = reqs[1] = reqs[2] = NULL;
  30.724 -
  30.725 -#ifdef BSDEBUG
  30.726 -    fprintf(stderr,
  30.727 -            "WRITE: %016llx %02x%02x %02x%02x %02x%02x %02x%02x\n",
  30.728 -            id,
  30.729 -            (unsigned int)((unsigned char *)block)[0],
  30.730 -            (unsigned int)((unsigned char *)block)[1],
  30.731 -            (unsigned int)((unsigned char *)block)[2],
  30.732 -            (unsigned int)((unsigned char *)block)[3],
  30.733 -            (unsigned int)((unsigned char *)block)[4],
  30.734 -            (unsigned int)((unsigned char *)block)[5],
  30.735 -            (unsigned int)((unsigned char *)block)[6],
  30.736 -            (unsigned int)((unsigned char *)block)[7]);
  30.737 -#endif
  30.738 -
   30.739 -    /* Special case for the "superblock": just use the first block on the
   30.740 -     * first replica (extended to all blocks < 6 for the vdi bug).
  30.741 -     */
  30.742 -    if (id < 6) {
  30.743 -        reqs[0] = writeblock_indiv(rep0, id, block);
  30.744 -        if (!reqs[0])
  30.745 -            return -1;
  30.746 -        rc = wait_recv(reqs, 1);
  30.747 -        return rc;
  30.748 -    }
  30.749 -
  30.750 -    reqs[0] = writeblock_indiv(rep0, BSID_REPLICA0(id), block);
  30.751 -    if (!reqs[0])
  30.752 -        goto err;
  30.753 -    reqs[1] = writeblock_indiv(rep1, BSID_REPLICA1(id), block);
  30.754 -    if (!reqs[1])
  30.755 -        goto err;
  30.756 -    reqs[2] = writeblock_indiv(rep2, BSID_REPLICA2(id), block);
  30.757 -    if (!reqs[2])
  30.758 -        goto err;
  30.759 -
  30.760 -    rc = wait_recv(reqs, 3);
  30.761 -    if (rc < 0) {
  30.762 -        perror("writeblock recv");
  30.763 -        goto err;
  30.764 -    }
  30.765 -    if ((reqs[0]->message.flags & BSOP_FLAG_ERROR)) {
  30.766 -        fprintf(stderr, "writeblock server0 error\n");
  30.767 -        goto err;
  30.768 -    }
  30.769 -    if ((reqs[1]->message.flags & BSOP_FLAG_ERROR)) {
  30.770 -        fprintf(stderr, "writeblock server1 error\n");
  30.771 -        goto err;
  30.772 -    }
  30.773 -    if ((reqs[2]->message.flags & BSOP_FLAG_ERROR)) {
  30.774 -        fprintf(stderr, "writeblock server2 error\n");
  30.775 -        goto err;
  30.776 -    }
  30.777 -
  30.778 -
  30.779 -    free((void *)reqs[0]);
  30.780 -    free((void *)reqs[1]);
  30.781 -    free((void *)reqs[2]);
  30.782 -    return 0;
  30.783 -
  30.784 -    err:
  30.785 -    if (reqs[0]) {
  30.786 -        dequeue(reqs[0]);
  30.787 -        free((void *)reqs[0]);
  30.788 -    }
  30.789 -    if (reqs[1]) {
  30.790 -        dequeue(reqs[1]);
  30.791 -        free((void *)reqs[1]);
  30.792 -    }
  30.793 -    if (reqs[2]) {
  30.794 -        dequeue(reqs[2]);
  30.795 -        free((void *)reqs[2]);
  30.796 -    }
  30.797 -    return -1;
  30.798 -}
  30.799 -
  30.800 -/*****************************************************************************
  30.801 - * Allocation                                                                *
  30.802 - *****************************************************************************/
  30.803 -
  30.804 -/**
  30.805 - * allocblock: write a new block to disk
  30.806 - *   @block: pointer to block
  30.807 - *
   30.808 - *   @return: new id of block on disk, 0 on error
  30.809 - */
  30.810 -uint64_t allocblock(void *block) {
  30.811 -    return allocblock_hint(block, 0);
  30.812 -}
  30.813 -
  30.814 -bsq_t *allocblock_hint_indiv(int server, void *block, uint64_t hint) {
  30.815 -    bsq_t *qe;
  30.816 -    int len;
  30.817 -
  30.818 -    qe = (bsq_t *)malloc(sizeof(bsq_t));
  30.819 -    if (!qe) {
  30.820 -        perror("allocblock_hint qe malloc");
  30.821 -        goto err;
  30.822 -    }
  30.823 -    qe->server = server;
  30.824 -
  30.825 -    qe->message.operation = BSOP_ALLOCBLOCK;
  30.826 -    qe->message.flags = 0;
  30.827 -    qe->message.id = hint;
  30.828 -    //memcpy(qe->message.block, block, BLOCK_SIZE);
  30.829 -    qe->block = block;
  30.830 -    qe->length = MSGBUFSIZE_BLOCK;
  30.831 -
  30.832 -    if (send_message(qe) < 0) {
  30.833 -        perror("allocblock_hint sendto");
  30.834 -        goto err;
  30.835 -    }
  30.836 -    
  30.837 -    return qe;
  30.838 -
  30.839 -    err:
  30.840 -    free((void *)qe);
  30.841 -    return NULL;
  30.842 -}
  30.843 -
  30.844 -/**
  30.845 - * allocblock_hint: write a new block to disk
  30.846 - *   @block: pointer to block
  30.847 - *   @hint: allocation hint
  30.848 - *
   30.849 - *   @return: new id of block on disk, 0 on error
  30.850 - */
  30.851 -uint64_t allocblock_hint(void *block, uint64_t hint) {
  30.852 -    int map = (int)hint;
  30.853 -    int rep0 = bsclusters[map].servers[0];
  30.854 -    int rep1 = bsclusters[map].servers[1];
  30.855 -    int rep2 = bsclusters[map].servers[2];
  30.856 -    bsq_t *reqs[3];
  30.857 -    int rc;
  30.858 -    uint64_t id0, id1, id2;
  30.859 -
  30.860 -    reqs[0] = reqs[1] = reqs[2] = NULL;
  30.861 -
  30.862 -    DB("ENTER allocblock\n");
  30.863 -
  30.864 -    reqs[0] = allocblock_hint_indiv(rep0, block, hint);
  30.865 -    if (!reqs[0])
  30.866 -        goto err;
  30.867 -    reqs[1] = allocblock_hint_indiv(rep1, block, hint);
  30.868 -    if (!reqs[1])
  30.869 -        goto err;
  30.870 -    reqs[2] = allocblock_hint_indiv(rep2, block, hint);
  30.871 -    if (!reqs[2])
  30.872 -        goto err;
  30.873 -
  30.874 -    rc = wait_recv(reqs, 3);
  30.875 -    if (rc < 0) {
  30.876 -        perror("allocblock recv");
  30.877 -        goto err;
  30.878 -    }
  30.879 -    if ((reqs[0]->message.flags & BSOP_FLAG_ERROR)) {
  30.880 -        fprintf(stderr, "allocblock server0 error\n");
  30.881 -        goto err;
  30.882 -    }
  30.883 -    if ((reqs[1]->message.flags & BSOP_FLAG_ERROR)) {
  30.884 -        fprintf(stderr, "allocblock server1 error\n");
  30.885 -        goto err;
  30.886 -    }
  30.887 -    if ((reqs[2]->message.flags & BSOP_FLAG_ERROR)) {
  30.888 -        fprintf(stderr, "allocblock server2 error\n");
  30.889 -        goto err;
  30.890 -    }
  30.891 -
  30.892 -    id0 = reqs[0]->message.id;
  30.893 -    id1 = reqs[1]->message.id;
  30.894 -    id2 = reqs[2]->message.id;
  30.895 -
  30.896 -#ifdef BSDEBUG
  30.897 -    fprintf(stderr, "ALLOC: %016llx %02x%02x %02x%02x %02x%02x %02x%02x\n",
  30.898 -            BSID(map, id0, id1, id2),
  30.899 -            (unsigned int)((unsigned char *)block)[0],
  30.900 -            (unsigned int)((unsigned char *)block)[1],
  30.901 -            (unsigned int)((unsigned char *)block)[2],
  30.902 -            (unsigned int)((unsigned char *)block)[3],
  30.903 -            (unsigned int)((unsigned char *)block)[4],
  30.904 -            (unsigned int)((unsigned char *)block)[5],
  30.905 -            (unsigned int)((unsigned char *)block)[6],
  30.906 -            (unsigned int)((unsigned char *)block)[7]);
  30.907 -#endif
  30.908 -    
  30.909 -    free((void *)reqs[0]);
  30.910 -    free((void *)reqs[1]);
  30.911 -    free((void *)reqs[2]);
  30.912 -    return BSID(map, id0, id1, id2);
  30.913 -
  30.914 -    err:
  30.915 -    if (reqs[0]) {
  30.916 -        dequeue(reqs[0]);
  30.917 -        free((void *)reqs[0]);
  30.918 -    }
  30.919 -    if (reqs[1]) {
  30.920 -        dequeue(reqs[1]);
  30.921 -        free((void *)reqs[1]);
  30.922 -    }
  30.923 -    if (reqs[2]) {
  30.924 -        dequeue(reqs[2]);
  30.925 -        free((void *)reqs[2]);
  30.926 -    }
  30.927 -    return 0;
  30.928 -}
  30.929 -
  30.930 -#else /* /BLOCKSTORE_REMOTE */
  30.931 -
  30.932 -/*****************************************************************************
  30.933 - * Local storage version                                                     *
  30.934 - *****************************************************************************/
  30.935 - 
  30.936 -/**
  30.937 - * readblock: read a block from disk
  30.938 - *   @id: block id to read
  30.939 - *
  30.940 - *   @return: pointer to block, NULL on error
  30.941 - */
  30.942 -
  30.943 -void *readblock(uint64_t id) {
  30.944 -    void *block;
  30.945 -    int block_fp;
  30.946 -   
  30.947 -//printf("readblock(%llu)\n", id); 
  30.948 -    block_fp = open("blockstore.dat", O_RDONLY | O_CREAT | O_LARGEFILE, 0644);
  30.949 -
  30.950 -    if (block_fp < 0) {
  30.951 -        perror("open");
  30.952 -        return NULL;
  30.953 -    }
  30.954 -    
  30.955 -    if (lseek64(block_fp, ((off64_t) id - 1LL) * BLOCK_SIZE, SEEK_SET) < 0) {
  30.956 -        printf ("%Ld ", id);
  30.957 -        printf ("%Ld\n", (id - 1) * BLOCK_SIZE);
  30.958 -        perror("readblock lseek");
  30.959 -        goto err;
  30.960 -    }
  30.961 -    if ((block = malloc(BLOCK_SIZE)) == NULL) {
  30.962 -        perror("readblock malloc");
  30.963 -        goto err;
  30.964 -    }
  30.965 -    if (read(block_fp, block, BLOCK_SIZE) != BLOCK_SIZE) {
  30.966 -        perror("readblock read");
  30.967 -        free(block);
  30.968 -        goto err;
  30.969 -    }
  30.970 -    close(block_fp);
  30.971 -    return block;
  30.972 -    
  30.973 -err:
  30.974 -    close(block_fp);
  30.975 -    return NULL;
  30.976 -}
  30.977 -
  30.978 -/**
  30.979 - * writeblock: write an existing block to disk
  30.980 - *   @id: block id
  30.981 - *   @block: pointer to block
  30.982 - *
  30.983 - *   @return: zero on success, -1 on failure
  30.984 - */
  30.985 -int writeblock(uint64_t id, void *block) {
  30.986 -    
  30.987 -    int block_fp;
  30.988 -    
  30.989 -    block_fp = open("blockstore.dat", O_RDWR | O_CREAT | O_LARGEFILE, 0644);
  30.990 -
  30.991 -    if (block_fp < 0) {
  30.992 -        perror("open");
  30.993 -        return -1;
  30.994 -    }
  30.995 -
  30.996 -    if (lseek64(block_fp, ((off64_t) id - 1LL) * BLOCK_SIZE, SEEK_SET) < 0) {
  30.997 -        perror("writeblock lseek");
  30.998 -        goto err;
  30.999 -    }
 30.1000 -    if (write(block_fp, block, BLOCK_SIZE) < 0) {
 30.1001 -        perror("writeblock write");
 30.1002 -        goto err;
 30.1003 -    }
 30.1004 -    close(block_fp);
 30.1005 -    return 0;
 30.1006 -
 30.1007 -err:
 30.1008 -    close(block_fp);
 30.1009 -    return -1;
 30.1010 -}
 30.1011 -
 30.1012 -/**
 30.1013 - * allocblock: write a new block to disk
 30.1014 - *   @block: pointer to block
 30.1015 - *
  30.1016 - *   @return: new id of block on disk, 0 on error
 30.1017 - */
 30.1018 -
 30.1019 -uint64_t allocblock(void *block) {
 30.1020 -    uint64_t lb;
 30.1021 -    off64_t pos;
 30.1022 -    int block_fp;
 30.1023 -    
 30.1024 -    block_fp = open("blockstore.dat", O_RDWR | O_CREAT | O_LARGEFILE, 0644);
 30.1025 -
 30.1026 -    if (block_fp < 0) {
 30.1027 -        perror("open");
 30.1028 -        return 0;
 30.1029 -    }
 30.1030 -
 30.1031 -    pos = lseek64(block_fp, 0, SEEK_END);
 30.1032 -    if (pos == (off64_t)-1) {
 30.1033 -        perror("allocblock lseek");
 30.1034 -        goto err;
 30.1035 -    }
 30.1036 -    if (pos % BLOCK_SIZE != 0) {
 30.1037 -        fprintf(stderr, "file size not multiple of %d\n", BLOCK_SIZE);
 30.1038 -        goto err;
 30.1039 -    }
 30.1040 -    if (write(block_fp, block, BLOCK_SIZE) != BLOCK_SIZE) {
 30.1041 -        perror("allocblock write");
 30.1042 -        goto err;
 30.1043 -    }
 30.1044 -    lb = pos / BLOCK_SIZE + 1;
 30.1045 -//printf("alloc(%Ld)\n", lb);
 30.1046 -    close(block_fp);
 30.1047 -    return lb;
 30.1048 -    
 30.1049 -err:
 30.1050 -    close(block_fp);
 30.1051 -    return 0;
 30.1052 -    
 30.1053 -}
 30.1054 -
 30.1055 -/**
 30.1056 - * allocblock_hint: write a new block to disk
 30.1057 - *   @block: pointer to block
  30.1058 - *   @hint: allocation hint (ignored by this local-storage version)
  30.1059 - *
  30.1060 - *   @return: new id of block on disk, 0 on error
 30.1061 - */
 30.1062 -uint64_t allocblock_hint(void *block, uint64_t hint) {
 30.1063 -    return allocblock(block);
 30.1064 -}
 30.1065 -
 30.1066 -#endif /* BLOCKSTORE_REMOTE */
 30.1067 -
 30.1068 -/*****************************************************************************
 30.1069 - * Memory management                                                         *
 30.1070 - *****************************************************************************/
 30.1071 -
 30.1072 -/**
 30.1073 - * newblock: get a new in-memory block set to zeros
 30.1074 - *
 30.1075 - *   @return: pointer to new block, NULL on error
 30.1076 - */
 30.1077 -void *newblock(void) {
 30.1078 -    void *block = malloc(BLOCK_SIZE);
 30.1079 -    if (block == NULL) {
 30.1080 -        perror("newblock");
 30.1081 -        return NULL;
 30.1082 -    }
 30.1083 -    memset(block, 0, BLOCK_SIZE);
 30.1084 -    return block;
 30.1085 -}
 30.1086 -
 30.1087 -
 30.1088 -/**
  30.1089 - * freeblock: free an in-memory block
  30.1090 - *   (only the memory is released; no block id is involved)
  30.1091 - *   @block: block to be freed
 30.1092 - */
 30.1093 -void freeblock(void *block) {
 30.1094 -        free(block);
 30.1095 -}
 30.1096 -
 30.1097 -static freeblock_t *new_freeblock(void)
 30.1098 -{
 30.1099 -    freeblock_t *fb;
 30.1100 -    
 30.1101 -    fb = newblock();
 30.1102 -    
 30.1103 -    if (fb == NULL) return NULL;
 30.1104 -    
 30.1105 -    fb->magic = FREEBLOCK_MAGIC;
 30.1106 -    fb->next  = 0ULL;
 30.1107 -    fb->count = 0ULL;
 30.1108 -    memset(fb->list, 0, sizeof fb->list);
 30.1109 -    
 30.1110 -    return fb;
 30.1111 -}
 30.1112 -
 30.1113 -void releaseblock(uint64_t id)
 30.1114 -{
 30.1115 -    blockstore_super_t *bs_super;
 30.1116 -    freeblock_t *fl_current;
 30.1117 -    
 30.1118 -    /* get superblock */
 30.1119 -    bs_super = (blockstore_super_t *) readblock(BLOCKSTORE_SUPER);
 30.1120 -    
 30.1121 -    /* get freeblock_current */
 30.1122 -    if (bs_super->freelist_current == 0ULL) 
 30.1123 -    {
 30.1124 -        fl_current = new_freeblock();
 30.1125 -        bs_super->freelist_current = allocblock(fl_current);
 30.1126 -        writeblock(BLOCKSTORE_SUPER, bs_super);
 30.1127 -    } else {
 30.1128 -        fl_current = readblock(bs_super->freelist_current);
 30.1129 -    }
 30.1130 -    
 30.1131 -    /* if full, chain to superblock and allocate new current */
 30.1132 -    
 30.1133 -    if (fl_current->count == FREEBLOCK_SIZE) {
 30.1134 -        fl_current->next = bs_super->freelist_full;
 30.1135 -        writeblock(bs_super->freelist_current, fl_current);
 30.1136 -        bs_super->freelist_full = bs_super->freelist_current;
 30.1137 -        freeblock(fl_current);
 30.1138 -        fl_current = new_freeblock();
 30.1139 -        bs_super->freelist_current = allocblock(fl_current);
 30.1140 -        writeblock(BLOCKSTORE_SUPER, bs_super);
 30.1141 -    }
 30.1142 -    
 30.1143 -    /* append id to current */
 30.1144 -    fl_current->list[fl_current->count++] = id;
 30.1145 -    writeblock(bs_super->freelist_current, fl_current);
 30.1146 -    
 30.1147 -    freeblock(fl_current);
 30.1148 -    freeblock(bs_super);
 30.1149 -    
 30.1150 -    
 30.1151 -}
 30.1152 -
 30.1153 -/* freelist debug functions: */
 30.1154 -void freelist_count(int print_each)
 30.1155 -{
 30.1156 -    blockstore_super_t *bs_super;
 30.1157 -    freeblock_t *fb;
 30.1158 -    uint64_t total = 0, next;
 30.1159 -    
 30.1160 -    bs_super = (blockstore_super_t *) readblock(BLOCKSTORE_SUPER);
 30.1161 -    
 30.1162 -    if (bs_super->freelist_current == 0ULL) {
 30.1163 -        printf("freelist is empty!\n");
 30.1164 -        return;
 30.1165 -    }
 30.1166 -    
 30.1167 -    fb = readblock(bs_super->freelist_current);
 30.1168 -    printf("%Ld entires on current.\n", fb->count);
 30.1169 -    total += fb->count;
 30.1170 -    if (print_each == 1)
 30.1171 -    {
 30.1172 -        int i;
 30.1173 -        for (i=0; i< fb->count; i++)
 30.1174 -            printf("  %Ld\n", fb->list[i]);
 30.1175 -    }
 30.1176 -    
 30.1177 -    freeblock(fb);
 30.1178 -    
 30.1179 -    if (bs_super->freelist_full == 0ULL) {
 30.1180 -        printf("freelist_full is empty!\n");
 30.1181 -        return;
 30.1182 -    }
 30.1183 -    
 30.1184 -    next = bs_super->freelist_full;
 30.1185 -    for (;;) {
 30.1186 -        fb = readblock(next);
 30.1187 -        total += fb->count;
 30.1188 -        if (print_each == 1)
 30.1189 -        {
 30.1190 -            int i;
 30.1191 -            for (i=0; i< fb->count; i++)
 30.1192 -                printf("  %Ld\n", fb->list[i]);
 30.1193 -        }
 30.1194 -        next = fb->next;
 30.1195 -        freeblock(fb);
 30.1196 -        if (next == 0ULL) break;
 30.1197 -    }
 30.1198 -    printf("Total of %Ld ids on freelist.\n", total);
 30.1199 -}
 30.1200 -
 30.1201 -/*****************************************************************************
 30.1202 - * Initialisation                                                            *
 30.1203 - *****************************************************************************/
 30.1204 -
 30.1205 -int __init_blockstore(void)
 30.1206 -{
 30.1207 -    int i;
 30.1208 -    blockstore_super_t *bs_super;
 30.1209 -    uint64_t ret;
 30.1210 -    int block_fp;
 30.1211 -    
 30.1212 -#ifdef BLOCKSTORE_REMOTE
 30.1213 -    struct hostent *addr;
 30.1214 -
 30.1215 -    pthread_mutex_init(&ptmutex_queue, NULL);
 30.1216 -    pthread_mutex_init(&ptmutex_luid, NULL);
 30.1217 -    pthread_mutex_init(&ptmutex_recv, NULL);
 30.1218 -    /*pthread_mutex_init(&ptmutex_notify, NULL);*/
 30.1219 -    for (i = 0; i <= READ_POOL_SIZE; i++) {
 30.1220 -        pool_thread[i].newdata = 0;
 30.1221 -        pthread_mutex_init(&(pool_thread[i].ptmutex), NULL);
 30.1222 -        pthread_cond_init(&(pool_thread[i].ptcv), NULL);
 30.1223 -    }
 30.1224 -
 30.1225 -    bsservers[0].hostname = "firebug.cl.cam.ac.uk";
 30.1226 -    bsservers[1].hostname = "planb.cl.cam.ac.uk";
 30.1227 -    bsservers[2].hostname = "simcity.cl.cam.ac.uk";
 30.1228 -    bsservers[3].hostname = NULL/*"gunfighter.cl.cam.ac.uk"*/;
 30.1229 -    bsservers[4].hostname = NULL/*"galaxian.cl.cam.ac.uk"*/;
 30.1230 -    bsservers[5].hostname = NULL/*"firetrack.cl.cam.ac.uk"*/;
 30.1231 -    bsservers[6].hostname = NULL/*"funfair.cl.cam.ac.uk"*/;
 30.1232 -    bsservers[7].hostname = NULL/*"felix.cl.cam.ac.uk"*/;
 30.1233 -    bsservers[8].hostname = NULL;
 30.1234 -    bsservers[9].hostname = NULL;
 30.1235 -    bsservers[10].hostname = NULL;
 30.1236 -    bsservers[11].hostname = NULL;
 30.1237 -    bsservers[12].hostname = NULL;
 30.1238 -    bsservers[13].hostname = NULL;
 30.1239 -    bsservers[14].hostname = NULL;
 30.1240 -    bsservers[15].hostname = NULL;
 30.1241 -
 30.1242 -    for (i = 0; i < MAX_SERVERS; i++) {
 30.1243 -        if (!bsservers[i].hostname)
 30.1244 -            continue;
 30.1245 -        addr = gethostbyname(bsservers[i].hostname);
 30.1246 -        if (!addr) {
 30.1247 -            perror("bad hostname");
 30.1248 -            return -1;
 30.1249 -        }
 30.1250 -        bsservers[i].sin.sin_family = addr->h_addrtype;
 30.1251 -        bsservers[i].sin.sin_port = htons(BLOCKSTORED_PORT);
 30.1252 -        bsservers[i].sin.sin_addr.s_addr = 
 30.1253 -            ((struct in_addr *)(addr->h_addr))->s_addr;
 30.1254 -    }
 30.1255 -
 30.1256 -    /* Cluster map
 30.1257 -     */
 30.1258 -    bsclusters[0].servers[0] = 0;
 30.1259 -    bsclusters[0].servers[1] = 1;
 30.1260 -    bsclusters[0].servers[2] = 2;
 30.1261 -    bsclusters[1].servers[0] = 1;
 30.1262 -    bsclusters[1].servers[1] = 2;
 30.1263 -    bsclusters[1].servers[2] = 3;
 30.1264 -    bsclusters[2].servers[0] = 2;
 30.1265 -    bsclusters[2].servers[1] = 3;
 30.1266 -    bsclusters[2].servers[2] = 4;
 30.1267 -    bsclusters[3].servers[0] = 3;
 30.1268 -    bsclusters[3].servers[1] = 4;
 30.1269 -    bsclusters[3].servers[2] = 5;
 30.1270 -    bsclusters[4].servers[0] = 4;
 30.1271 -    bsclusters[4].servers[1] = 5;
 30.1272 -    bsclusters[4].servers[2] = 6;
 30.1273 -    bsclusters[5].servers[0] = 5;
 30.1274 -    bsclusters[5].servers[1] = 6;
 30.1275 -    bsclusters[5].servers[2] = 7;
 30.1276 -    bsclusters[6].servers[0] = 6;
 30.1277 -    bsclusters[6].servers[1] = 7;
 30.1278 -    bsclusters[6].servers[2] = 0;
 30.1279 -    bsclusters[7].servers[0] = 7;
 30.1280 -    bsclusters[7].servers[1] = 0;
 30.1281 -    bsclusters[7].servers[2] = 1;
 30.1282 -
 30.1283 -    /* Local socket set up
 30.1284 -     */
 30.1285 -    bssock = socket(AF_INET, SOCK_DGRAM, 0);
 30.1286 -    if (bssock < 0) {
 30.1287 -        perror("Bad socket");
 30.1288 -        return -1;
 30.1289 -    }
 30.1290 -    memset(&sin_local, 0, sizeof(sin_local));
 30.1291 -    sin_local.sin_family = AF_INET;
 30.1292 -    sin_local.sin_port = htons(BLOCKSTORED_PORT);
 30.1293 -    sin_local.sin_addr.s_addr = htonl(INADDR_ANY);
 30.1294 -    if (bind(bssock, (struct sockaddr *)&sin_local, sizeof(sin_local)) < 0) {
 30.1295 -        perror("bind");
 30.1296 -        close(bssock);
 30.1297 -        return -1;
 30.1298 -    }
 30.1299 -
 30.1300 -    pthread_create(&pthread_recv, NULL, receive_loop, NULL);
 30.1301 -    pthread_create(&pthread_recv, NULL, queue_runner, NULL);
 30.1302 -
 30.1303 -#else /* /BLOCKSTORE_REMOTE */
 30.1304 -    block_fp = open("blockstore.dat", O_RDWR | O_CREAT | O_LARGEFILE, 0644);
 30.1305 -
 30.1306 -    if (block_fp < 0) {
 30.1307 -        perror("open");
 30.1308 -        return -1;
 30.1309 -        exit(-1);
 30.1310 -    }
 30.1311 -    
 30.1312 -    if (lseek(block_fp, 0, SEEK_END) == 0) {
 30.1313 -        bs_super = newblock();
 30.1314 -        bs_super->magic            = BLOCKSTORE_MAGIC;
 30.1315 -        bs_super->freelist_full    = 0LL;
 30.1316 -        bs_super->freelist_current = 0LL;
 30.1317 -        
 30.1318 -        ret = allocblock(bs_super);
 30.1319 -        
 30.1320 -        freeblock(bs_super);
 30.1321 -    } else {
 30.1322 -        bs_super = (blockstore_super_t *) readblock(BLOCKSTORE_SUPER);
 30.1323 -        if (bs_super->magic != BLOCKSTORE_MAGIC)
 30.1324 -        {
 30.1325 -            printf("BLOCKSTORE IS CORRUPT! (no magic in superblock!)\n");
 30.1326 -            exit(-1);
 30.1327 -        }
 30.1328 -        freeblock(bs_super);
 30.1329 -    }
 30.1330 -        
 30.1331 -    close(block_fp);
 30.1332 -        
 30.1333 -#endif /*  BLOCKSTORE_REMOTE */   
 30.1334 -    return 0;
 30.1335 -}
 30.1336 -
 30.1337 -void __exit_blockstore(void)
 30.1338 -{
 30.1339 -    int i;
 30.1340 -#ifdef BLOCKSTORE_REMOTE
 30.1341 -    pthread_mutex_destroy(&ptmutex_recv);
 30.1342 -    pthread_mutex_destroy(&ptmutex_luid);
 30.1343 -    pthread_mutex_destroy(&ptmutex_queue);
 30.1344 -    /*pthread_mutex_destroy(&ptmutex_notify);
 30.1345 -      pthread_cond_destroy(&ptcv_notify);*/
 30.1346 -    for (i = 0; i <= READ_POOL_SIZE; i++) {
 30.1347 -        pthread_mutex_destroy(&(pool_thread[i].ptmutex));
 30.1348 -        pthread_cond_destroy(&(pool_thread[i].ptcv));
 30.1349 -    }
 30.1350 -#endif
 30.1351 -}
    31.1 --- a/tools/blktap/parallax/blockstore.h	Fri Jun 23 15:26:01 2006 -0600
    31.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    31.3 @@ -1,134 +0,0 @@
    31.4 -/**************************************************************************
    31.5 - * 
    31.6 - * blockstore.h
    31.7 - *
    31.8 - * Simple block store interface
    31.9 - *
   31.10 - */
   31.11 - 
   31.12 -#ifndef __BLOCKSTORE_H__
   31.13 -#define __BLOCKSTORE_H__
   31.14 -
   31.15 -#include <netinet/in.h>
   31.16 -#include <xenctrl.h>
   31.17 -
   31.18 -#define BLOCK_SIZE  4096
   31.19 -#define BLOCK_SHIFT   12
   31.20 -#define BLOCK_MASK  0xfffffffffffff000LL
   31.21 -
   31.22 -/* XXX SMH: where is the below supposed to be defined???? */
   31.23 -#ifndef SECTOR_SHIFT 
   31.24 -#define SECTOR_SHIFT   9 
   31.25 -#endif
   31.26 -
   31.27 -#define FREEBLOCK_SIZE  (BLOCK_SIZE / sizeof(uint64_t)) - (3 * sizeof(uint64_t))
   31.28 -#define FREEBLOCK_MAGIC 0x0fee0fee0fee0feeULL
   31.29 -
   31.30 -typedef struct {
   31.31 -    uint64_t magic;
   31.32 -    uint64_t next;
   31.33 -    uint64_t count;
   31.34 -    uint64_t list[FREEBLOCK_SIZE];
   31.35 -} freeblock_t; 
   31.36 -
   31.37 -#define BLOCKSTORE_MAGIC 0xaaaaaaa00aaaaaaaULL
   31.38 -#define BLOCKSTORE_SUPER 1ULL
   31.39 -
   31.40 -typedef struct {
   31.41 -    uint64_t magic;
   31.42 -    uint64_t freelist_full;
   31.43 -    uint64_t freelist_current;
   31.44 -} blockstore_super_t;
   31.45 -
   31.46 -extern void *newblock();
   31.47 -extern void *readblock(uint64_t id);
   31.48 -extern uint64_t allocblock(void *block);
   31.49 -extern uint64_t allocblock_hint(void *block, uint64_t hint);
   31.50 -extern int writeblock(uint64_t id, void *block);
   31.51 -
   31.52 -/* Add this blockid to a freelist, to be recycled by the allocator. */
   31.53 -extern void releaseblock(uint64_t id);
   31.54 -
   31.55 -/* this is a memory free() operation for block-sized allocations */
   31.56 -extern void freeblock(void *block);
   31.57 -extern int __init_blockstore(void);
   31.58 -
   31.59 -/* debug for freelist. */
   31.60 -void freelist_count(int print_each);
   31.61 -#define ALLOCFAIL (((uint64_t)(-1)))
   31.62 -
   31.63 -/* Distribution
   31.64 - */
   31.65 -#define BLOCKSTORED_PORT 9346
   31.66 -
   31.67 -struct bshdr_t_struct {
   31.68 -    uint32_t            operation;
   31.69 -    uint32_t            flags;
   31.70 -    uint64_t            id;
   31.71 -    uint64_t            luid;
   31.72 -} __attribute__ ((packed));
   31.73 -typedef struct bshdr_t_struct bshdr_t;
   31.74 -
   31.75 -struct bsmsg_t_struct {
   31.76 -    bshdr_t        hdr;
   31.77 -    unsigned char  block[BLOCK_SIZE];
   31.78 -} __attribute__ ((packed));
   31.79 -
   31.80 -typedef struct bsmsg_t_struct bsmsg_t;
   31.81 -
   31.82 -#define MSGBUFSIZE_OP    sizeof(uint32_t)
   31.83 -#define MSGBUFSIZE_FLAGS (sizeof(uint32_t) + sizeof(uint32_t))
   31.84 -#define MSGBUFSIZE_ID    (sizeof(uint32_t) + sizeof(uint32_t) + sizeof(uint64_t) + sizeof(uint64_t))
   31.85 -#define MSGBUFSIZE_BLOCK sizeof(bsmsg_t)
   31.86 -
   31.87 -#define BSOP_READBLOCK  0x01
   31.88 -#define BSOP_WRITEBLOCK 0x02
   31.89 -#define BSOP_ALLOCBLOCK 0x03
   31.90 -#define BSOP_FREEBLOCK  0x04
   31.91 -
   31.92 -#define BSOP_FLAG_ERROR 0x01
   31.93 -
   31.94 -#define BS_ALLOC_SKIP 10
   31.95 -#define BS_ALLOC_HACK
   31.96 -
   31.97 -/* Remote hosts and cluster map - XXX need to generalise
   31.98 - */
   31.99 -
  31.100 -/*
  31.101 -
  31.102 -  Interim ID format is
  31.103 -
  31.104 -  63 60 59                40 39                20 19                 0
  31.105 -  +----+--------------------+--------------------+--------------------+
  31.106 -  |map | replica 2          | replica 1          | replica 0          |
  31.107 -  +----+--------------------+--------------------+--------------------+
  31.108 -
  31.109 -  The map is an index into a table detailing which machines form the
  31.110 -  cluster.
  31.111 -
  31.112 - */
  31.113 -
  31.114 -#define BSID_REPLICA0(_id) ((_id)&0xfffffULL)
  31.115 -#define BSID_REPLICA1(_id) (((_id)>>20)&0xfffffULL)
  31.116 -#define BSID_REPLICA2(_id) (((_id)>>40)&0xfffffULL)
  31.117 -#define BSID_MAP(_id)      (((_id)>>60)&0xfULL)
  31.118 -
  31.119 -#define BSID(_map, _rep0, _rep1, _rep2) ((((uint64_t)(_map))<<60) | \
  31.120 -                                         (((uint64_t)(_rep2))<<40) | \
  31.121 -                                         (((uint64_t)(_rep1))<<20) | ((uint64_t)(_rep0)))
  31.122 -
  31.123 -typedef struct bsserver_t_struct {
  31.124 -    char              *hostname;
  31.125 -    struct sockaddr_in sin;
  31.126 -} bsserver_t;
  31.127 -
  31.128 -#define MAX_SERVERS 16
  31.129 -
  31.130 -#define CLUSTER_MAX_REPLICAS 3
  31.131 -typedef struct bscluster_t_struct {
  31.132 -    int servers[CLUSTER_MAX_REPLICAS];
  31.133 -} bscluster_t;
  31.134 -
  31.135 -#define MAX_CLUSTERS 16
  31.136 -
  31.137 -#endif /* __BLOCKSTORE_H__ */
    32.1 --- a/tools/blktap/parallax/blockstored.c	Fri Jun 23 15:26:01 2006 -0600
    32.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    32.3 @@ -1,275 +0,0 @@
    32.4 -/**************************************************************************
    32.5 - * 
    32.6 - * blockstored.c
    32.7 - *
    32.8 - * Block store daemon.
    32.9 - *
   32.10 - */
   32.11 -
   32.12 -#include <fcntl.h>
   32.13 -#include <unistd.h>
   32.14 -#include <stdio.h>
   32.15 -#include <stdlib.h>
   32.16 -#include <string.h>
   32.17 -#include <sys/types.h>
   32.18 -#include <sys/stat.h>
   32.19 -#include <sys/socket.h>
   32.20 -#include <sys/ioctl.h>
   32.21 -#include <netinet/in.h>
   32.22 -#include <errno.h>
   32.23 -#include "blockstore.h"
   32.24 -
   32.25 -//#define BSDEBUG
   32.26 -
   32.27 -int readblock_into(uint64_t id, void *block);
   32.28 -
   32.29 -int open_socket(uint16_t port) {
   32.30 -    
   32.31 -    struct sockaddr_in sn;
   32.32 -    int sock;
   32.33 -
   32.34 -    sock = socket(AF_INET, SOCK_DGRAM, 0);
   32.35 -    if (sock < 0) {
   32.36 -        perror("Bad socket");
   32.37 -        return -1;
   32.38 -    }
   32.39 -    memset(&sn, 0, sizeof(sn));
   32.40 -    sn.sin_family = AF_INET;
   32.41 -    sn.sin_port = htons(port);
   32.42 -    sn.sin_addr.s_addr = htonl(INADDR_ANY);
   32.43 -    if (bind(sock, (struct sockaddr *)&sn, sizeof(sn)) < 0) {
   32.44 -        perror("bind");
   32.45 -        close(sock);
   32.46 -        return -1;
   32.47 -    }
   32.48 -
   32.49 -    return sock;
   32.50 -}
   32.51 -
   32.52 -static int block_fp = -1;
   32.53 -static int bssock = -1;
   32.54 -
   32.55 -int send_reply(struct sockaddr_in *peer, void *buffer, int len) {
   32.56 -
   32.57 -    int rc;
   32.58 -    
   32.59 -#ifdef BSDEBUG
   32.60 -    fprintf(stdout, "TX: %u bytes op=%u id=0x%llx\n",
   32.61 -            len, ((bsmsg_t *)buffer)->hdr.operation, ((bsmsg_t *)buffer)->hdr.id);
   32.62 -#endif
   32.63 -    rc = sendto(bssock, buffer, len, 0, (struct sockaddr *)peer, sizeof(*peer));
   32.64 -    if (rc < 0) {
   32.65 -        perror("send_reply");
   32.66 -        return 1;
   32.67 -    }
   32.68 -
   32.69 -
   32.70 -    return 0;
   32.71 -}
   32.72 -
   32.73 -static bsmsg_t msgbuf;
   32.74 -
   32.75 -void service_loop(void) {
   32.76 -
   32.77 -    for (;;) {
   32.78 -        int rc, len;
   32.79 -        struct sockaddr_in from;
   32.80 -        size_t slen = sizeof(from);
   32.81 -        uint64_t bid;
   32.82 -
   32.83 -        len = recvfrom(bssock, (void *)&msgbuf, sizeof(msgbuf), 0,
   32.84 -                       (struct sockaddr *)&from, &slen);
   32.85 -
   32.86 -        if (len < 0) {
   32.87 -            perror("recvfrom");
   32.88 -            continue;
   32.89 -        }
   32.90 -
   32.91 -        if (len < MSGBUFSIZE_OP) {
   32.92 -            fprintf(stderr, "Short packet.\n");
   32.93 -            continue;
   32.94 -        }
   32.95 -
   32.96 -#ifdef BSDEBUG
   32.97 -        fprintf(stdout, "RX: %u bytes op=%u id=0x%llx\n",
   32.98 -                len, msgbuf.hdr.operation, msgbuf.hdr.id);
   32.99 -#endif
  32.100 -
  32.101 -        switch (msgbuf.hdr.operation) {
  32.102 -        case BSOP_READBLOCK:
  32.103 -            if (len < MSGBUFSIZE_ID) {
  32.104 -                fprintf(stderr, "Short packet (readblock %u).\n", len);
  32.105 -                continue;
  32.106 -            }
  32.107 -            rc = readblock_into(msgbuf.hdr.id, msgbuf.block);
  32.108 -            if (rc < 0) {
  32.109 -                fprintf(stderr, "readblock error\n");
  32.110 -                msgbuf.hdr.flags = BSOP_FLAG_ERROR;
  32.111 -                send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_ID);
  32.112 -                continue;
  32.113 -            }
  32.114 -            msgbuf.hdr.flags = 0;
  32.115 -            send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_BLOCK);
  32.116 -            break;
  32.117 -        case BSOP_WRITEBLOCK:
  32.118 -            if (len < MSGBUFSIZE_BLOCK) {
  32.119 -                fprintf(stderr, "Short packet (writeblock %u).\n", len);
  32.120 -                continue;
  32.121 -            }
  32.122 -            rc = writeblock(msgbuf.hdr.id, msgbuf.block);
  32.123 -            if (rc < 0) {
  32.124 -                fprintf(stderr, "writeblock error\n");
  32.125 -                msgbuf.hdr.flags = BSOP_FLAG_ERROR;
  32.126 -                send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_ID);
  32.127 -                continue;
  32.128 -            }
  32.129 -            msgbuf.hdr.flags = 0;
  32.130 -            send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_ID);
  32.131 -            break;
  32.132 -        case BSOP_ALLOCBLOCK:
  32.133 -            if (len < MSGBUFSIZE_BLOCK) {
  32.134 -                fprintf(stderr, "Short packet (allocblock %u).\n", len);
  32.135 -                continue;
  32.136 -            }
  32.137 -            bid = allocblock(msgbuf.block);
  32.138 -            if (bid == ALLOCFAIL) {
  32.139 -                fprintf(stderr, "allocblock error\n");
  32.140 -                msgbuf.hdr.flags = BSOP_FLAG_ERROR;
  32.141 -                send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_ID);
  32.142 -                continue;
  32.143 -            }
  32.144 -            msgbuf.hdr.id = bid;
  32.145 -            msgbuf.hdr.flags = 0;
  32.146 -            send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_ID);
  32.147 -            break;
  32.148 -        }
  32.149 -
  32.150 -    }
  32.151 -}
  32.152 - 
  32.153 -/**
  32.154 - * readblock: read a block from disk
  32.155 - *   @id: block id to read
  32.156 - *   @block: pointer to buffer to receive block
  32.157 - *
  32.158 - *   @return: 0 if OK, other on error
  32.159 - */
  32.160 -
  32.161 -int readblock_into(uint64_t id, void *block) {
  32.162 -    if (lseek64(block_fp, ((off64_t) id - 1LL) * BLOCK_SIZE, SEEK_SET) < 0) {
  32.163 -        printf ("%Ld\n", (id - 1) * BLOCK_SIZE);
  32.164 -        perror("readblock lseek");
  32.165 -        return -1;
  32.166 -    }
  32.167 -    if (read(block_fp, block, BLOCK_SIZE) != BLOCK_SIZE) {
  32.168 -        perror("readblock read");
  32.169 -        return -1;
  32.170 -    }
  32.171 -    return 0;
  32.172 -}
  32.173 -
  32.174 -/**
  32.175 - * writeblock: write an existing block to disk
  32.176 - *   @id: block id
  32.177 - *   @block: pointer to block
  32.178 - *
  32.179 - *   @return: zero on success, -1 on failure
  32.180 - */
  32.181 -int writeblock(uint64_t id, void *block) {
  32.182 -    if (lseek64(block_fp, ((off64_t) id - 1LL) * BLOCK_SIZE, SEEK_SET) < 0) {
  32.183 -        perror("writeblock lseek");
  32.184 -        return -1;
  32.185 -    }
  32.186 -    if (write(block_fp, block, BLOCK_SIZE) < 0) {
  32.187 -        perror("writeblock write");
  32.188 -        return -1;
  32.189 -    }
  32.190 -    return 0;
  32.191 -}
  32.192 -
  32.193 -/**
  32.194 - * allocblock: write a new block to disk
  32.195 - *   @block: pointer to block
  32.196 - *
  32.197 - *   @return: new id of block on disk
  32.198 - */
  32.199 -static uint64_t lastblock = 0;
  32.200 -
  32.201 -uint64_t allocblock(void *block) {
  32.202 -    uint64_t lb;
  32.203 -    off64_t pos;
  32.204 -
  32.205 -    retry:
  32.206 -    pos = lseek64(block_fp, 0, SEEK_END);
  32.207 -    if (pos == (off64_t)-1) {
  32.208 -        perror("allocblock lseek");
  32.209 -        return ALLOCFAIL;
  32.210 -    }
  32.211 -    if (pos % BLOCK_SIZE != 0) {
  32.212 -        fprintf(stderr, "file size not multiple of %d\n", BLOCK_SIZE);
  32.213 -        return ALLOCFAIL;
  32.214 -    }
  32.215 -    if (write(block_fp, block, BLOCK_SIZE) != BLOCK_SIZE) {
  32.216 -        perror("allocblock write");
  32.217 -        return ALLOCFAIL;
  32.218 -    }
  32.219 -    lb = pos / BLOCK_SIZE + 1;
  32.220 -
  32.221 -#ifdef BS_ALLOC_HACK
  32.222 -    if (lb < BS_ALLOC_SKIP)
  32.223 -        goto retry;
  32.224 -#endif
  32.225 -    
  32.226 -    if (lb <= lastblock)
  32.227 -        printf("[*** %Ld alredy allocated! ***]\n", lb);
  32.228 -    
  32.229 -    lastblock = lb;
  32.230 -    return lb;
  32.231 -}
  32.232 -
  32.233 -/**
  32.234 - * newblock: get a new in-memory block set to zeros
  32.235 - *
  32.236 - *   @return: pointer to new block, NULL on error
  32.237 - */
  32.238 -void *newblock(void) {
  32.239 -    void *block = malloc(BLOCK_SIZE);
  32.240 -    if (block == NULL) {
  32.241 -        perror("newblock");
  32.242 -        return NULL;
  32.243 -    }
  32.244 -    memset(block, 0, BLOCK_SIZE);
  32.245 -    return block;
  32.246 -}
  32.247 -
  32.248 -
  32.249 -/**
  32.250 - * freeblock: unallocate an in-memory block
  32.251 - *   @id: block id (zero if this is only in-memory)
  32.252 - *   @block: block to be freed
  32.253 - */
  32.254 -void freeblock(void *block) {
  32.255 -        free(block);
  32.256 -}
  32.257 -
  32.258 -
  32.259 -int main(int argc, char **argv)
  32.260 -{
  32.261 -    block_fp = open("blockstore.dat", O_RDWR | O_CREAT | O_LARGEFILE, 0644);
  32.262 -
  32.263 -    if (block_fp < 0) {
  32.264 -        perror("open");
  32.265 -        return -1;
  32.266 -    }
  32.267 -
  32.268 -    bssock = open_socket(BLOCKSTORED_PORT);
  32.269 -    if (bssock < 0) {
  32.270 -        return -1;
  32.271 -    }
  32.272 -
  32.273 -    service_loop();
  32.274 -    
  32.275 -    close(bssock);
  32.276 -
  32.277 -    return 0;
  32.278 -}
    33.1 --- a/tools/blktap/parallax/bstest.c	Fri Jun 23 15:26:01 2006 -0600
    33.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    33.3 @@ -1,191 +0,0 @@
    33.4 -/**************************************************************************
    33.5 - * 
    33.6 - * bstest.c
    33.7 - *
    33.8 - * Block store daemon test program.
    33.9 - *
   33.10 - * usage: bstest <host>|X {r|w|a} ID 
   33.11 - *
   33.12 - */
   33.13 -
   33.14 -#include <fcntl.h>
   33.15 -#include <unistd.h>
   33.16 -#include <stdio.h>
   33.17 -#include <stdlib.h>
   33.18 -#include <string.h>
   33.19 -#include <sys/types.h>
   33.20 -#include <sys/stat.h>
   33.21 -#include <sys/socket.h>
   33.22 -#include <sys/ioctl.h>
   33.23 -#include <netinet/in.h>
   33.24 -#include <netdb.h>
   33.25 -#include <errno.h>
   33.26 -#include "blockstore.h"
   33.27 -
   33.28 -int direct(char *host, uint32_t op, uint64_t id, int len) {
   33.29 -    struct sockaddr_in sn, peer;
   33.30 -    int sock;
   33.31 -    bsmsg_t msgbuf;
   33.32 -    int rc, slen;
   33.33 -    struct hostent *addr;
   33.34 -
   33.35 -    addr = gethostbyname(host);
   33.36 -    if (!addr) {
   33.37 -        perror("bad hostname");
   33.38 -        exit(1);
   33.39 -    }
   33.40 -    peer.sin_family = addr->h_addrtype;
   33.41 -    peer.sin_port = htons(BLOCKSTORED_PORT);
   33.42 -    peer.sin_addr.s_addr =  ((struct in_addr *)(addr->h_addr))->s_addr;
   33.43 -    fprintf(stderr, "Sending to: %u.%u.%u.%u\n",
   33.44 -            (unsigned int)(unsigned char)addr->h_addr[0],
   33.45 -            (unsigned int)(unsigned char)addr->h_addr[1],
   33.46 -            (unsigned int)(unsigned char)addr->h_addr[2],
   33.47 -            (unsigned int)(unsigned char)addr->h_addr[3]);
   33.48 -
   33.49 -    sock = socket(AF_INET, SOCK_DGRAM, 0);
   33.50 -    if (sock < 0) {
   33.51 -        perror("Bad socket");
   33.52 -        exit(1);
   33.53 -    }
   33.54 -    memset(&sn, 0, sizeof(sn));
   33.55 -    sn.sin_family = AF_INET;
   33.56 -    sn.sin_port = htons(BLOCKSTORED_PORT);
   33.57 -    sn.sin_addr.s_addr = htonl(INADDR_ANY);
   33.58 -    if (bind(sock, (struct sockaddr *)&sn, sizeof(sn)) < 0) {
   33.59 -        perror("bind");
   33.60 -        close(sock);
   33.61 -        exit(1);
   33.62 -    }
   33.63 -
   33.64 -    memset((void *)&msgbuf, 0, sizeof(msgbuf));
   33.65 -    msgbuf.operation = op;
   33.66 -    msgbuf.id = id;
   33.67 -
   33.68 -    rc = sendto(sock, (void *)&msgbuf, len, 0,
   33.69 -                (struct sockaddr *)&peer, sizeof(peer));
   33.70 -    if (rc < 0) {
   33.71 -        perror("sendto");
   33.72 -        exit(1);
   33.73 -    }
   33.74 -
   33.75 -    slen = sizeof(peer);
   33.76 -    len = recvfrom(sock, (void *)&msgbuf, sizeof(msgbuf), 0,
   33.77 -                   (struct sockaddr *)&peer, &slen);
   33.78 -    if (len < 0) {
   33.79 -        perror("recvfrom");
   33.80 -        exit(1);
   33.81 -    }
   33.82 -
   33.83 -    printf("Reply %u bytes:\n", len);
   33.84 -    if (len >= MSGBUFSIZE_OP)
   33.85 -        printf("  operation: %u\n", msgbuf.operation);
   33.86 -    if (len >= MSGBUFSIZE_FLAGS)
   33.87 -        printf("  flags: 0x%x\n", msgbuf.flags);
   33.88 -    if (len >= MSGBUFSIZE_ID)
   33.89 -        printf("  id: %llu\n", msgbuf.id);
   33.90 -    if (len >= (MSGBUFSIZE_ID + 4))
   33.91 -        printf("  data: %02x %02x %02x %02x...\n",
   33.92 -               (unsigned int)msgbuf.block[0],
   33.93 -               (unsigned int)msgbuf.block[1],
   33.94 -               (unsigned int)msgbuf.block[2],
   33.95 -               (unsigned int)msgbuf.block[3]);
   33.96 -    
   33.97 -    if (sock > 0)
   33.98 -        close(sock);
   33.99 -   
  33.100 -    return 0;
  33.101 -}
  33.102 -
  33.103 -int main (int argc, char **argv) {
  33.104 -
  33.105 -    uint32_t op = 0;
  33.106 -    uint64_t id = 0;
  33.107 -    int len = 0, rc;
  33.108 -    void *block;
  33.109 -
  33.110 -    if (argc < 3) {
  33.111 -        fprintf(stderr, "usage: bstest <host>|X {r|w|a} ID\n");
  33.112 -        return 1;
  33.113 -    }
  33.114 -
  33.115 -    switch (argv[2][0]) {
  33.116 -    case 'r':
  33.117 -    case 'R':
  33.118 -        op = BSOP_READBLOCK;
  33.119 -        len = MSGBUFSIZE_ID;
  33.120 -        break;
  33.121 -    case 'w':
  33.122 -    case 'W':
  33.123 -        op = BSOP_WRITEBLOCK;
  33.124 -        len = MSGBUFSIZE_BLOCK;
  33.125 -        break;
  33.126 -    case 'a':
  33.127 -    case 'A':
  33.128 -        op = BSOP_ALLOCBLOCK;
  33.129 -        len = MSGBUFSIZE_BLOCK;
  33.130 -        break;
  33.131 -    default:
  33.132 -        fprintf(stderr, "Unknown action '%s'.\n", argv[2]);
  33.133 -        return 1;
  33.134 -    }
  33.135 -
  33.136 -    if (argc >= 4)
  33.137 -        id = atoll(argv[3]);
  33.138 -
  33.139 -    if (strcmp(argv[1], "X") == 0) {
  33.140 -        rc = __init_blockstore();
  33.141 -        if (rc < 0) {
  33.142 -            fprintf(stderr, "blockstore init failed.\n");
  33.143 -            return 1;
  33.144 -        }
  33.145 -        switch(op) {
  33.146 -        case BSOP_READBLOCK:
  33.147 -            block = readblock(id);
  33.148 -            if (block) {
  33.149 -                printf("data: %02x %02x %02x %02x...\n",
  33.150 -                       (unsigned int)((unsigned char*)block)[0],
  33.151 -                       (unsigned int)((unsigned char*)block)[1],
  33.152 -                       (unsigned int)((unsigned char*)block)[2],
  33.153 -                       (unsigned int)((unsigned char*)block)[3]);
  33.154 -            }
  33.155 -            break;
  33.156 -        case BSOP_WRITEBLOCK:
  33.157 -            block = malloc(BLOCK_SIZE);
  33.158 -            if (!block) {
  33.159 -                perror("bstest malloc");
  33.160 -                return 1;
  33.161 -            }
  33.162 -            memset(block, 0, BLOCK_SIZE);
  33.163 -            rc = writeblock(id, block);
  33.164 -            if (rc != 0) {
  33.165 -                printf("error\n");
  33.166 -            }
  33.167 -            else {
  33.168 -                printf("OK\n");
  33.169 -            }
  33.170 -            break;
  33.171 -        case BSOP_ALLOCBLOCK:
  33.172 -            block = malloc(BLOCK_SIZE);
  33.173 -            if (!block) {
  33.174 -                perror("bstest malloc");
  33.175 -                return 1;
  33.176 -            }
  33.177 -            memset(block, 0, BLOCK_SIZE);
  33.178 -            id = allocblock_hint(block, id);
  33.179 -            if (id == 0) {
  33.180 -                printf("error\n");
  33.181 -            }
  33.182 -            else {
  33.183 -                printf("ID: %llu\n", id);
  33.184 -            }
  33.185 -            break;
  33.186 -        }
  33.187 -    }
  33.188 -    else {
  33.189 -        direct(argv[1], op, id, len);
  33.190 -    }
  33.191 -
  33.192 -
  33.193 -    return 0;
  33.194 -}
    34.1 --- a/tools/blktap/parallax/parallax.c	Fri Jun 23 15:26:01 2006 -0600
    34.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    34.3 @@ -1,608 +0,0 @@
    34.4 -/**************************************************************************
    34.5 - * 
    34.6 - * parallax.c
    34.7 - *
    34.8 - * The Parallax Storage Server
    34.9 - *
   34.10 - */
   34.11 - 
   34.12 -
   34.13 -#include <stdio.h>
   34.14 -#include <stdlib.h>
   34.15 -#include <string.h>
   34.16 -#include <pthread.h>
   34.17 -#include "blktaplib.h"
   34.18 -#include "blockstore.h"
   34.19 -#include "vdi.h"
   34.20 -#include "block-async.h"
   34.21 -#include "requests-async.h"
   34.22 -
   34.23 -#define PARALLAX_DEV     61440
   34.24 -#define SECTS_PER_NODE   8
   34.25 -
   34.26 -
   34.27 -#if 0
   34.28 -#define DPRINTF(_f, _a...) printf ( _f , ## _a )
   34.29 -#else
   34.30 -#define DPRINTF(_f, _a...) ((void)0)
   34.31 -#endif
   34.32 -
   34.33 -/* ------[ session records ]----------------------------------------------- */
   34.34 -
   34.35 -#define BLKIF_HASHSZ 1024
   34.36 -#define BLKIF_HASH(_d,_h) (((int)(_d)^(int)(_h))&(BLKIF_HASHSZ-1))
   34.37 -
   34.38 -#define VDI_HASHSZ 16
   34.39 -#define VDI_HASH(_vd) ((((_vd)>>8)^(_vd))&(VDI_HASHSZ-1))
   34.40 -
   34.41 -typedef struct blkif {
   34.42 -    domid_t       domid;
   34.43 -    unsigned int  handle;
   34.44 -    enum { DISCONNECTED, DISCONNECTING, CONNECTED } status;
   34.45 -    vdi_t        *vdi_hash[VDI_HASHSZ];
   34.46 -    struct blkif *hash_next;
   34.47 -} blkif_t;
   34.48 -
   34.49 -static blkif_t      *blkif_hash[BLKIF_HASHSZ];
   34.50 -
   34.51 -blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle)
   34.52 -{
   34.53 -    if ( handle != 0 )
   34.54 -        printf("blktap/parallax don't currently support non-0 dev handles!\n");
   34.55 -    
   34.56 -    blkif_t *blkif = blkif_hash[BLKIF_HASH(domid, handle)];
   34.57 -    while ( (blkif != NULL) && 
   34.58 -            ((blkif->domid != domid) || (blkif->handle != handle)) )
   34.59 -        blkif = blkif->hash_next;
   34.60 -    return blkif;
   34.61 -}
   34.62 -
   34.63 -vdi_t *blkif_get_vdi(blkif_t *blkif, blkif_vdev_t device)
   34.64 -{
   34.65 -    vdi_t *vdi = blkif->vdi_hash[VDI_HASH(device)];
   34.66 -    
   34.67 -    while ((vdi != NULL) && (vdi->vdevice != device))
   34.68 -        vdi = vdi->next;
   34.69 -    
   34.70 -    return vdi;
   34.71 -}
   34.72 -
   34.73 -/* ------[ control message handling ]-------------------------------------- */
   34.74 -
   34.75 -void blkif_create(blkif_be_create_t *create)
   34.76 -{
   34.77 -    domid_t       domid  = create->domid;
   34.78 -    unsigned int  handle = create->blkif_handle;
   34.79 -    blkif_t     **pblkif, *blkif;
   34.80 -
   34.81 -    DPRINTF("parallax (blkif_create): create is %p\n", create); 
   34.82 -    
   34.83 -    if ( (blkif = (blkif_t *)malloc(sizeof(blkif_t))) == NULL )
   34.84 -    {
   34.85 -        DPRINTF("Could not create blkif: out of memory\n");
   34.86 -        create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
   34.87 -        return;
   34.88 -    }
   34.89 -
   34.90 -    memset(blkif, 0, sizeof(*blkif));
   34.91 -    blkif->domid  = domid;
   34.92 -    blkif->handle = handle;
   34.93 -    blkif->status = DISCONNECTED;
   34.94 -
   34.95 -    pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
   34.96 -    while ( *pblkif != NULL )
   34.97 -    {
   34.98 -        if ( ((*pblkif)->domid == domid) && ((*pblkif)->handle == handle) )
   34.99 -        {
  34.100 -            DPRINTF("Could not create blkif: already exists (%d,%d)\n",
  34.101 -                domid, handle);
  34.102 -            create->status = BLKIF_BE_STATUS_INTERFACE_EXISTS;
  34.103 -            free(blkif);
  34.104 -            return;
  34.105 -        }
  34.106 -        pblkif = &(*pblkif)->hash_next;
  34.107 -    }
  34.108 -
  34.109 -    blkif->hash_next = *pblkif;
  34.110 -    *pblkif = blkif;
  34.111 -
  34.112 -    DPRINTF("Successfully created blkif\n");
  34.113 -    create->status = BLKIF_BE_STATUS_OKAY;
  34.114 -}
  34.115 -
  34.116 -void blkif_destroy(blkif_be_destroy_t *destroy)
  34.117 -{
  34.118 -    domid_t       domid  = destroy->domid;
  34.119 -    unsigned int  handle = destroy->blkif_handle;
  34.120 -    blkif_t     **pblkif, *blkif;
  34.121 -
  34.122 -    DPRINTF("parallax (blkif_destroy): destroy is %p\n", destroy); 
  34.123 -    
  34.124 -    pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
  34.125 -    while ( (blkif = *pblkif) != NULL )
  34.126 -    {
  34.127 -        if ( (blkif->domid == domid) && (blkif->handle == handle) )
  34.128 -        {
  34.129 -            if ( blkif->status != DISCONNECTED )
  34.130 -                goto still_connected;
  34.131 -            goto destroy;
  34.132 -        }
  34.133 -        pblkif = &blkif->hash_next;
  34.134 -    }
  34.135 -
  34.136 -    destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
  34.137 -    return;
  34.138 -
  34.139 - still_connected:
  34.140 -    destroy->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED;
  34.141 -    return;
  34.142 -
  34.143 - destroy:
  34.144 -    *pblkif = blkif->hash_next;
  34.145 -    free(blkif);
  34.146 -    destroy->status = BLKIF_BE_STATUS_OKAY;
  34.147 -}
  34.148 -
  34.149 -void vbd_create(blkif_be_vbd_create_t *create)
  34.150 -{
  34.151 -    blkif_t            *blkif;
  34.152 -    vdi_t              *vdi, **vdip;
  34.153 -    blkif_vdev_t        vdevice = create->vdevice;
  34.154 -
  34.155 -    DPRINTF("parallax (vbd_create): create=%p\n", create); 
  34.156 -    
  34.157 -    blkif = blkif_find_by_handle(create->domid, create->blkif_handle);
  34.158 -    if ( blkif == NULL )
  34.159 -    {
  34.160 -        DPRINTF("vbd_create attempted for non-existent blkif (%u,%u)\n", 
  34.161 -                create->domid, create->blkif_handle); 
  34.162 -        create->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
  34.163 -        return;
  34.164 -    }
  34.165 -
  34.166 -    /* VDI identifier is in grow->extent.sector_start */
  34.167 -    DPRINTF("vbd_create: create->dev_handle (id) is %lx\n", 
  34.168 -            (unsigned long)create->dev_handle);
  34.169 -
  34.170 -    vdi = vdi_get(create->dev_handle);
  34.171 -    if (vdi == NULL)
  34.172 -    {
  34.173 -        printf("parallax (vbd_create): VDI %lx not found.\n",
  34.174 -               (unsigned long)create->dev_handle);
  34.175 -        create->status = BLKIF_BE_STATUS_VBD_NOT_FOUND;
  34.176 -        return;
  34.177 -    }
  34.178 -    
  34.179 -    vdi->next = NULL;
  34.180 -    vdi->vdevice = vdevice;
  34.181 -    vdip = &blkif->vdi_hash[VDI_HASH(vdevice)];
  34.182 -    while (*vdip != NULL)
  34.183 -        vdip = &(*vdip)->next;
  34.184 -    *vdip = vdi;
  34.185 -    
  34.186 -    DPRINTF("blkif_create succeeded\n"); 
  34.187 -    create->status = BLKIF_BE_STATUS_OKAY;
  34.188 -}
  34.189 -
  34.190 -void vbd_destroy(blkif_be_vbd_destroy_t *destroy)
  34.191 -{
  34.192 -    blkif_t            *blkif;
  34.193 -    vdi_t              *vdi, **vdip;
  34.194 -    blkif_vdev_t        vdevice = destroy->vdevice;
  34.195 -    
  34.196 -    blkif = blkif_find_by_handle(destroy->domid, destroy->blkif_handle);
  34.197 -    if ( blkif == NULL )
  34.198 -    {
  34.199 -        DPRINTF("vbd_destroy attempted for non-existent blkif (%u,%u)\n", 
  34.200 -                destroy->domid, destroy->blkif_handle); 
  34.201 -        destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
  34.202 -        return;
  34.203 -    }
  34.204 -
  34.205 -    vdip = &blkif->vdi_hash[VDI_HASH(vdevice)];
  34.206 -    while ((*vdip != NULL) && ((*vdip)->vdevice != vdevice))
  34.207 -        vdip = &(*vdip)->next;
  34.208 -
  34.209 -    if (*vdip != NULL) 
  34.210 -    {
  34.211 -        vdi = *vdip;
  34.212 -        *vdip = vdi->next;
  34.213 -        vdi_put(vdi);
  34.214 -    }
  34.215 -        
  34.216 -}
  34.217 -
  34.218 -int parallax_control(control_msg_t *msg)
  34.219 -{
  34.220 -    domid_t  domid;
  34.221 -    int      ret;
  34.222 -
  34.223 -    DPRINTF("parallax_control: msg is %p\n", msg); 
  34.224 -    
  34.225 -    if (msg->type != CMSG_BLKIF_BE) 
  34.226 -    {
  34.227 -        printf("Unexpected control message (%d)\n", msg->type);
  34.228 -        return 0;
  34.229 -    }
  34.230 -    
  34.231 -    switch(msg->subtype)
  34.232 -    {
  34.233 -    case CMSG_BLKIF_BE_CREATE:
  34.234 -        if ( msg->length != sizeof(blkif_be_create_t) )
  34.235 -            goto parse_error;
  34.236 -        blkif_create((blkif_be_create_t *)msg->msg);
  34.237 -        break;   
  34.238 -        
  34.239 -    case CMSG_BLKIF_BE_DESTROY:
  34.240 -        if ( msg->length != sizeof(blkif_be_destroy_t) )
  34.241 -            goto parse_error;
  34.242 -        blkif_destroy((blkif_be_destroy_t *)msg->msg);
  34.243 -        break;  
  34.244 -        
  34.245 -    case CMSG_BLKIF_BE_VBD_CREATE:
  34.246 -        if ( msg->length != sizeof(blkif_be_vbd_create_t) )
  34.247 -            goto parse_error;
  34.248 -        vbd_create((blkif_be_vbd_create_t *)msg->msg);
  34.249 -        break;
  34.250 -        
  34.251 -    case CMSG_BLKIF_BE_VBD_DESTROY:
  34.252 -        if ( msg->length != sizeof(blkif_be_vbd_destroy_t) )
  34.253 -            goto parse_error;
  34.254 -        vbd_destroy((blkif_be_vbd_destroy_t *)msg->msg);
  34.255 -        break;
  34.256 -
  34.257 -    case CMSG_BLKIF_BE_CONNECT:
  34.258 -    case CMSG_BLKIF_BE_DISCONNECT:
  34.259 -        /* we don't manage the device channel, the tap does. */
  34.260 -        break;
  34.261 -
  34.262 -    default:
  34.263 -        goto parse_error;
  34.264 -    }
  34.265 -    return 0;
  34.266 -parse_error:
  34.267 -    printf("Bad control message!\n");
  34.268 -    return 0;
  34.269 -    
  34.270 -}    
  34.271 -
  34.272 -int parallax_probe(blkif_request_t *req, blkif_t *blkif)
  34.273 -{
  34.274 -    blkif_response_t *rsp;
  34.275 -    vdisk_t *img_info;
  34.276 -    vdi_t *vdi;
  34.277 -    int i, nr_vdis = 0; 
  34.278 -
  34.279 -    DPRINTF("parallax_probe: req=%p, blkif=%p\n", req, blkif); 
  34.280 -
  34.281 -    /* We expect one buffer only. */
  34.282 -    if ( req->nr_segments != 1 )
  34.283 -      goto err;
  34.284 -
  34.285 -    /* Make sure the buffer is page-sized. */
  34.286 -    if ( (req->seg[0].first_sect != 0) || (req->seg[0].last_sect != 7) )
  34.287 -      goto err;
  34.288 -
  34.289 -    /* fill the list of devices */
  34.290 -    for (i=0; i<VDI_HASHSZ; i++) {
  34.291 -        vdi = blkif->vdi_hash[i];
  34.292 -        while (vdi) {
  34.293 -            img_info = (vdisk_t *)MMAP_VADDR(ID_TO_IDX(req->id), 0);
  34.294 -            img_info[nr_vdis].device   = vdi->vdevice;
  34.295 -            img_info[nr_vdis].info     = 0;
  34.296 -            /* The -1 here accounts for the LSB in the radix tree */
  34.297 -            img_info[nr_vdis].capacity = 
  34.298 -                    ((1LL << (VDI_HEIGHT-1)) * SECTS_PER_NODE);
  34.299 -            nr_vdis++;
  34.300 -            vdi = vdi->next;
  34.301 -        }
  34.302 -    }
  34.303 -
  34.304 -    
  34.305 -    rsp = (blkif_response_t *)req;
  34.306 -    rsp->id = req->id;
  34.307 -    rsp->operation = BLKIF_OP_PROBE;
  34.308 -    rsp->status = nr_vdis; /* number of disks */
  34.309 -
  34.310 -    DPRINTF("parallax_probe: send positive response (nr_vdis=%d)\n", nr_vdis);
  34.311 -    return  BLKTAP_RESPOND;
  34.312 -err:
  34.313 -    rsp = (blkif_response_t *)req;
  34.314 -    rsp->id = req->id;
  34.315 -    rsp->operation = BLKIF_OP_PROBE;
  34.316 -    rsp->status = BLKIF_RSP_ERROR;
  34.317 -    
  34.318 -    DPRINTF("parallax_probe: send error response\n"); 
  34.319 -    return BLKTAP_RESPOND;  
  34.320 -}
  34.321 -
  34.322 -typedef struct {
  34.323 -    blkif_request_t *req;
  34.324 -    int              count;
  34.325 -    int              error;
  34.326 -    pthread_mutex_t  mutex;
  34.327 -} pending_t;
  34.328 -
  34.329 -#define MAX_REQUESTS 64
  34.330 -pending_t pending_list[MAX_REQUESTS];
  34.331 -
  34.332 -struct cb_param {
  34.333 -    pending_t *pent;
  34.334 -    int       segment;
  34.335 -    uint64_t       sector; 
  34.336 -    uint64_t       vblock; /* for debug printing -- can be removed. */
  34.337 -};
  34.338 -
  34.339 -static void read_cb(struct io_ret r, void *in_param)
  34.340 -{
  34.341 -    struct cb_param *param = (struct cb_param *)in_param;
  34.342 -    pending_t *p = param->pent;
  34.343 -    int segment = param->segment;
  34.344 -    blkif_request_t *req = p->req;
  34.345 -    unsigned long size, offset, start;
  34.346 -    char *dpage, *spage;
  34.347 -	
  34.348 -    spage  = IO_BLOCK(r);
  34.349 -    if (spage == NULL) { p->error++; goto finish; }
  34.350 -    dpage  = (char *)MMAP_VADDR(ID_TO_IDX(req->id), segment);
  34.351 -    
  34.352 -    /* Calculate read size and offset within the read block. */
  34.353 -
  34.354 -    offset = (param->sector << SECTOR_SHIFT) % BLOCK_SIZE;
  34.355 -    size = (req->seg[segment].last_sect - req->seg[segment].first_sect + 1) <<
  34.356 -        SECTOR_SHIFT;
  34.357 -    start = req->seg[segment].first_sect << SECTOR_SHIFT;
  34.358 -
  34.359 -    DPRINTF("ParallaxRead: sect: %lld (%ld,%ld),  "
  34.360 -            "vblock %llx, "
  34.361 -            "size %lx\n", 
  34.362 -            param->sector,
  34.363 -            p->req->seg[segment].first_sect,
  34.364 -            p->req->seg[segment].last_sect,
  34.365 -            param->vblock, size); 
  34.366 -
  34.367 -    memcpy(dpage + start, spage + offset, size);
  34.368 -    freeblock(spage);
  34.369 -    
  34.370 -    /* Done the read.  Now update the pending record. */
  34.371 - finish:
  34.372 -    pthread_mutex_lock(&p->mutex);
  34.373 -    p->count--;
  34.374 -    
  34.375 -    if (p->count == 0) {
  34.376 -    	blkif_response_t *rsp;
  34.377 -    	
  34.378 -        rsp = (blkif_response_t *)req;
  34.379 -        rsp->id = req->id;
  34.380 -        rsp->operation = BLKIF_OP_READ;
  34.381 -    	if (p->error == 0) {
  34.382 -            rsp->status = BLKIF_RSP_OKAY;
  34.383 -    	} else {
  34.384 -            rsp->status = BLKIF_RSP_ERROR;
  34.385 -    	}
  34.386 -        blktap_inject_response(rsp);       
  34.387 -    }
  34.388 -    
  34.389 -    pthread_mutex_unlock(&p->mutex);
  34.390 -	
  34.391 -    free(param); /* TODO: replace with cached alloc/dealloc */
  34.392 -}	
  34.393 -
  34.394 -int parallax_read(blkif_request_t *req, blkif_t *blkif)
  34.395 -{
  34.396 -    blkif_response_t *rsp;
  34.397 -    uint64_t vblock, gblock;
  34.398 -    vdi_t *vdi;
  34.399 -    uint64_t sector;
  34.400 -    int i;
  34.401 -    char *dpage, *spage;
  34.402 -    pending_t *pent;
  34.403 -
  34.404 -    vdi = blkif_get_vdi(blkif, req->device);
  34.405 -    
  34.406 -    if ( vdi == NULL )
  34.407 -        goto err;
  34.408 -        
  34.409 -    pent = &pending_list[ID_TO_IDX(req->id)];
  34.410 -    pent->count = req->nr_segments;
  34.411 -    pent->req = req;
  34.412 -    pthread_mutex_init(&pent->mutex, NULL);
  34.413 -    
  34.414 -    for (i = 0; i < req->nr_segments; i++) {
  34.415 -        pthread_t tid;
  34.416 -        int ret;
  34.417 -        struct cb_param *p;
  34.418 -        
  34.419 -        /* Round the requested segment to a block address. */
  34.420 -        sector  = req->sector_number + (8*i);
  34.421 -        vblock = (sector << SECTOR_SHIFT) >> BLOCK_SHIFT;
  34.422 -        
  34.423 -        /* TODO: Replace this call to malloc with a cached allocation */
  34.424 -        p = (struct cb_param *)malloc(sizeof(struct cb_param));
  34.425 -        p->pent = pent;
  34.426 -        p->sector = sector; 
  34.427 -        p->segment = i;     
  34.428 -        p->vblock = vblock; /* dbg */
  34.429 -        
  34.430 -        /* Get that block from the store. */
  34.431 -        vdi_read(vdi, vblock, read_cb, (void *)p);    
  34.432 -    }
  34.433 -    
  34.434 -    return BLKTAP_STOLEN;
  34.435 -
  34.436 -err:
  34.437 -    rsp = (blkif_response_t *)req;
  34.438 -    rsp->id = req->id;
  34.439 -    rsp->operation = BLKIF_OP_READ;
  34.440 -    rsp->status = BLKIF_RSP_ERROR;
  34.441 -    
  34.442 -    return BLKTAP_RESPOND;  
  34.443 -}
  34.444 -
  34.445 -static void write_cb(struct io_ret r, void *in_param)
  34.446 -{
  34.447 -    struct cb_param *param = (struct cb_param *)in_param;
  34.448 -    pending_t *p = param->pent;
  34.449 -    blkif_request_t *req = p->req;
  34.450 -    
  34.451 -    /* catch errors from the block code. */
  34.452 -    if (IO_INT(r) < 0) p->error++;
  34.453 -    
  34.454 -    pthread_mutex_lock(&p->mutex);
  34.455 -    p->count--;
  34.456 -    
  34.457 -    if (p->count == 0) {
  34.458 -    	blkif_response_t *rsp;
  34.459 -    	
  34.460 -        rsp = (blkif_response_t *)req;
  34.461 -        rsp->id = req->id;
  34.462 -        rsp->operation = BLKIF_OP_WRITE;
  34.463 -    	if (p->error == 0) {
  34.464 -            rsp->status = BLKIF_RSP_OKAY;
  34.465 -    	} else {
  34.466 -            rsp->status = BLKIF_RSP_ERROR;
  34.467 -    	}
  34.468 -        blktap_inject_response(rsp);       
  34.469 -    }
  34.470 -    
  34.471 -    pthread_mutex_unlock(&p->mutex);
  34.472 -	
  34.473 -    free(param); /* TODO: replace with cached alloc/dealloc */
  34.474 -}
  34.475 -
  34.476 -int parallax_write(blkif_request_t *req, blkif_t *blkif)
  34.477 -{
  34.478 -    blkif_response_t *rsp;
  34.479 -    uint64_t sector;
  34.480 -    int i, writable = 0;
  34.481 -    uint64_t vblock, gblock;
  34.482 -    char *spage;
  34.483 -    unsigned long size, offset, start;
  34.484 -    vdi_t *vdi;
  34.485 -    pending_t *pent;
  34.486 -
  34.487 -    vdi = blkif_get_vdi(blkif, req->device);
  34.488 -    
  34.489 -    if ( vdi == NULL )
  34.490 -        goto err;
  34.491 -        
  34.492 -    pent = &pending_list[ID_TO_IDX(req->id)];
  34.493 -    pent->count = req->nr_segments;
  34.494 -    pent->req = req;
  34.495 -    pthread_mutex_init(&pent->mutex, NULL);
  34.496 -    
  34.497 -    for (i = 0; i < req->nr_segments; i++) {
  34.498 -        struct cb_param *p;
  34.499 -        
  34.500 -        spage  = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i);
  34.501 -        
  34.502 -        /* Round the requested segment to a block address. */
  34.503 -        
  34.504 -        sector  = req->sector_number + (8*i);
  34.505 -        vblock = (sector << SECTOR_SHIFT) >> BLOCK_SHIFT;
  34.506 -        
  34.507 -        /* Calculate read size and offset within the read block. */
  34.508 -        
  34.509 -        offset = (sector << SECTOR_SHIFT) % BLOCK_SIZE;
  34.510 -        size = (req->seg[i].last_sect - req->seg[i].first_sect + 1) <<
  34.511 -            SECTOR_SHIFT;
  34.512 -        start = req->seg[i].first_sect << SECTOR_SHIFT;
  34.513 -
  34.514 -        DPRINTF("ParallaxWrite: sect: %lld (%ld,%ld),  "
  34.515 -                "vblock %llx, gblock %llx, "
  34.516 -                "size %lx\n", 
  34.517 -                sector, 
  34.518 -                req->seg[i].first_sect, req->seg[i].last_sect,
  34.519 -                vblock, gblock, size); 
  34.520 -      
  34.521 -        /* XXX: For now we just freak out if they try to write a   */
  34.522 -        /* non block-sized, block-aligned page.                    */
  34.523 -        
  34.524 -        if ((offset != 0) || (size != BLOCK_SIZE) || (start != 0)) {
  34.525 -            printf("]\n] STRANGE WRITE!\n]\n");
  34.526 -            goto err;
  34.527 -        }
  34.528 -        
  34.529 -        /* TODO: Replace this call to malloc with a cached allocation */
  34.530 -        p = (struct cb_param *)malloc(sizeof(struct cb_param));
  34.531 -        p->pent = pent;
  34.532 -        p->sector = sector; 
  34.533 -        p->segment = i;     
  34.534 -        p->vblock = vblock; /* dbg */
  34.535 -        
  34.536 -        /* Issue the write to the store. */
  34.537 -        vdi_write(vdi, vblock, spage, write_cb, (void *)p);
  34.538 -    }
  34.539 -
  34.540 -    return BLKTAP_STOLEN;
  34.541 -
  34.542 -err:
  34.543 -    rsp = (blkif_response_t *)req;
  34.544 -    rsp->id = req->id;
  34.545 -    rsp->operation = BLKIF_OP_WRITE;
  34.546 -    rsp->status = BLKIF_RSP_ERROR;
  34.547 -    
  34.548 -    return BLKTAP_RESPOND;  
  34.549 -}
  34.550 -
  34.551 -int parallax_request(blkif_request_t *req)
  34.552 -{
  34.553 -    blkif_response_t *rsp;
  34.554 -    domid_t  dom   = ID_TO_DOM(req->id);
  34.555 -    blkif_t *blkif = blkif_find_by_handle(dom, 0);
  34.556 -    
  34.557 -    if (blkif == NULL)
  34.558 -        goto err;
  34.559 -    
  34.560 -    if ( req->operation == BLKIF_OP_PROBE ) {
  34.561 -        
  34.562 -        return parallax_probe(req, blkif);
  34.563 -        
  34.564 -    } else if ( req->operation == BLKIF_OP_READ ) {
  34.565 -        
  34.566 -        return parallax_read(req, blkif);
  34.567 -        
  34.568 -    } else if ( req->operation == BLKIF_OP_WRITE ) {
  34.569 -        
  34.570 -        return parallax_write(req, blkif);
  34.571 -        
  34.572 -    } else {
  34.573 -        printf("Unknown request message type!\n");
  34.574 -        /* Unknown operation */
  34.575 -        goto err;
  34.576 -    }
  34.577 -    
  34.578 -err:
  34.579 -    rsp = (blkif_response_t *)req;
  34.580 -    rsp->operation = req->operation;
  34.581 -    rsp->id = req->id;
  34.582 -    rsp->status = BLKIF_RSP_ERROR;
  34.583 -    return BLKTAP_RESPOND;  
  34.584 -}
  34.585 -
  34.586 -void __init_parallax(void) 
  34.587 -{
  34.588 -    memset(blkif_hash, 0, sizeof(blkif_hash));
  34.589 -}
  34.590 -
  34.591 -
  34.592 -
  34.593 -int main(int argc, char *argv[])
  34.594 -{
  34.595 -    DPRINTF("parallax: starting.\n"); 
  34.596 -    __init_blockstore();
  34.597 -    DPRINTF("parallax: initialized blockstore...\n"); 
  34.598 -    init_block_async();
  34.599 -    DPRINTF("parallax: initialized async blocks...\n"); 
  34.600 -    __init_vdi();
  34.601 -    DPRINTF("parallax: initialized vdi registry etc...\n"); 
  34.602 -    __init_parallax();
  34.603 -    DPRINTF("parallax: initialized local stuff..\n"); 
  34.604 -
  34.605 -    blktap_register_ctrl_hook("parallax_control", parallax_control);
  34.606 -    blktap_register_request_hook("parallax_request", parallax_request);
  34.607 -    DPRINTF("parallax: added ctrl + request hooks, starting listen...\n"); 
  34.608 -    blktap_listen();
  34.609 -    
  34.610 -    return 0;
  34.611 -}
    35.1 --- a/tools/blktap/parallax/radix.c	Fri Jun 23 15:26:01 2006 -0600
    35.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    35.3 @@ -1,631 +0,0 @@
    35.4 -/*
    35.5 - * Radix tree for mapping (up to) 63-bit virtual block IDs to
    35.6 - * 63-bit global block IDs
    35.7 - *
    35.8 - * Pointers within the tree set aside the least significant bit to indicate
    35.9 - * whther or not the target block is writable from this node.
   35.10 - *
   35.11 - * The block with ID 0 is assumed to be an empty block of all zeros
   35.12 - */
   35.13 -
   35.14 -#include <unistd.h>
   35.15 -#include <stdio.h>
   35.16 -#include <stdlib.h>
   35.17 -#include <assert.h>
   35.18 -#include <string.h>
   35.19 -#include <pthread.h>
   35.20 -#include "blockstore.h"
   35.21 -#include "radix.h"
   35.22 -
   35.23 -#define RADIX_TREE_MAP_SHIFT 9
   35.24 -#define RADIX_TREE_MAP_MASK 0x1ff
   35.25 -#define RADIX_TREE_MAP_ENTRIES 512
   35.26 -
   35.27 -/*
   35.28 -#define DEBUG
   35.29 -*/
   35.30 -
   35.31 -/* Experimental radix cache. */
   35.32 -
   35.33 -static  pthread_mutex_t rcache_mutex = PTHREAD_MUTEX_INITIALIZER;
   35.34 -static  int rcache_count = 0;
   35.35 -#define RCACHE_MAX 1024
   35.36 -
   35.37 -typedef struct rcache_st {
   35.38 -    radix_tree_node  *node;
   35.39 -    uint64_t               id;
   35.40 -    struct rcache_st *hash_next;
   35.41 -    struct rcache_st *cache_next;
   35.42 -    struct rcache_st *cache_prev;
   35.43 -} rcache_t;
   35.44 -
   35.45 -static rcache_t *rcache_head = NULL;
   35.46 -static rcache_t *rcache_tail = NULL;
   35.47 -
   35.48 -#define RCHASH_SIZE 512ULL
   35.49 -rcache_t *rcache[RCHASH_SIZE];
   35.50 -#define RCACHE_HASH(_id) ((_id) & (RCHASH_SIZE - 1))
   35.51 -
   35.52 -void __rcache_init(void)
   35.53 -{
   35.54 -    int i;
   35.55 -
   35.56 -    for (i=0; i<RCHASH_SIZE; i++)
   35.57 -        rcache[i] = NULL;
   35.58 -}
   35.59 -    
   35.60 -
   35.61 -void rcache_write(uint64_t id, radix_tree_node *node)
   35.62 -{
   35.63 -    rcache_t *r, *tmp, **curs;
   35.64 -    
   35.65 -    pthread_mutex_lock(&rcache_mutex);
   35.66 -    
   35.67 -    /* Is it already in the cache? */
   35.68 -    r = rcache[RCACHE_HASH(id)];
   35.69 -    
   35.70 -    for (;;) {
   35.71 -        if (r == NULL) 
   35.72 -            break;
   35.73 -        if (r->id == id) 
   35.74 -        {
   35.75 -            memcpy(r->node, node, BLOCK_SIZE);
   35.76 -            
   35.77 -            /* bring to front. */
   35.78 -            if (r != rcache_head) {
   35.79 -                
   35.80 -                if (r == rcache_tail) {
   35.81 -                    if (r->cache_prev != NULL) rcache_tail = r->cache_prev;
   35.82 -                    rcache_tail->cache_next = NULL;
   35.83 -                }
   35.84 -
   35.85 -                tmp = r->cache_next;
   35.86 -                if (r->cache_next != NULL) r->cache_next->cache_prev 
   35.87 -                                                     = r->cache_prev;
   35.88 -                if (r->cache_prev != NULL) r->cache_prev->cache_next = tmp;
   35.89 -
   35.90 -                r->cache_prev = NULL;
   35.91 -                r->cache_next = rcache_head;
   35.92 -                if (rcache_head != NULL) rcache_head->cache_prev = r;
   35.93 -                rcache_head = r;
   35.94 -            }
   35.95 -
   35.96 -//printf("Update (%Ld)\n", r->id);
   35.97 -            goto done;
   35.98 -        }
   35.99 -        r = r->hash_next;
  35.100 -    }
  35.101 -    
  35.102 -    if ( rcache_count == RCACHE_MAX ) 
  35.103 -    {
  35.104 -        /* Remove an entry */
  35.105 -        
  35.106 -        r = rcache_tail;
  35.107 -        if (r->cache_prev != NULL) rcache_tail = r->cache_prev;
  35.108 -        rcache_tail->cache_next = NULL;
  35.109 -        freeblock(r->node);
  35.110 -        
  35.111 -        curs = &rcache[RCACHE_HASH(r->id)];
  35.112 -        while ((*curs) != r)
  35.113 -            curs = &(*curs)->hash_next;
  35.114 -        *curs = r->hash_next;
  35.115 -//printf("Evict (%Ld)\n", r->id);
  35.116 -        
  35.117 -    } else {
  35.118 -        
  35.119 -        r = (rcache_t *)malloc(sizeof(rcache_t));
  35.120 -        rcache_count++;
  35.121 -    }
  35.122 -    
  35.123 -    r->node = newblock();
  35.124 -    memcpy(r->node, node, BLOCK_SIZE);
  35.125 -    r->id = id;
  35.126 -    
  35.127 -    r->hash_next = rcache[RCACHE_HASH(id)];
  35.128 -    rcache[RCACHE_HASH(id)] = r;
  35.129 -    
  35.130 -    r->cache_prev = NULL;
  35.131 -    r->cache_next = rcache_head;
  35.132 -    if (rcache_head != NULL) rcache_head->cache_prev = r;
  35.133 -    rcache_head = r;
  35.134 -    if (rcache_tail == NULL) rcache_tail = r;
  35.135 -    
  35.136 -//printf("Added (%Ld, %p)\n", id, r->node);
  35.137 -done:
  35.138 -    pthread_mutex_unlock(&rcache_mutex);
  35.139 -}
  35.140 -
  35.141 -radix_tree_node *rcache_read(uint64_t id)
  35.142 -{
  35.143 -    rcache_t *r, *tmp;
  35.144 -    radix_tree_node *node = NULL;
  35.145 -    
  35.146 -    pthread_mutex_lock(&rcache_mutex);
  35.147 -
  35.148 -    r = rcache[RCACHE_HASH(id)];
  35.149 -    
  35.150 -    for (;;) {
  35.151 -        if (r == NULL) {
  35.152 -//printf("Miss (%Ld)\n", id);
  35.153 -            goto done;
  35.154 -        }
  35.155 -        if (r->id == id) break;
  35.156 -        r = r->hash_next;
  35.157 -    }
  35.158 -   
  35.159 -    /* bring to front. */
  35.160 -    if (r != rcache_head) 
  35.161 -    {
  35.162 -        if (r == rcache_tail) {
  35.163 -            if (r->cache_prev != NULL) rcache_tail = r->cache_prev;
  35.164 -            rcache_tail->cache_next = NULL;
  35.165 -        }
  35.166 -        tmp = r->cache_next;
  35.167 -        if (r->cache_next != NULL) r->cache_next->cache_prev = r->cache_prev;
  35.168 -        if (r->cache_prev != NULL) r->cache_prev->cache_next = tmp;
  35.169 -
  35.170 -        r->cache_prev = NULL;
  35.171 -        r->cache_next = rcache_head;
  35.172 -        if (rcache_head != NULL) rcache_head->cache_prev = r;
  35.173 -        rcache_head = r;
  35.174 -    }
  35.175 -    
  35.176 -    node = newblock();
  35.177 -    memcpy(node, r->node, BLOCK_SIZE);
  35.178 -    
  35.179 -//printf("Hit (%Ld, %p)\n", id, r->node);
  35.180 -done:
  35.181 -    pthread_mutex_unlock(&rcache_mutex);
  35.182 -    
  35.183 -    return(node);
  35.184 -}
  35.185 -
  35.186 -
  35.187 -void *rc_readblock(uint64_t id)
  35.188 -{
  35.189 -    void *ret;
  35.190 -    
  35.191 -    ret = (void *)rcache_read(id);
  35.192 -    
  35.193 -    if (ret != NULL) return ret;
  35.194 -    
  35.195 -    ret = readblock(id);
  35.196 -    
  35.197 -    if (ret != NULL)
  35.198 -        rcache_write(id, ret);
  35.199 -    
  35.200 -    return(ret);
  35.201 -}
  35.202 -
  35.203 -uint64_t rc_allocblock(void *block)
  35.204 -{
  35.205 -    uint64_t ret;
  35.206 -    
  35.207 -    ret = allocblock(block);
  35.208 -    
  35.209 -    if (ret != ZERO)
  35.210 -        rcache_write(ret, block);
  35.211 -    
  35.212 -    return(ret);
  35.213 -}
  35.214 -
  35.215 -int rc_writeblock(uint64_t id, void *block)
  35.216 -{
  35.217 -    int ret;
  35.218 -    
  35.219 -    ret = writeblock(id, block);
  35.220 -    rcache_write(id, block);
  35.221 -    
  35.222 -    return(ret);
  35.223 -}
  35.224 -
  35.225 -
  35.226 -/*
  35.227 - * block device interface and other helper functions
  35.228 - * with these functions, block id is just a 63-bit number, with
  35.229 - * no special consideration for the LSB
  35.230 - */
  35.231 -radix_tree_node cloneblock(radix_tree_node block);
  35.232 -
  35.233 -/*
  35.234 - * main api
  35.235 - * with these functions, the LSB of root always indicates
  35.236 - * whether or not the block is writable, including the return
  35.237 - * values of update and snapshot
  35.238 - */
  35.239 -uint64_t lookup(int height, uint64_t root, uint64_t key);
  35.240 -uint64_t update(int height, uint64_t root, uint64_t key, uint64_t val);
  35.241 -uint64_t snapshot(uint64_t root);
  35.242 -
  35.243 -/**
  35.244 - * cloneblock: clone an existing block in memory
  35.245 - *   @block: the old block
  35.246 - *
  35.247 - *   @return: new block, with LSB cleared for every entry
  35.248 - */
  35.249 -radix_tree_node cloneblock(radix_tree_node block) {
  35.250 -    radix_tree_node node = (radix_tree_node) malloc(BLOCK_SIZE);
  35.251 -    int i;
  35.252 -    if (node == NULL) {
  35.253 -        perror("cloneblock malloc");
  35.254 -        return NULL;
  35.255 -    }
  35.256 -    for (i = 0; i < RADIX_TREE_MAP_ENTRIES; i++)
  35.257 -        node[i] = block[i] & ONEMASK;
  35.258 -    return node;
  35.259 -}
  35.260 -
  35.261 -/**
  35.262 - * lookup: find a value given a key
  35.263 - *   @height: height in bits of the radix tree
  35.264 - *   @root: root node id, with set LSB indicating writable node
  35.265 - *   @key: key to lookup
  35.266 - *
  35.267 - *   @return: value on success, zero on error
  35.268 - */
  35.269 -
  35.270 -uint64_t lookup(int height, uint64_t root, uint64_t key) {
  35.271 -    radix_tree_node node;
  35.272 -    uint64_t mask = ONE;
  35.273 -    
  35.274 -    assert(key >> height == 0);
  35.275 -
  35.276 -    /* the root block may be smaller to ensure all leaves are full */
  35.277 -    height = ((height - 1) / RADIX_TREE_MAP_SHIFT) * RADIX_TREE_MAP_SHIFT;
  35.278 -
  35.279 -    /* now carve off equal sized chunks at each step */
  35.280 -    for (;;) {
  35.281 -        uint64_t oldroot;
  35.282 -
  35.283 -#ifdef DEBUG
  35.284 -        printf("lookup: height=%3d root=%3Ld offset=%3d%s\n", height, root,
  35.285 -                (int) ((key >> height) & RADIX_TREE_MAP_MASK),
  35.286 -                (iswritable(root) ? "" : " (readonly)"));
  35.287 -#endif
  35.288 -        
  35.289 -        if (getid(root) == ZERO)
  35.290 -            return ZERO;
  35.291 -
  35.292 -        oldroot = root;
  35.293 -        node = (radix_tree_node) rc_readblock(getid(root));
  35.294 -        if (node == NULL)
  35.295 -            return ZERO;
  35.296 -
  35.297 -        root = node[(key >> height) & RADIX_TREE_MAP_MASK];
  35.298 -        mask &= root;
  35.299 -        freeblock(node);
  35.300 -
  35.301 -        if (height == 0)
  35.302 -            return ( root & ONEMASK ) | mask;
  35.303 -
  35.304 -        height -= RADIX_TREE_MAP_SHIFT;
  35.305 -    }
  35.306 -
  35.307 -    return ZERO;
  35.308 -}
  35.309 -
  35.310 -/*
  35.311 - * update: set a radix tree entry, doing copy-on-write as necessary
  35.312 - *   @height: height in bits of the radix tree
  35.313 - *   @root: root node id, with set LSB indicating writable node
  35.314 - *   @key: key to set
  35.315 - *   @val: value to set, s.t. radix(key)=val
  35.316 - *
  35.317 - *   @returns: (possibly new) root id on success (with LSB=1), 0 on failure
  35.318 - */
  35.319 -
  35.320 -uint64_t update(int height, uint64_t root, uint64_t key, uint64_t val) {
  35.321 -    int offset;
  35.322 -    uint64_t child;
  35.323 -    radix_tree_node node;
  35.324 -    
  35.325 -    /* base case--return val */
  35.326 -    if (height == 0)
  35.327 -        return val;
  35.328 -
  35.329 -    /* the root block may be smaller to ensure all leaves are full */
  35.330 -    height = ((height - 1) / RADIX_TREE_MAP_SHIFT) * RADIX_TREE_MAP_SHIFT;
  35.331 -    offset = (key >> height) & RADIX_TREE_MAP_MASK;
  35.332 -
  35.333 -#ifdef DEBUG
  35.334 -    printf("update: height=%3d root=%3Ld offset=%3d%s\n", height, root,
  35.335 -            offset, (iswritable(root)?"":" (clone)"));
  35.336 -#endif
  35.337 -
  35.338 -    /* load a block, or create a new one */
  35.339 -    if (root == ZERO) {
  35.340 -        node = (radix_tree_node) newblock();
  35.341 -    } else {
  35.342 -        node = (radix_tree_node) rc_readblock(getid(root));
  35.343 -
  35.344 -        if (!iswritable(root)) {
  35.345 -            /* need to clone this node */
  35.346 -            radix_tree_node oldnode = node;
  35.347 -            node = cloneblock(node);
  35.348 -            freeblock(oldnode);
  35.349 -            root = ZERO;
  35.350 -        }
  35.351 -    }
  35.352 -
  35.353 -    if (node == NULL) {
  35.354 -#ifdef DEBUG
  35.355 -        printf("update: node is null!\n");
  35.356 -#endif
  35.357 -        return ZERO;
  35.358 -    }
  35.359 -
  35.360 -    child = update(height, node[offset], key, val);
  35.361 -
  35.362 -    if (child == ZERO) {
  35.363 -        freeblock(node);
  35.364 -        return ZERO;
  35.365 -    } else if (child == node[offset]) {
  35.366 -        /* no change, so we already owned the child */
  35.367 -        assert(iswritable(root));
  35.368 -
  35.369 -        freeblock(node);
  35.370 -        return root;
  35.371 -    }
  35.372 -
  35.373 -    node[offset] = child;
  35.374 -
  35.375 -    /* new/cloned blocks need to be saved */
  35.376 -    if (root == ZERO) {
  35.377 -        /* mark this as an owned block */
  35.378 -        root = rc_allocblock(node);
  35.379 -        if (root)
  35.380 -            root = writable(root);
  35.381 -    } else if (rc_writeblock(getid(root), node) < 0) {
  35.382 -        freeblock(node);
  35.383 -        return ZERO;
  35.384 -    }
  35.385 -
  35.386 -    freeblock(node);
  35.387 -    return root;
  35.388 -}
  35.389 -
  35.390 -/**
  35.391 - * snapshot: create a snapshot
  35.392 - *   @root: old root node
  35.393 - *
  35.394 - *   @return: new root node, 0 on error
  35.395 - */
  35.396 -uint64_t snapshot(uint64_t root) {
  35.397 -    radix_tree_node node, newnode;
  35.398 -
  35.399 -    if ((node = rc_readblock(getid(root))) == NULL)
  35.400 -        return ZERO;
  35.401 -
  35.402 -    newnode = cloneblock(node);
  35.403 -    freeblock(node);
  35.404 -    if (newnode == NULL)
  35.405 -        return ZERO;
  35.406 -    
  35.407 -    root = rc_allocblock(newnode);
  35.408 -    freeblock(newnode);
  35.409 -
  35.410 -    if (root == ZERO)
  35.411 -        return ZERO;
  35.412 -    else
  35.413 -        return writable(root);
  35.414 -}
  35.415 -
  35.416 -/**
  35.417 - * collapse: collapse a parent onto a child.
  35.418 - * 
  35.419 - * NOTE: This assumes that parent and child really are, and further that
  35.420 - * there are no other children forked from this parent. (children of the
  35.421 - * child are okay...)
  35.422 - */
  35.423 -
  35.424 -int collapse(int height, uint64_t proot, uint64_t croot)
  35.425 -{
  35.426 -    int i, numlinks, ret, total = 0;
  35.427 -    radix_tree_node pnode, cnode;
  35.428 -    
  35.429 -    if (height == 0) {
  35.430 -        height = -1; /* terminate recursion */
  35.431 -    } else {        
  35.432 -        height = ((height - 1) / RADIX_TREE_MAP_SHIFT) * RADIX_TREE_MAP_SHIFT;
  35.433 -    }
  35.434 -    numlinks = (1UL << RADIX_TREE_MAP_SHIFT);
  35.435 -
  35.436 -    /* Terminal cases: */
  35.437 -
  35.438 -    if ( (getid(proot) == ZERO) || (getid(croot) == ZERO) )
  35.439 -        return -1;
  35.440 -    
  35.441 -    /* get roots */
  35.442 -    if ((pnode = readblock(getid(proot))) == NULL)
  35.443 -        return -1;
  35.444 -    
  35.445 -    if ((cnode = readblock(getid(croot))) == NULL)
  35.446 -    {
  35.447 -        freeblock(pnode);
  35.448 -        return -1;
  35.449 -    }
  35.450 -    
  35.451 -    /* For each writable link in proot */
  35.452 -    for (i=0; i<numlinks; i++)
  35.453 -    {
  35.454 -        if ( pnode[i] == cnode[i] ) continue;
  35.455 -        
  35.456 -        /* collapse (next level) */
  35.457 -        /* if height != 0 and writable... */
  35.458 -        if (( height >= 0 ) && ( iswritable(pnode[i]) ) )
  35.459 -        {
  35.460 -            //printf("   %Ld is writable (i=%d).\n", getid(pnode[i]), i);
  35.461 -            ret = collapse(height, pnode[i], cnode[i]);
  35.462 -            if (ret == -1) 
  35.463 -            {
  35.464 -                total = -1;
  35.465 -            } else {
  35.466 -                total += ret;
  35.467 -            }
  35.468 -        }
  35.469 -    
  35.470 -        
  35.471 -    }
  35.472 -    
  35.473 -    /* if plink is writable, AND clink is writable -> free plink block */
  35.474 -    if ( ( iswritable(proot) ) && ( iswritable(croot) ) ) 
  35.475 -    {
  35.476 -        releaseblock(getid(proot));
  35.477 -        if (ret >=0) total++;
  35.478 -        //printf("   Delete %Ld\n", getid(proot));
  35.479 -    }
  35.480 -//printf("done : %Ld\n", getid(proot));
  35.481 -    return total;
  35.482 -
  35.483 -}
  35.484 -
  35.485 -
  35.486 -void print_root(uint64_t root, int height, FILE *dot_f)
  35.487 -{
  35.488 -    FILE *f;
  35.489 -    int i;
  35.490 -    radix_tree_node node;
  35.491 -    char *style[2] = { "", "style=bold,color=blue," };
  35.492 -    
  35.493 -    if (dot_f == NULL) {
  35.494 -        f = fopen("radix.dot", "w");
  35.495 -        if (f == NULL) {
  35.496 -            perror("print_root: open");
  35.497 -            return;
  35.498 -        }
  35.499 -
  35.500 -        /* write graph preamble */
  35.501 -        fprintf(f, "digraph G {\n");
  35.502 -
  35.503 -        /* add a node for this root. */
  35.504 -        fprintf(f, "   n%Ld [%sshape=box,label=\"%Ld\"];\n", 
  35.505 -                getid(root), style[iswritable(root)], getid(root));
  35.506 -    }
  35.507 -    
  35.508 -    printf("print_root(%Ld)\n", getid(root));
  35.509 -    
  35.510 -    /* base case */
  35.511 -    if (height == 0) {
  35.512 -        /* add a node and edge for each child root */
  35.513 -        node = (radix_tree_node) readblock(getid(root));
  35.514 -        if (node == NULL)
  35.515 -            return;
  35.516 -        
  35.517 -        for (i = 0; i < RADIX_TREE_MAP_ENTRIES; i++) {
  35.518 -            if (node[i] != ZERO) {
  35.519 -                fprintf(f, "   n%Ld [%sshape=box,label=\"%Ld\"];\n", 
  35.520 -                        getid(node[i]), style[iswritable(node[i])], 
  35.521 -                        getid(node[i]));
  35.522 -                fprintf(f, "   n%Ld -> n%Ld [label=\"%d\"]\n", getid(root), 
  35.523 -                        getid(node[i]), i);
  35.524 -            }
  35.525 -        }
  35.526 -        freeblock(node);
  35.527 -        return;
  35.528 -    }
  35.529 -
  35.530 -    /* the root block may be smaller to ensure all leaves are full */
  35.531 -    height = ((height - 1) / RADIX_TREE_MAP_SHIFT) * RADIX_TREE_MAP_SHIFT;
  35.532 -
  35.533 -    if (getid(root) == ZERO)
  35.534 -        return;
  35.535 -
  35.536 -    node = (radix_tree_node) readblock(getid(root));
  35.537 -    if (node == NULL)
  35.538 -        return;
  35.539 -
  35.540 -    /* add a node and edge for each child root */
  35.541 -    for (i = 0; i < RADIX_TREE_MAP_ENTRIES; i++)
  35.542 -        if (node[i] != ZERO) {
  35.543 -            fprintf(f, "   n%Ld [%sshape=box,label=\"%Ld\"];\n", 
  35.544 -                    getid(node[i]), style[iswritable(node[i])], 
  35.545 -                    getid(node[i]));
  35.546 -
  35.547 -            print_root(node[i], height-RADIX_TREE_MAP_SHIFT, f);
  35.548 -            fprintf(f, "   n%Ld -> n%Ld [label=\"%d\"]\n", getid(root), 
  35.549 -                    getid(node[i]), i);
  35.550 -        }
  35.551 -
  35.552 -    freeblock(node);
  35.553 -    
  35.554 -    /* write graph postamble */
  35.555 -    if (dot_f == NULL) {
  35.556 -        fprintf(f, "}\n");
  35.557 -        fclose(f);
  35.558 -    }
  35.559 -}
  35.560 -
  35.561 -#ifdef RADIX_STANDALONE
  35.562 -
  35.563 -int main(int argc, char **argv) {
  35.564 -    uint64_t key = ZERO, val = ZERO;
  35.565 -    uint64_t root = writable(2ULL);
  35.566 -    uint64_t p = ZERO, c = ZERO;
  35.567 -    int v;
  35.568 -    char buff[4096];
  35.569 -
  35.570 -    __init_blockstore();
  35.571 -    
  35.572 -    memset(buff, 0, 4096);
  35.573 -    /*fp = open("radix.dat", O_RDWR | O_CREAT, 0644);
  35.574 -
  35.575 -    if (fp < 3) {
  35.576 -        perror("open");
  35.577 -        return -1;
  35.578 -    }
  35.579 -    if (lseek(fp, 0, SEEK_END) == 0) {
  35.580 -        write(fp, buff, 4096);
  35.581 -    }*/
  35.582 -        
  35.583 -    allocblock(buff);
  35.584 -            
  35.585 -    printf("Recognized commands:\n"
  35.586 -           "Note: the LSB of a node number indicates if it is writable\n"
  35.587 -           "  root <node>               set root to <node>\n"
  35.588 -           "  snapshot                  take a snapshot of the root\n"
  35.589 -           "  set <key> <val>           set key=val\n"
  35.590 -           "  get <key>                 query key\n"
  35.591 -           "  c <proot> <croot>         collapse\n"
  35.592 -           "  pr                        print tree to dot\n"
  35.593 -           "  pf <1=verbose>            print freelist\n"
  35.594 -           "  quit\n"
  35.595 -           "\nroot = %Ld\n", root);
  35.596 -    for (;;) {
  35.597 -        //print_root(root, 34, NULL);
  35.598 -        //system("dot radix.dot -Tps -o radix.ps");
  35.599 -
  35.600 -        printf("> ");
  35.601 -        fflush(stdout);
  35.602 -        fgets(buff, 1024, stdin);
  35.603 -        if (feof(stdin))
  35.604 -            break;
  35.605 -        if (sscanf(buff, " root %Ld", &root) == 1) {
  35.606 -            printf("root set to %Ld\n", root);
  35.607 -        } else if (sscanf(buff, " set %Ld %Ld", &key, &val) == 2) {
  35.608 -            root = update(34, root, key, val);
  35.609 -            printf("root = %Ld\n", root);
  35.610 -        } else if (sscanf(buff, " c %Ld %Ld", &p, &c) == 2) {
  35.611 -            v = collapse(34, p, c);
  35.612 -            printf("reclaimed %d blocks.\n", v);
  35.613 -        } else if (sscanf(buff, " get %Ld", &key) == 1) {
  35.614 -            val = lookup(34, root, key);
  35.615 -            printf("value = %Ld\n", val);
  35.616 -        } else if (!strcmp(buff, "quit\n")) {
  35.617 -            break;
  35.618 -        } else if (!strcmp(buff, "snapshot\n")) {
  35.619 -            root = snapshot(root);
  35.620 -            printf("new root = %Ld\n", root);
  35.621 -        } else if (sscanf(buff, " pr %Ld", &root) == 1) {
  35.622 -            print_root(root, 34, NULL);
  35.623 -        } else if (sscanf(buff, " pf %d", &v) == 1) {
  35.624 -            freelist_count(v);
  35.625 -        } else if (!strcmp(buff, "pf\n")) {
  35.626 -            freelist_count(0);
  35.627 -        } else {
  35.628 -            printf("command not recognized\n");
  35.629 -        }
  35.630 -    }
  35.631 -    return 0;
  35.632 -}
  35.633 -
  35.634 -#endif
    36.1 --- a/tools/blktap/parallax/radix.h	Fri Jun 23 15:26:01 2006 -0600
    36.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    36.3 @@ -1,45 +0,0 @@
    36.4 -/*
    36.5 - * Radix tree for mapping (up to) 63-bit virtual block IDs to
    36.6 - * 63-bit global block IDs
    36.7 - *
    36.8 - * Pointers within the tree set aside the least significant bit to indicate
    36.9 - * whther or not the target block is writable from this node.
   36.10 - *
   36.11 - * The block with ID 0 is assumed to be an empty block of all zeros
   36.12 - */
   36.13 -
   36.14 -#ifndef __RADIX_H__
   36.15 -#define __RADIX_H__
   36.16 -
   36.17 -/* I don't really like exposing these, but... */
   36.18 -#define getid(x) (((x)>>1)&0x7fffffffffffffffLL)
   36.19 -#define putid(x) ((x)<<1)
   36.20 -#define writable(x) (((x)<<1)|1LL)
   36.21 -#define iswritable(x) ((x)&1LL)
   36.22 -#define ZERO 0LL
   36.23 -#define ONE 1LL
   36.24 -#define ONEMASK 0xffffffffffffffeLL
   36.25 -
   36.26 -#define RADIX_TREE_MAP_SHIFT 9
   36.27 -#define RADIX_TREE_MAP_MASK 0x1ff
   36.28 -#define RADIX_TREE_MAP_ENTRIES 512
   36.29 -
   36.30 -typedef uint64_t *radix_tree_node;
   36.31 -
   36.32 -
   36.33 -/*
   36.34 - * main api
   36.35 - * with these functions, the LSB of root always indicates
   36.36 - * whether or not the block is writable, including the return
   36.37 - * values of update and snapshot
   36.38 - */
   36.39 -uint64_t lookup(int height, uint64_t root, uint64_t key);
   36.40 -uint64_t update(int height, uint64_t root, uint64_t key, uint64_t val);
   36.41 -uint64_t snapshot(uint64_t root);
   36.42 -int collapse(int height, uint64_t proot, uint64_t croot);
   36.43 -int isprivate(int height, uint64_t root, uint64_t key);
   36.44 -
   36.45 -
   36.46 -void __rcache_init(void);
   36.47 -
   36.48 -#endif /* __RADIX_H__ */
    37.1 --- a/tools/blktap/parallax/requests-async.c	Fri Jun 23 15:26:01 2006 -0600
    37.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    37.3 @@ -1,762 +0,0 @@
    37.4 -/* requests-async.c
    37.5 - *
    37.6 - * asynchronous request dispatcher for radix access in parallax.
    37.7 - */
    37.8 -
    37.9 -#include <stdio.h>
   37.10 -#include <stdlib.h>
   37.11 -#include <string.h>
   37.12 -#include <ctype.h>
   37.13 -#include <assert.h>
   37.14 -#include <pthread.h>
   37.15 -#include <err.h>
   37.16 -#include <zlib.h> /* for crc32() */
   37.17 -#include "requests-async.h"
   37.18 -#include "vdi.h"
   37.19 -#include "radix.h"
   37.20 -
   37.21 -#define L1_IDX(_a) (((_a) & 0x0000000007fc0000ULL) >> 18)
   37.22 -#define L2_IDX(_a) (((_a) & 0x000000000003fe00ULL) >> 9)
   37.23 -#define L3_IDX(_a) (((_a) & 0x00000000000001ffULL))
   37.24 -
   37.25 -
   37.26 -#if 0
   37.27 -#define DPRINTF(_f, _a...) printf ( _f , ## _a )
   37.28 -#else
   37.29 -#define DPRINTF(_f, _a...) ((void)0)
   37.30 -#endif
   37.31 -
   37.32 -struct block_info {
   37.33 -    uint32_t        crc;
   37.34 -    uint32_t        unused;
   37.35 -};
   37.36 -
   37.37 -struct io_req {
   37.38 -    enum { IO_OP_READ, IO_OP_WRITE } op;
   37.39 -    uint64_t        root;
   37.40 -    uint64_t        vaddr;
   37.41 -    int        state;
   37.42 -    io_cb_t    cb;
   37.43 -    void      *param;
   37.44 -    struct radix_lock *lock;
   37.45 -
   37.46 -    /* internal stuff: */
   37.47 -    struct io_ret     retval;/* holds the return while we unlock. */
   37.48 -    char             *block; /* the block to write */
   37.49 -    radix_tree_node   radix[3];
   37.50 -    uint64_t               radix_addr[3];
   37.51 -    struct block_info bi;
   37.52 -};
   37.53 -
   37.54 -void clear_w_bits(radix_tree_node node) 
   37.55 -{
   37.56 -    int i;
   37.57 -    for (i=0; i<RADIX_TREE_MAP_ENTRIES; i++)
   37.58 -        node[i] = node[i] & ONEMASK;
   37.59 -    return;
   37.60 -}
   37.61 -
   37.62 -void clear_L3_w_bits(radix_tree_node node) 
   37.63 -{
   37.64 -    int i;
   37.65 -    for (i=0; i<RADIX_TREE_MAP_ENTRIES; i+=2)
   37.66 -        node[i] = node[i] & ONEMASK;
   37.67 -    return;
   37.68 -}
   37.69 -
   37.70 -enum states {
   37.71 -    /* both */
   37.72 -    READ_L1,
   37.73 -    READ_L2,
   37.74 -    READ_L3,
   37.75 -
   37.76 -    /* read */
   37.77 -    READ_LOCKED,
   37.78 -    READ_DATA,
   37.79 -    READ_UNLOCKED,
   37.80 -    RETURN_ZERO,
   37.81 -
   37.82 -    /* write */
   37.83 -    WRITE_LOCKED,
   37.84 -    WRITE_DATA,
   37.85 -    WRITE_L3,
   37.86 -    WRITE_UNLOCKED,
   37.87 -    
   37.88 -    /* L3 Zero Path */
   37.89 -    ALLOC_DATA_L3z,
   37.90 -    WRITE_L3_L3z,
   37.91 -    
   37.92 -    /* L3 Fault Path */
   37.93 -    ALLOC_DATA_L3f,
   37.94 -    WRITE_L3_L3f,
   37.95 -    
   37.96 -    /* L2 Zero Path */
   37.97 -    ALLOC_DATA_L2z,
   37.98 -    WRITE_L2_L2z,
   37.99 -    ALLOC_L3_L2z,
  37.100 -    WRITE_L2_L3z,
  37.101 -    
  37.102 -    /* L2 Fault Path */
  37.103 -    READ_L3_L2f,
  37.104 -    ALLOC_DATA_L2f,
  37.105 -    WRITE_L2_L2f,
  37.106 -    ALLOC_L3_L2f,
  37.107 -    WRITE_L2_L3f,
  37.108 -
  37.109 -    /* L1 Zero Path */
  37.110 -    ALLOC_DATA_L1z,
  37.111 -    ALLOC_L3_L1z,
  37.112 -    ALLOC_L2_L1z,
  37.113 -    WRITE_L1_L1z,
  37.114 -
  37.115 -    /* L1 Fault Path */
  37.116 -    READ_L2_L1f,
  37.117 -    READ_L3_L1f,
  37.118 -    ALLOC_DATA_L1f,
  37.119 -    ALLOC_L3_L1f,
  37.120 -    ALLOC_L2_L1f,
  37.121 -    WRITE_L1_L1f,
  37.122 -    
  37.123 -};
  37.124 -
  37.125 -enum radix_offsets {
  37.126 -    L1 = 0, 
  37.127 -    L2 = 1,
  37.128 -    L3 = 2
  37.129 -};
  37.130 -
  37.131 -
  37.132 -static void read_cb(struct io_ret ret, void *param);
  37.133 -static void write_cb(struct io_ret ret, void *param);
  37.134 -
  37.135 -int vdi_read(vdi_t *vdi, uint64_t vaddr, io_cb_t cb, void *param)
  37.136 -{
  37.137 -    struct io_req *req;
  37.138 -
  37.139 -    if (!VALID_VADDR(vaddr)) return ERR_BAD_VADDR;
  37.140 -    /* Every second line in the bottom-level radix tree is used to      */
  37.141 -    /* store crc32 values etc. We shift the vadder here to achied this. */
  37.142 -    vaddr <<= 1;
  37.143 -
  37.144 -    req = (struct io_req *)malloc(sizeof (struct io_req));
  37.145 -    if (req == NULL) return ERR_NOMEM;
  37.146 -
  37.147 -    req->radix[0] = req->radix[1] = req->radix[2] = NULL;	
  37.148 -    req->op    = IO_OP_READ;
  37.149 -    req->root  = vdi->radix_root;
  37.150 -    req->lock  = vdi->radix_lock; 
  37.151 -    req->vaddr = vaddr;
  37.152 -    req->cb    = cb;
  37.153 -    req->param = param;
  37.154 -    req->state = READ_LOCKED;
  37.155 -
  37.156 -    block_rlock(req->lock, L1_IDX(vaddr), read_cb, req);
  37.157 -	
  37.158 -    return 0;
  37.159 -}
  37.160 -
  37.161 -
  37.162 -int   vdi_write(vdi_t *vdi, uint64_t vaddr, char *block, 
  37.163 -                io_cb_t cb, void *param)
  37.164 -{
  37.165 -    struct io_req *req;
  37.166 -
  37.167 -    if (!VALID_VADDR(vaddr)) return ERR_BAD_VADDR;
  37.168 -    /* Every second line in the bottom-level radix tree is used to      */
  37.169 -    /* store crc32 values etc. We shift the vadder here to achied this. */
  37.170 -    vaddr <<= 1;
  37.171 -
  37.172 -    req = (struct io_req *)malloc(sizeof (struct io_req));
  37.173 -    if (req == NULL) return ERR_NOMEM; 
  37.174 -
  37.175 -    req->radix[0] = req->radix[1] = req->radix[2] = NULL;
  37.176 -    req->op     = IO_OP_WRITE;
  37.177 -    req->root   = vdi->radix_root;
  37.178 -    req->lock   = vdi->radix_lock; 
  37.179 -    req->vaddr  = vaddr;
  37.180 -    req->block  = block;
  37.181 -    /* Todo: add a pseodoheader to the block to include some location   */
  37.182 -    /* information in the CRC as well.                                  */
  37.183 -    req->bi.crc = (uint32_t) crc32(0L, Z_NULL, 0); 
  37.184 -    req->bi.crc = (uint32_t) crc32(req->bi.crc, block, BLOCK_SIZE); 
  37.185 -    req->bi.unused = 0xdeadbeef;
  37.186 -
  37.187 -    req->cb     = cb;
  37.188 -    req->param  = param;
  37.189 -    req->radix_addr[L1] = getid(req->root); /* for consistency */
  37.190 -    req->state  = WRITE_LOCKED;
  37.191 -
  37.192 -    block_wlock(req->lock, L1_IDX(vaddr), write_cb, req);
  37.193 -
  37.194 -
  37.195 -    return 0;
  37.196 -}
  37.197 -
  37.198 -static void read_cb(struct io_ret ret, void *param)
  37.199 -{
  37.200 -    struct io_req *req = (struct io_req *)param;
  37.201 -    radix_tree_node node;
  37.202 -    uint64_t idx;
  37.203 -    char *block;
  37.204 -    void *req_param;
  37.205 -
  37.206 -    DPRINTF("read_cb\n");
  37.207 -    /* get record */
  37.208 -    switch(req->state) {
  37.209 -    	
  37.210 -    case READ_LOCKED: 
  37.211 -    
  37.212 -        DPRINTF("READ_LOCKED\n");
  37.213 -    	req->state = READ_L1;
  37.214 -    	block_read(getid(req->root), read_cb, req); 
  37.215 -    	break;
  37.216 -    	
  37.217 -    case READ_L1: /* block is the radix root */
  37.218 -
  37.219 -        DPRINTF("READ_L1\n");
  37.220 -        block = IO_BLOCK(ret);
  37.221 -        if (block == NULL) goto fail;
  37.222 -        node = (radix_tree_node) block;
  37.223 -        idx  = getid( node[L1_IDX(req->vaddr)] );
  37.224 -        free(block);
  37.225 -        if ( idx == ZERO ) {
  37.226 -            req->state = RETURN_ZERO;
  37.227 -            block_runlock(req->lock, L1_IDX(req->vaddr), read_cb, req);
  37.228 -        } else {
  37.229 -            req->state = READ_L2;
  37.230 -            block_read(idx, read_cb, req);
  37.231 -        }
  37.232 -        break;
  37.233 -
  37.234 -    case READ_L2:
  37.235 -
  37.236 -        DPRINTF("READ_L2\n");
  37.237 -        block = IO_BLOCK(ret);
  37.238 -        if (block == NULL) goto fail;
  37.239 -        node = (radix_tree_node) block;
  37.240 -        idx  = getid( node[L2_IDX(req->vaddr)] );
  37.241 -        free(block);
  37.242 -        if ( idx == ZERO ) {
  37.243 -            req->state = RETURN_ZERO;
  37.244 -            block_runlock(req->lock, L1_IDX(req->vaddr), read_cb, req);
  37.245 -        } else {
  37.246 -            req->state = READ_L3;
  37.247 -            block_read(idx, read_cb, req);
  37.248 -        }
  37.249 -        break;
  37.250 -
  37.251 -    case READ_L3:
  37.252 -    {
  37.253 -        struct block_info *bi;
  37.254 -
  37.255 -        DPRINTF("READ_L3\n");
  37.256 -        block = IO_BLOCK(ret);
  37.257 -        if (block == NULL) goto fail;
  37.258 -        node = (radix_tree_node) block;
  37.259 -        idx  = getid( node[L3_IDX(req->vaddr)] );
  37.260 -        bi = (struct block_info *) &node[L3_IDX(req->vaddr) + 1];
  37.261 -        req->bi = *bi;
  37.262 -        free(block);
  37.263 -        if ( idx == ZERO )  {
  37.264 -            req->state = RETURN_ZERO;
  37.265 -            block_runlock(req->lock, L1_IDX(req->vaddr), read_cb, req);
  37.266 -        } else {
  37.267 -            req->state = READ_DATA;
  37.268 -            block_read(idx, read_cb, req);
  37.269 -        }
  37.270 -        break;
  37.271 -    }
  37.272 -    case READ_DATA:
  37.273 -    {
  37.274 -        uint32_t crc;
  37.275 -
  37.276 -        DPRINTF("READ_DATA\n");
  37.277 -        block = IO_BLOCK(ret);
  37.278 -        if (block == NULL) goto fail;
  37.279 -
  37.280 -        /* crc check */
  37.281 -        crc = (uint32_t) crc32(0L, Z_NULL, 0); 
  37.282 -        crc = (uint32_t) crc32(crc, block, BLOCK_SIZE); 
  37.283 -        if (crc != req->bi.crc) {
  37.284 -            /* TODO: add a retry loop here.                          */
  37.285 -            /* Do this after the cache is added -- make sure to      */
  37.286 -            /* invalidate the bad page before reissuing the read.    */
  37.287 -
  37.288 -            warn("Bad CRC on vaddr (%Lu:%d)\n", req->vaddr, req->bi.unused);
  37.289 -#ifdef PRINT_BADCRC_PAGES
  37.290 -            {
  37.291 -                int j;
  37.292 -                for (j=0; j<BLOCK_SIZE; j++) {
  37.293 -                    if isprint(block[j]) {
  37.294 -                        printf("%c", block[j]);
  37.295 -                    } else {
  37.296 -                        printf(".");
  37.297 -                    }
  37.298 -                    if ((j % 64) == 0) printf("\n");
  37.299 -                }
  37.300 -            }
  37.301 -#endif /* PRINT_BADCRC_PAGES */
  37.302 -
  37.303 -            /* fast and loose for the moment. */
  37.304 -            /* goto fail;                     */
  37.305 -        }
  37.306 -
  37.307 -        req->retval = ret;
  37.308 -        req->state = READ_UNLOCKED;
  37.309 -        block_runlock(req->lock, L1_IDX(req->vaddr), read_cb, req);
  37.310 -        break;
  37.311 -    }
  37.312 -    case READ_UNLOCKED:
  37.313 -    {
  37.314 -        struct io_ret r;
  37.315 -        io_cb_t cb;
  37.316 -        DPRINTF("READ_UNLOCKED\n");
  37.317 -        req_param = req->param;
  37.318 -        r         = req->retval;
  37.319 -        cb        = req->cb;
  37.320 -        free(req);
  37.321 -        cb(r, req_param);
  37.322 -        break;
  37.323 -    }
  37.324 -    
  37.325 -    case RETURN_ZERO:
  37.326 -    {
  37.327 -        struct io_ret r;
  37.328 -        io_cb_t cb;
  37.329 -        DPRINTF("RETURN_ZERO\n");
  37.330 -        req_param = req->param;
  37.331 -        cb        = req->cb;
  37.332 -        free(req);
  37.333 -        r.type = IO_BLOCK_T;
  37.334 -        r.u.b = newblock();
  37.335 -        cb(r, req_param);
  37.336 -        break;
  37.337 -    }
  37.338 -        
  37.339 -    default:
  37.340 -    	DPRINTF("*** Write: Bad state! (%d) ***\n", req->state);
  37.341 -    	goto fail;
  37.342 -    }
  37.343 - 
  37.344 -    return;
  37.345 -
  37.346 - fail:
  37.347 -    {
  37.348 -        struct io_ret r;
  37.349 -        io_cb_t cb;
  37.350 -        DPRINTF("asyn_read had a read error.\n");
  37.351 -        req_param = req->param;
  37.352 -        r         = ret;
  37.353 -        cb        = req->cb;
  37.354 -        free(req);
  37.355 -        cb(r, req_param);
  37.356 -    }
  37.357 -
  37.358 -
  37.359 -}
  37.360 -
  37.361 -static void write_cb(struct io_ret r, void *param)
  37.362 -{
  37.363 -    struct io_req *req = (struct io_req *)param;
  37.364 -    radix_tree_node node;
  37.365 -    uint64_t a, addr;
  37.366 -    void *req_param;
  37.367 -    struct block_info *bi;
  37.368 -
  37.369 -    switch(req->state) {
  37.370 -    	
  37.371 -    case WRITE_LOCKED:
  37.372 -        
  37.373 -        DPRINTF("WRITE_LOCKED (%llu)\n", L1_IDX(req->vaddr));
  37.374 -    	req->state = READ_L1;
  37.375 -    	block_read(getid(req->root), write_cb, req); 
  37.376 -    	break;
  37.377 -    	
  37.378 -    case READ_L1: /* block is the radix root */
  37.379 -
  37.380 -        DPRINTF("READ_L1\n");
  37.381 -        node = (radix_tree_node) IO_BLOCK(r);
  37.382 -        if (node == NULL) goto fail;
  37.383 -        a    = node[L1_IDX(req->vaddr)];
  37.384 -        addr = getid(a);
  37.385 -
  37.386 -        req->radix_addr[L2] = addr;
  37.387 -        req->radix[L1] = node;
  37.388 -
  37.389 -        if ( addr == ZERO ) {
  37.390 -            /* L1 empty subtree: */
  37.391 -            req->state = ALLOC_DATA_L1z;
  37.392 -            block_alloc( req->block, write_cb, req );
  37.393 -        } else if ( !iswritable(a) ) {
  37.394 -            /* L1 fault: */
  37.395 -            req->state = READ_L2_L1f;
  37.396 -            block_read( addr, write_cb, req );
  37.397 -        } else {
  37.398 -            req->state = READ_L2;
  37.399 -            block_read( addr, write_cb, req );
  37.400 -        }
  37.401 -        break;
  37.402 -    
  37.403 -    case READ_L2:
  37.404 -
  37.405 -        DPRINTF("READ_L2\n");
  37.406 -        node = (radix_tree_node) IO_BLOCK(r);
  37.407 -        if (node == NULL) goto fail;
  37.408 -        a    = node[L2_IDX(req->vaddr)];
  37.409 -        addr = getid(a);
  37.410 -
  37.411 -        req->radix_addr[L3] = addr;
  37.412 -        req->radix[L2] = node;
  37.413 -
  37.414 -        if ( addr == ZERO ) {
  37.415 -            /* L2 empty subtree: */
  37.416 -            req->state = ALLOC_DATA_L2z;
  37.417 -            block_alloc( req->block, write_cb, req );
  37.418 -        } else if ( !iswritable(a) ) {
  37.419 -            /* L2 fault: */
  37.420 -            req->state = READ_L3_L2f;
  37.421 -            block_read( addr, write_cb, req );
  37.422 -        } else {
  37.423 -            req->state = READ_L3;
  37.424 -            block_read( addr, write_cb, req );
  37.425 -        }
  37.426 -        break;
  37.427 -    
  37.428 -    case READ_L3:
  37.429 -
  37.430 -        DPRINTF("READ_L3\n");
  37.431 -        node = (radix_tree_node) IO_BLOCK(r);
  37.432 -        if (node == NULL) goto fail;
  37.433 -        a    = node[L3_IDX(req->vaddr)];
  37.434 -        addr = getid(a);
  37.435 -
  37.436 -        req->radix[L3] = node;
  37.437 -
  37.438 -        if ( addr == ZERO ) {
  37.439 -            /* L3 fault: */
  37.440 -            req->state = ALLOC_DATA_L3z;
  37.441 -            block_alloc( req->block, write_cb, req );
  37.442 -        } else if ( !iswritable(a) ) {
  37.443 -            /* L3 fault: */
  37.444 -            req->state = ALLOC_DATA_L3f;
  37.445 -            block_alloc( req->block, write_cb, req );
  37.446 -        } else {
  37.447 -            req->state = WRITE_DATA;
  37.448 -            block_write( addr, req->block, write_cb, req );
  37.449 -        }
  37.450 -        break;
  37.451 -    
  37.452 -    case WRITE_DATA:
  37.453 -
  37.454 -        DPRINTF("WRITE_DATA\n");
  37.455 -        /* The L3 radix points to the correct block, we just need to  */
  37.456 -        /* update the crc.                                            */
  37.457 -        if (IO_INT(r) < 0) goto fail;
  37.458 -        bi  = (struct block_info *) &req->radix[L3][L3_IDX(req->vaddr)+1];
  37.459 -        req->bi.unused = 101;
  37.460 -        *bi = req->bi;
  37.461 -        req->state = WRITE_L3;
  37.462 -        block_write(req->radix_addr[L3], (char*)req->radix[L3], write_cb, req);
  37.463 -        break;
  37.464 -    
  37.465 -    /* L3 Zero Path: */
  37.466 -
  37.467 -    case ALLOC_DATA_L3z:
  37.468 -
  37.469 -        DPRINTF("ALLOC_DATA_L3z\n");
  37.470 -        addr = IO_ADDR(r);
  37.471 -        a = writable(addr);
  37.472 -        req->radix[L3][L3_IDX(req->vaddr)] = a;
  37.473 -        bi  = (struct block_info *) &req->radix[L3][L3_IDX(req->vaddr)+1];
  37.474 -        req->bi.unused = 102;
  37.475 -        *bi = req->bi;
  37.476 -        req->state = WRITE_L3_L3z;
  37.477 -        block_write(req->radix_addr[L3], (char*)req->radix[L3], write_cb, req);
  37.478 -        break;
  37.479 -    
  37.480 -    /* L3 Fault Path: */
  37.481 -
  37.482 -    case ALLOC_DATA_L3f:
  37.483 -    
  37.484 -        DPRINTF("ALLOC_DATA_L3f\n");
  37.485 -        addr = IO_ADDR(r);
  37.486 -        a = writable(addr);
  37.487 -        req->radix[L3][L3_IDX(req->vaddr)] = a;
  37.488 -        bi  = (struct block_info *) &req->radix[L3][L3_IDX(req->vaddr)+1];
  37.489 -        req->bi.unused = 103;
  37.490 -        *bi = req->bi;
  37.491 -        req->state = WRITE_L3_L3f;
  37.492 -        block_write(req->radix_addr[L3], (char*)req->radix[L3], write_cb, req);
  37.493 -        break;
  37.494 -
  37.495 -    /* L2 Zero Path: */
  37.496 -        
  37.497 -    case ALLOC_DATA_L2z:
  37.498 -
  37.499 -        DPRINTF("ALLOC_DATA_L2z\n");
  37.500 -        addr = IO_ADDR(r);
  37.501 -        a = writable(addr);
  37.502 -        req->radix[L3] = newblock();
  37.503 -        req->radix[L3][L3_IDX(req->vaddr)] = a;
  37.504 -        bi  = (struct block_info *) &req->radix[L3][L3_IDX(req->vaddr)+1];
  37.505 -        req->bi.unused = 104;
  37.506 -        *bi = req->bi;
  37.507 -        req->state = ALLOC_L3_L2z;
  37.508 -        block_alloc( (char*)req->radix[L3], write_cb, req );
  37.509 -        break;
  37.510 -
  37.511 -    case ALLOC_L3_L2z:
  37.512 -
  37.513 -        DPRINTF("ALLOC_L3_L2z\n");
  37.514 -        addr = IO_ADDR(r);
  37.515 -        a = writable(addr);
  37.516 -        req->radix[L2][L2_IDX(req->vaddr)] = a;
  37.517 -        req->state = WRITE_L2_L2z;
  37.518 -        block_write(req->radix_addr[L2], (char*)req->radix[L2], write_cb, req);
  37.519 -        break;
  37.520 -        
  37.521 -    /* L2 Fault Path: */
  37.522 -        
  37.523 -    case READ_L3_L2f:
  37.524 -    
  37.525 -    	DPRINTF("READ_L3_L2f\n");
  37.526 -        node = (radix_tree_node) IO_BLOCK(r);
  37.527 -        clear_L3_w_bits(node);
  37.528 -        if (node == NULL) goto fail;
  37.529 -        a    = node[L2_IDX(req->vaddr)];
  37.530 -        addr = getid(a);
  37.531 -
  37.532 -        req->radix[L3] = node;
  37.533 -        req->state = ALLOC_DATA_L2f;
  37.534 -        block_alloc( req->block, write_cb, req );
  37.535 -        break;
  37.536 -                
  37.537 -    case ALLOC_DATA_L2f:
  37.538 -
  37.539 -        DPRINTF("ALLOC_DATA_L2f\n");
  37.540 -        addr = IO_ADDR(r);
  37.541 -        a = writable(addr);
  37.542 -        req->radix[L3][L3_IDX(req->vaddr)] = a;
  37.543 -        bi  = (struct block_info *) &req->radix[L3][L3_IDX(req->vaddr)+1];
  37.544 -        req->bi.unused = 105;
  37.545 -        *bi = req->bi;
  37.546 -        req->state = ALLOC_L3_L2f;
  37.547 -        block_alloc( (char*)req->radix[L3], write_cb, req );
  37.548 -        break;
  37.549 -
  37.550 -    case ALLOC_L3_L2f:
  37.551 -
  37.552 -        DPRINTF("ALLOC_L3_L2f\n");
  37.553 -        addr = IO_ADDR(r);
  37.554 -        a = writable(addr);
  37.555 -        req->radix[L2][L2_IDX(req->vaddr)] = a;
  37.556 -        req->state = WRITE_L2_L2f;
  37.557 -        block_write(req->radix_addr[L2], (char*)req->radix[L2], write_cb, req);
  37.558 -        break;
  37.559 -        
  37.560 -    /* L1 Zero Path: */
  37.561 -    
  37.562 -    case ALLOC_DATA_L1z:
  37.563 -
  37.564 -        DPRINTF("ALLOC_DATA_L1z\n");
  37.565 -        addr = IO_ADDR(r);
  37.566 -        a = writable(addr);
  37.567 -        req->radix[L3] = newblock();
  37.568 -        req->radix[L3][L3_IDX(req->vaddr)] = a;
  37.569 -        bi  = (struct block_info *) &req->radix[L3][L3_IDX(req->vaddr)+1];
  37.570 -        req->bi.unused = 106;
  37.571 -        *bi = req->bi;
  37.572 -        req->state = ALLOC_L3_L1z;
  37.573 -        block_alloc( (char*)req->radix[L3], write_cb, req );
  37.574 -        break;
  37.575 -        
  37.576 -    case ALLOC_L3_L1z:
  37.577 -
  37.578 -        DPRINTF("ALLOC_L3_L1z\n");
  37.579 -        addr = IO_ADDR(r);
  37.580 -        a = writable(addr);
  37.581 -        req->radix[L2] = newblock();
  37.582 -        req->radix[L2][L2_IDX(req->vaddr)] = a;
  37.583 -        req->state = ALLOC_L2_L1z;
  37.584 -        block_alloc( (char*)req->radix[L2], write_cb, req );
  37.585 -        break;
  37.586 -
  37.587 -    case ALLOC_L2_L1z:
  37.588 -
  37.589 -        DPRINTF("ALLOC_L2_L1z\n");
  37.590 -        addr = IO_ADDR(r);
  37.591 -        a = writable(addr);
  37.592 -        req->radix[L1][L1_IDX(req->vaddr)] = a;
  37.593 -        req->state = WRITE_L1_L1z;
  37.594 -        block_write(req->radix_addr[L1], (char*)req->radix[L1], write_cb, req);
  37.595 -        break;
  37.596 -
  37.597 -    /* L1 Fault Path: */
  37.598 -        
  37.599 -    case READ_L2_L1f:
  37.600 -    
  37.601 -    	DPRINTF("READ_L2_L1f\n");
  37.602 -        node = (radix_tree_node) IO_BLOCK(r);
  37.603 -        clear_w_bits(node);
  37.604 -        if (node == NULL) goto fail;
  37.605 -        a    = node[L2_IDX(req->vaddr)];
  37.606 -        addr = getid(a);
  37.607 -
  37.608 -        req->radix_addr[L3] = addr;
  37.609 -        req->radix[L2] = node;
  37.610 -        
  37.611 -        if (addr == ZERO) {
  37.612 -            /* nothing below L2, create an empty L3 and alloc data. */
  37.613 -            /* (So skip READ_L3_L1f.) */
  37.614 -            req->radix[L3] = newblock();
  37.615 -            req->state = ALLOC_DATA_L1f;
  37.616 -            block_alloc( req->block, write_cb, req );
  37.617 -        } else {
  37.618 -            req->state = READ_L3_L1f;
  37.619 -            block_read( addr, write_cb, req );
  37.620 -        }
  37.621 -        break;
  37.622 -        
  37.623 -    case READ_L3_L1f:
  37.624 -    
  37.625 -    	DPRINTF("READ_L3_L1f\n");
  37.626 -        node = (radix_tree_node) IO_BLOCK(r);
  37.627 -        clear_L3_w_bits(node);
  37.628 -        if (node == NULL) goto fail;
  37.629 -        a    = node[L2_IDX(req->vaddr)];
  37.630 -        addr = getid(a);
  37.631 -
  37.632 -        req->radix[L3] = node;
  37.633 -        req->state = ALLOC_DATA_L1f;
  37.634 -        block_alloc( req->block, write_cb, req );
  37.635 -        break;
  37.636 -                
  37.637 -    case ALLOC_DATA_L1f:
  37.638 -
  37.639 -        DPRINTF("ALLOC_DATA_L1f\n");
  37.640 -        addr = IO_ADDR(r);
  37.641 -        a = writable(addr);
  37.642 -        req->radix[L3][L3_IDX(req->vaddr)] = a;
  37.643 -        bi  = (struct block_info *) &req->radix[L3][L3_IDX(req->vaddr)+1];
  37.644 -        req->bi.unused = 107;
  37.645 -        *bi = req->bi;
  37.646 -        req->state = ALLOC_L3_L1f;
  37.647 -        block_alloc( (char*)req->radix[L3], write_cb, req );
  37.648 -        break;
  37.649 -
  37.650 -    case ALLOC_L3_L1f:
  37.651 -
  37.652 -        DPRINTF("ALLOC_L3_L1f\n");
  37.653 -        addr = IO_ADDR(r);
  37.654 -        a = writable(addr);
  37.655 -        req->radix[L2][L2_IDX(req->vaddr)] = a;
  37.656 -        req->state = ALLOC_L2_L1f;
  37.657 -        block_alloc( (char*)req->radix[L2], write_cb, req );
  37.658 -        break;
  37.659 -
  37.660 -    case ALLOC_L2_L1f:
  37.661 -
  37.662 -        DPRINTF("ALLOC_L2_L1f\n");
  37.663 -        addr = IO_ADDR(r);
  37.664 -        a = writable(addr);
  37.665 -        req->radix[L1][L1_IDX(req->vaddr)] = a;
  37.666 -        req->state = WRITE_L1_L1f;
  37.667 -        block_write(req->radix_addr[L1], (char*)req->radix[L1], write_cb, req);
  37.668 -        break;
  37.669 -
  37.670 -    case WRITE_L3:
  37.671 -    case WRITE_L3_L3z:
  37.672 -    case WRITE_L3_L3f:
  37.673 -    case WRITE_L2_L2z:
  37.674 -    case WRITE_L2_L2f:
  37.675 -    case WRITE_L1_L1z:
  37.676 -    case WRITE_L1_L1f:
  37.677 -    {
  37.678 -    	int i;
  37.679 -        DPRINTF("DONE\n");
  37.680 -        /* free any saved node vals. */
  37.681 -        for (i=0; i<3; i++)
  37.682 -            if (req->radix[i] != 0) free(req->radix[i]);
  37.683 -        req->retval = r;
  37.684 -        req->state = WRITE_UNLOCKED;
  37.685 -        block_wunlock(req->lock, L1_IDX(req->vaddr), write_cb, req);
  37.686 -        break;
  37.687 -    }
  37.688 -    case WRITE_UNLOCKED:
  37.689 -    {
  37.690 -        struct io_ret r;
  37.691 -        io_cb_t cb;
  37.692 -        DPRINTF("WRITE_UNLOCKED!\n");
  37.693 -        req_param = req->param;
  37.694 -        r         = req->retval;
  37.695 -        cb        = req->cb;
  37.696 -        free(req);
  37.697 -        cb(r, req_param);
  37.698 -        break;
  37.699 -    }
  37.700 -        
  37.701 -    default:
  37.702 -    	DPRINTF("*** Write: Bad state! (%d) ***\n", req->state);
  37.703 -    	goto fail;
  37.704 -    }
  37.705 -    
  37.706 -    return;
  37.707 -    
  37.708 - fail:
  37.709 -    {
  37.710 -        struct io_ret r;
  37.711 -        io_cb_t cb;
  37.712 -        int i;
  37.713 -
  37.714 -        DPRINTF("asyn_write had a read error mid-way.\n");
  37.715 -        req_param = req->param;
  37.716 -        cb        = req->cb;
  37.717 -        r.type = IO_INT_T;
  37.718 -        r.u.i  = -1;
  37.719 -        /* free any saved node vals. */
  37.720 -        for (i=0; i<3; i++)
  37.721 -            free(req->radix[i]);
  37.722 -        free(req);
  37.723 -        cb(r, req_param);
  37.724 -    }
  37.725 -}
  37.726 -
  37.727 -char *vdi_read_s(vdi_t *vdi, uint64_t vaddr)
  37.728 -{
  37.729 -    pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
  37.730 -    char *block = NULL;
  37.731 -    int ret;
  37.732 -
  37.733 -    void reads_cb(struct io_ret r, void *param) 
  37.734 -    {
  37.735 -        block = IO_BLOCK(r);
  37.736 -        pthread_mutex_unlock((pthread_mutex_t *)param);
  37.737 -    }
  37.738 -
  37.739 -    pthread_mutex_lock(&m);
  37.740 -    ret = vdi_read(vdi, vaddr, reads_cb, &m);
  37.741 -
  37.742 -    if (ret == 0) pthread_mutex_lock(&m);
  37.743 -    
  37.744 -    return block;
  37.745 -}
  37.746 -
  37.747 -
  37.748 -int vdi_write_s(vdi_t *vdi, uint64_t vaddr, char *block)
  37.749 -{
  37.750 -    pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
  37.751 -    int ret, result;
  37.752 -
  37.753 -    void writes_cb(struct io_ret r, void *param) 
  37.754 -    {
  37.755 -        result = IO_INT(r);
  37.756 -        pthread_mutex_unlock((pthread_mutex_t *)param);
  37.757 -    }
  37.758 -
  37.759 -    pthread_mutex_lock(&m);
  37.760 -    ret = vdi_write(vdi, vaddr, block, writes_cb, &m);
  37.761 -
  37.762 -    if (ret == 0) pthread_mutex_lock(&m);
  37.763 -    
  37.764 -    return result;
  37.765 -}
    38.1 --- a/tools/blktap/parallax/requests-async.h	Fri Jun 23 15:26:01 2006 -0600
    38.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    38.3 @@ -1,29 +0,0 @@
    38.4 -#ifndef _REQUESTSASYNC_H_
    38.5 -#define _REQUESTSASYNC_H_
    38.6 -
    38.7 -#include "block-async.h"
    38.8 -#include "blockstore.h" /* for newblock etc. */
    38.9 -
   38.10 -/*
   38.11 -#define BLOCK_SIZE 4096
   38.12 -#define ZERO 0ULL
   38.13 -#define getid(x) (((x)>>1)&0x7fffffffffffffffLLU)
   38.14 -#define iswritable(x) (((x) & 1LLU) != 0)
   38.15 -#define writable(x) (((x) << 1) | 1LLU)
   38.16 -#define readonly(x) ((uint64_t)((x) << 1))
   38.17 -*/
   38.18 -
   38.19 -#define VADDR_MASK 0x0000000003ffffffLLU /* 26-bits = 256Gig */
   38.20 -#define VALID_VADDR(x) (((x) & VADDR_MASK) == (x))
   38.21 -
   38.22 -int vdi_read (vdi_t *vdi, uint64_t vaddr, io_cb_t cb, void *param);
   38.23 -int vdi_write(vdi_t *vdi, uint64_t vaddr, char *block, io_cb_t cb, void *param);
   38.24 -             
   38.25 -/* synchronous versions: */
   38.26 -char *vdi_read_s (vdi_t *vdi, uint64_t vaddr);
   38.27 -int   vdi_write_s(vdi_t *vdi, uint64_t vaddr, char *block);
   38.28 -
   38.29 -#define ERR_BAD_VADDR  -1
   38.30 -#define ERR_NOMEM      -2
   38.31 -
   38.32 -#endif //_REQUESTSASYNC_H_
    39.1 --- a/tools/blktap/parallax/snaplog.c	Fri Jun 23 15:26:01 2006 -0600
    39.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    39.3 @@ -1,238 +0,0 @@
    39.4 -/**************************************************************************
    39.5 - * 
    39.6 - * snaplog.c
    39.7 - *
    39.8 - * Snapshot log on-disk data structure.
    39.9 - *
   39.10 - */
   39.11 - 
   39.12 - /* VDI histories are made from chains of snapshot logs.  These logs record 
   39.13 -  * the (radix) root and timestamp of individual snapshots.
   39.14 -  *
   39.15 -  * creation of a new VDI involves 'forking' a snapshot log, by creating a 
   39.16 -  * new, empty log (in a new VDI) and parenting it off of a record in an 
   39.17 -  * existing snapshot log.
   39.18 -  *
   39.19 -  * snapshot log blocks have at most one writer.
   39.20 -  */
   39.21 -
   39.22 -#include <stdio.h>
   39.23 -#include <stdlib.h>
   39.24 -#include <sys/time.h>
   39.25 -#include "blockstore.h"
   39.26 -#include "snaplog.h"
   39.27 -
   39.28 -
   39.29 -
   39.30 -snap_block_t *snap_get_block(uint64_t block)
   39.31 -{
   39.32 -    snap_block_t *blk = (snap_block_t *)readblock(block);
   39.33 -    
   39.34 -    if ( blk == NULL)
   39.35 -        return NULL;
   39.36 -    if ( blk->hdr.magic != SNAP_MAGIC ) {
   39.37 -        freeblock(blk);
   39.38 -        return NULL;
   39.39 -    }
   39.40 -    
   39.41 -    return blk;
   39.42 -}
   39.43 -    
   39.44 -int snap_get_id(snap_id_t *id, snap_rec_t *target)
   39.45 -{
   39.46 -    snap_block_t *blk;
   39.47 -    
   39.48 -    if ( id == NULL )
   39.49 -        return -1;
   39.50 -    
   39.51 -    blk = snap_get_block(id->block);
   39.52 -    
   39.53 -    if ( blk == NULL ) 
   39.54 -        return -1;
   39.55 -    
   39.56 -    if ( id->index > blk->hdr.nr_entries ) {
   39.57 -        freeblock(blk);
   39.58 -        return -1;
   39.59 -    }
   39.60 -    
   39.61 -    *target = blk->snaps[id->index];
   39.62 -    freeblock(blk);
   39.63 -    return 0;
   39.64 -}
   39.65 -
   39.66 -int __snap_block_create(snap_id_t *parent_id, snap_id_t *fork_id,
   39.67 -                                  snap_id_t *new_id)
   39.68 -{
   39.69 -    snap_rec_t parent_rec, fork_rec;
   39.70 -    snap_block_t *blk, *pblk;
   39.71 -    /*
   39.72 -    if ( (parent_id != NULL) && (snap_get_id(parent_id, &parent_rec) != 0) )
   39.73 -        return -1;    
   39.74 -    
   39.75 -    if ( (fork_id != NULL) && (snap_get_id(fork_id, &fork_rec) != 0) )
   39.76 -        return -1;   
   39.77 -*/
   39.78 -    blk = (snap_block_t *)newblock();
   39.79 -    blk->hdr.magic  = SNAP_MAGIC;
   39.80 -    blk->hdr.nr_entries  = 0;
   39.81 -    blk->hdr.log_entries = 0;
   39.82 -    blk->hdr.immutable   = 0;
   39.83 -    
   39.84 -    if (   (parent_id  != NULL) 
   39.85 -        && (parent_id->block != fork_id->block) 
   39.86 -        && (parent_id->block != 0)) {
   39.87 -        
   39.88 -        pblk = snap_get_block(parent_id->block);
   39.89 -        blk->hdr.log_entries = pblk->hdr.log_entries;
   39.90 -        freeblock(pblk);
   39.91 -    }
   39.92 -    
   39.93 -    if (parent_id != NULL) {
   39.94 -        blk->hdr.parent_block = *parent_id;
   39.95 -        blk->hdr.fork_block   = *fork_id;
   39.96 -    } else {
   39.97 -        blk->hdr.parent_block = null_snap_id;
   39.98 -        blk->hdr.fork_block   = null_snap_id;
   39.99 -    }
  39.100 -    
  39.101 -    new_id->index = 0;
  39.102 -    new_id->block = allocblock(blk);
  39.103 -    freeblock(blk);
  39.104 -    if (new_id->block == 0)
  39.105 -        return -1;
  39.106 -    
  39.107 -    return 0;
  39.108 -}
  39.109 -
  39.110 -int snap_block_create(snap_id_t *parent_id, snap_id_t *new_id)
  39.111 -{
  39.112 -    return __snap_block_create(parent_id, parent_id, new_id);
  39.113 -}
  39.114 -
  39.115 -int snap_append(snap_id_t *old_id, snap_rec_t *rec, snap_id_t *new_id)
  39.116 -{
  39.117 -    snap_id_t id = *old_id;
  39.118 -    snap_block_t *blk = snap_get_block(id.block);
  39.119 -    
  39.120 -    if ( rec->deleted == 1 ) {
  39.121 -        printf("Attempt to append a deleted snapshot!\n");
  39.122 -        return -1;
  39.123 -    }
  39.124 -    
  39.125 -    if ( blk->hdr.immutable != 0 ) {
  39.126 -        printf("Attempt to snap an immutable snap block!\n");
  39.127 -        return -1;
  39.128 -    }
  39.129 -    
  39.130 -    new_id->block = id.block;
  39.131 -    
  39.132 -    if (blk->hdr.nr_entries == SNAPS_PER_BLOCK) {
  39.133 -        int ret;
  39.134 -        
  39.135 -        id.index--; /* make id point to the last full record */
  39.136 -        
  39.137 -        ret = __snap_block_create(&id, &blk->hdr.fork_block, new_id);
  39.138 -        if ( ret != 0 ) {
  39.139 -            freeblock(blk);
  39.140 -            return -1;
  39.141 -        }
  39.142 -        
  39.143 -        blk->hdr.immutable = 1;
  39.144 -        writeblock(id.block, blk);
  39.145 -        freeblock(blk);
  39.146 -        blk = snap_get_block(new_id->block);
  39.147 -        id = *new_id;
  39.148 -    }
  39.149 -    
  39.150 -    blk->snaps[blk->hdr.nr_entries] = *rec;
  39.151 -    blk->hdr.nr_entries++;
  39.152 -    blk->hdr.log_entries++;
  39.153 -    new_id->index = blk->hdr.nr_entries;
  39.154 -    //printf("snap: %u %u\n", blk->hdr.nr_entries, blk->hdr.log_entries);
  39.155 -    writeblock(id.block, blk);
  39.156 -    freeblock(blk);
  39.157 -    return 0;
  39.158 -}
  39.159 -
  39.160 -int snap_collapse(int height, snap_id_t *p_id, snap_id_t *c_id)
  39.161 -{
  39.162 -    snap_block_t *p_blk, *c_blk, *blk;
  39.163 -    snap_rec_t   *p_rec, *c_rec;
  39.164 -    int ret = -1;
  39.165 -    
  39.166 -    p_blk = snap_get_block(p_id->block);
  39.167 -    
  39.168 -    if (p_blk == NULL) return(-1);
  39.169 -    
  39.170 -    if (c_id->block == p_id->block)
  39.171 -    {
  39.172 -        c_blk = p_blk;
  39.173 -    } else {
  39.174 -         c_blk = snap_get_block(c_id->block);
  39.175 -    }
  39.176 -    
  39.177 -    if (p_blk == NULL) {
  39.178 -        freeblock(p_blk);
  39.179 -        return(-1);
  39.180 -    }
  39.181 -     
  39.182 -    /* parent and child must not be deleted. */
  39.183 -    p_rec = &p_blk->snaps[p_id->index];
  39.184 -    c_rec = &c_blk->snaps[c_id->index];
  39.185 -    /*
  39.186 -    if ( (p_rec->deleted == 1) || (c_rec->deleted == 1) ) {
  39.187 -        printf("One of those snaps is already deleted.\n");
  39.188 -        goto done;
  39.189 -    }
  39.190 -    */
  39.191 -    /* first non-deleted thing in the log before child must be parent. */
  39.192 -    
  39.193 -    /* XXX todo: text the range here for delete (and eventually fork) bits) */
  39.194 -    /* for now, snaps must be consecutive, on the same log page: */
  39.195 -    
  39.196 -    if ((p_id->block != c_id->block) || (p_id->index != c_id->index-1))
  39.197 -    {
  39.198 -        printf("Deleting non-consecutive snaps is not done yet.\n");
  39.199 -        goto done;
  39.200 -    }
  39.201 -    
  39.202 -    /* mark parent as deleted XXX: may need to lock parent block here.*/
  39.203 -    p_rec->deleted = 1;
  39.204 -    writeblock(p_id->block, p_blk);
  39.205 -    
  39.206 -    /* delete the parent */
  39.207 -    printf("collapse(%Ld, %Ld)\n", p_rec->radix_root, c_rec->radix_root);
  39.208 -    ret = collapse(height, p_rec->radix_root, c_rec->radix_root);
  39.209 -    
  39.210 -    /* return the number of blocks reclaimed. */
  39.211 -    
  39.212 -done:
  39.213 -    if (c_blk != p_blk) freeblock(c_blk);
  39.214 -    freeblock(p_blk);
  39.215 -    
  39.216 -    return(ret);
  39.217 -}
  39.218 -
  39.219 -void snap_print_history(snap_id_t *snap_id)
  39.220 -{
  39.221 -    snap_id_t id = *snap_id;
  39.222 -    unsigned int idx = id.index;
  39.223 -    snap_block_t *new_blk, *blk = snap_get_block(id.block);
  39.224 -    
  39.225 -    while ( blk ) {
  39.226 -        printf("[Snap block %Ld]:\n", id.block);
  39.227 -        do {
  39.228 -            printf("   %03u: root: %Ld ts: %ld.%ld\n", idx, 
  39.229 -                    blk->snaps[idx].radix_root,
  39.230 -                    blk->snaps[idx].timestamp.tv_sec,
  39.231 -                    blk->snaps[idx].timestamp.tv_usec);
  39.232 -        } while (idx-- != 0);
  39.233 -        
  39.234 -        id = blk->hdr.parent_block;
  39.235 -        if (id.block != 0) {
  39.236 -            new_blk = snap_get_block(id.block);
  39.237 -        }
  39.238 -        freeblock(blk);
  39.239 -        blk = new_blk;
  39.240 -    }
  39.241 -}
    40.1 --- a/tools/blktap/parallax/snaplog.h	Fri Jun 23 15:26:01 2006 -0600
    40.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    40.3 @@ -1,61 +0,0 @@
    40.4 -/**************************************************************************
    40.5 - * 
    40.6 - * snaplog.h
    40.7 - *
    40.8 - * Snapshot log on-disk data structure.
    40.9 - *
   40.10 - */
   40.11 - 
   40.12 -#include "radix.h"
   40.13 -#include "blockstore.h"    /* for BLOCK_SIZE */
   40.14 - 
   40.15 -#ifndef __SNAPLOG_H__
   40.16 -#define __SNAPLOG_H__
   40.17 -
   40.18 -typedef struct snap_id {
   40.19 -    uint64_t            block;
   40.20 -    unsigned int   index;
   40.21 -} snap_id_t;
   40.22 -
   40.23 -typedef struct snap_rec {
   40.24 -    uint64_t            radix_root;
   40.25 -    struct timeval timestamp;
   40.26 -    /* flags: */
   40.27 -    unsigned       deleted:1;
   40.28 -} snap_rec_t;
   40.29 -
   40.30 -
   40.31 -int  snap_block_create(snap_id_t *parent_id, snap_id_t *new_id);
   40.32 -int  snap_append(snap_id_t *id, snap_rec_t *rec, snap_id_t *new_id);
   40.33 -int  snap_collapse(int height, snap_id_t *p_id, snap_id_t *c_id);
   40.34 -void snap_print_history(snap_id_t *snap_id);
   40.35 -int  snap_get_id(snap_id_t *id, snap_rec_t *target);
   40.36 -
   40.37 -
   40.38 -/* exported for vdi debugging */
   40.39 -#define SNAP_MAGIC 0xff00ff0aa0ff00ffLL
   40.40 -
   40.41 -static const snap_id_t null_snap_id = { 0, 0 }; 
   40.42 -
   40.43 -typedef struct snap_block_hdr {
   40.44 -    uint64_t            magic;
   40.45 -    snap_id_t      parent_block; /* parent block within this chain */
   40.46 -    snap_id_t      fork_block;   /* where this log was forked */
   40.47 -    unsigned       log_entries;  /* total entries since forking */
   40.48 -    unsigned short nr_entries;   /* entries in snaps[] */
   40.49 -    unsigned short immutable;    /* has this snap page become immutable? */
   40.50 -} snap_block_hdr_t;
   40.51 -
   40.52 -
   40.53 -#define SNAPS_PER_BLOCK \
   40.54 -    ((BLOCK_SIZE - sizeof(snap_block_hdr_t)) / sizeof(snap_rec_t))
   40.55 -
   40.56 -typedef struct snap_block {
   40.57 -    snap_block_hdr_t hdr;
   40.58 -    snap_rec_t       snaps[SNAPS_PER_BLOCK];
   40.59 -} snap_block_t;
   40.60 -    
   40.61 -
   40.62 -snap_block_t *snap_get_block(uint64_t block);
   40.63 -
   40.64 -#endif /* __SNAPLOG_H__ */
    41.1 --- a/tools/blktap/parallax/vdi.c	Fri Jun 23 15:26:01 2006 -0600
    41.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    41.3 @@ -1,367 +0,0 @@
    41.4 -/**************************************************************************
    41.5 - * 
    41.6 - * vdi.c
    41.7 - *
    41.8 - * Virtual Disk Image (VDI) Interfaces
    41.9 - *
   41.10 - */
   41.11 - 
   41.12 -#include <stdio.h>
   41.13 -#include <stdlib.h>
   41.14 -#include <fcntl.h>
   41.15 -#include <string.h>
   41.16 -#include <sys/time.h>
   41.17 -#include <pthread.h>
   41.18 -#include "blockstore.h"
   41.19 -#include "block-async.h"
   41.20 -#include "requests-async.h"
   41.21 -#include "radix.h"
   41.22 -#include "vdi.h"
   41.23 -                    
   41.24 -#define VDI_REG_BLOCK   2LL
   41.25 -#define VDI_RADIX_ROOT  writable(3)
   41.26 -                                                            
   41.27 -#if 0
   41.28 -#define DPRINTF(_f, _a...) printf ( _f , ## _a )
   41.29 -#else
   41.30 -#define DPRINTF(_f, _a...) ((void)0)
   41.31 -#endif
   41.32 -
   41.33 -/* I haven't decided about this registry stuff, so this is just a really
   41.34 - * quick lash-up so that there is some way to track VDIs.
   41.35 - *
   41.36 - * (Most vdi access should be with a direct handle to the block, so this
   41.37 - *  registry is just for start-of-day lookup and other control operations.)
   41.38 - */
   41.39 -
   41.40 -vdi_registry_t *create_vdi_registry(void)
   41.41 -{
   41.42 -    vdi_registry_t *reg = (vdi_registry_t *)newblock();
   41.43 -    
   41.44 -    if (reg == NULL)
   41.45 -        return NULL;
   41.46 -    
   41.47 -    /* zero-fill the vdi radix root while we have an empty block. */
   41.48 -    writeblock(VDI_RADIX_ROOT, (void *)reg);
   41.49 -    
   41.50 -    
   41.51 -    DPRINTF("[vdi.c] Creating VDI registry!\n");
   41.52 -    reg->magic      = VDI_REG_MAGIC;
   41.53 -    reg->nr_vdis    = 0;
   41.54 -    
   41.55 -    writeblock(VDI_REG_BLOCK, (void *)reg);
   41.56 -    
   41.57 -    return reg;
   41.58 -}
   41.59 -    
   41.60 -vdi_registry_t *get_vdi_registry(void)
   41.61 -{
   41.62 -    vdi_registry_t *vdi_reg = (vdi_registry_t *)readblock(VDI_REG_BLOCK);
   41.63 -    
   41.64 -    if ( vdi_reg == NULL )
   41.65 -        vdi_reg = create_vdi_registry();
   41.66 -    
   41.67 -    if ( vdi_reg->magic != VDI_REG_MAGIC ) {
   41.68 -        freeblock(vdi_reg);
   41.69 -        return NULL;
   41.70 -    }
   41.71 -    
   41.72 -    return vdi_reg;
   41.73 -}
   41.74 -
   41.75 -
   41.76 -vdi_t *vdi_create(snap_id_t *parent_snap, char *name)
   41.77 -{
   41.78 -    int ret;
   41.79 -    vdi_t *vdi;
   41.80 -    vdi_registry_t *vdi_reg;
   41.81 -    snap_rec_t snap_rec;
   41.82 -    
   41.83 -    /* create a vdi struct */
   41.84 -    vdi = newblock();
   41.85 -    if (vdi == NULL) 
   41.86 -        return NULL;
   41.87 -    
   41.88 -    if ( snap_get_id(parent_snap, &snap_rec) == 0 ) {
   41.89 -        vdi->radix_root = snapshot(snap_rec.radix_root);
   41.90 -    } else {
   41.91 -        vdi->radix_root = allocblock((void *)vdi); /* vdi is just zeros here */
   41.92 -        vdi->radix_root = writable(vdi->radix_root); /* grr. */
   41.93 -    }
   41.94 -    
   41.95 -    /* create a snapshot log, and add it to the vdi struct */
   41.96 -    
   41.97 -    ret = snap_block_create(parent_snap, &vdi->snap);
   41.98 -    if ( ret != 0 ) {
   41.99 -        DPRINTF("Error getting snap block in vdi_create.\n");
  41.100 -        freeblock(vdi);
  41.101 -        return NULL;
  41.102 -    }
  41.103 -            
  41.104 -    /* append the vdi to the registry, fill block and id.             */
  41.105 -    /* implicit allocation means we have to write the vdi twice here. */
  41.106 -    vdi_reg    = get_vdi_registry();
  41.107 -    if ( vdi_reg == NULL ) {
  41.108 -        freeblock(vdi);
  41.109 -        return NULL;
  41.110 -    }
  41.111 -    
  41.112 -    vdi->block = allocblock((void *)vdi);
  41.113 -    vdi->id    = vdi_reg->nr_vdis++;
  41.114 -    strncpy(vdi->name, name, VDI_NAME_SZ);
  41.115 -    vdi->name[VDI_NAME_SZ] = '\0';
  41.116 -    vdi->radix_lock = NULL; /* for tidiness */
  41.117 -    writeblock(vdi->block, (void *)vdi);
  41.118 -    
  41.119 -    update(VDI_REG_HEIGHT, VDI_RADIX_ROOT, vdi->id, vdi->block);
  41.120 -    writeblock(VDI_REG_BLOCK, (void *)vdi_reg);
  41.121 -    freeblock(vdi_reg);
  41.122 -    
  41.123 -    vdi->radix_lock = (struct radix_lock *)malloc(sizeof(struct radix_lock));
  41.124 -    if (vdi->radix_lock == NULL) 
  41.125 -    {
  41.126 -    	perror("couldn't malloc radix_lock for new vdi!");
  41.127 -    	freeblock(vdi);
  41.128 -    	return NULL;
  41.129 -    }
  41.130 -    radix_lock_init(vdi->radix_lock);
  41.131 -    
  41.132 -    return vdi;
  41.133 -}
  41.134 -
  41.135 -/* vdi_get and vdi_put currently act more like alloc/free -- they don't 
  41.136 - * do refcount-based allocation.  
  41.137 - */
  41.138 -vdi_t *vdi_get(uint64_t vdi_id)
  41.139 -{
  41.140 -    uint64_t vdi_blk;
  41.141 -    vdi_t *vdi;
  41.142 -    
  41.143 -    vdi_blk = lookup(VDI_REG_HEIGHT, VDI_RADIX_ROOT, vdi_id);
  41.144 -    
  41.145 -    if ( vdi_blk == 0 )
  41.146 -        return NULL;
  41.147 -    
  41.148 -    vdi = (vdi_t *)readblock(vdi_blk);
  41.149 -    
  41.150 -    vdi->radix_lock = (struct radix_lock *)malloc(sizeof(struct radix_lock));
  41.151 -    if (vdi->radix_lock == NULL) 
  41.152 -    {
  41.153 -    	perror("couldn't malloc radix_lock for new vdi!");
  41.154 -    	freeblock(vdi);
  41.155 -    	return NULL;
  41.156 -    }
  41.157 -    radix_lock_init(vdi->radix_lock);
  41.158 -    
  41.159 -    return vdi;
  41.160 -}
  41.161 -
  41.162 -void vdi_put(vdi_t *vdi)
  41.163 -{
  41.164 -    free(vdi->radix_lock);
  41.165 -    freeblock(vdi);
  41.166 -}
  41.167 -
  41.168 -void vdi_snapshot(vdi_t *vdi)
  41.169 -{
  41.170 -    snap_rec_t rec;
  41.171 -    int ret;
  41.172 -    
  41.173 -    rec.radix_root = vdi->radix_root;
  41.174 -    gettimeofday(&rec.timestamp, NULL);
  41.175 -    rec.deleted = 0;
  41.176 -    
  41.177 -    vdi->radix_root = snapshot(vdi->radix_root);
  41.178 -    ret = snap_append(&vdi->snap, &rec, &vdi->snap);
  41.179 -    if ( ret != 0 ) {
  41.180 -        printf("snap_append returned failure\n");
  41.181 -        return;
  41.182 -    }
  41.183 -    writeblock(vdi->block, vdi);
  41.184 -}
  41.185 -    
  41.186 -int __init_vdi()
  41.187 -{
  41.188 -    /* sneak this in here for the moment. */
  41.189 -    __rcache_init();
  41.190 -    
  41.191 -    /* force the registry to be created if it doesn't exist. */
  41.192 -    vdi_registry_t *vdi_reg = get_vdi_registry();
  41.193 -    if (vdi_reg == NULL) {
  41.194 -        printf("[vdi.c] Couldn't get/create a VDI registry!\n");
  41.195 -        return -1;
  41.196 -    }
  41.197 -    freeblock(vdi_reg);
  41.198 -    
  41.199 -    
  41.200 -    return 0;
  41.201 -}
  41.202 -    
  41.203 -#ifdef VDI_STANDALONE
  41.204 -
  41.205 -#define TEST_VDIS      50
  41.206 -#define NR_ITERS    50000
  41.207 -#define FORK_POINTS   200
  41.208 -#define INIT_VDIS       3
  41.209 -#define INIT_SNAPS     40
  41.210 -
  41.211 -/* These must be of decreasing size: */
  41.212 -#define NEW_FORK       (RAND_MAX-(RAND_MAX/1000))
  41.213 -#define NEW_ROOT_VDI   (RAND_MAX-((RAND_MAX/1000)*2))
  41.214 -#define NEW_FORK_VDI   (RAND_MAX-((RAND_MAX/1000)*3))
  41.215 -
  41.216 -#define GRAPH_DOT_FILE "vdi.dot"
  41.217 -#define GRAPH_PS_FILE  "vdi.ps"
  41.218 -
  41.219 -
  41.220 -typedef struct sh_st {
  41.221 -    snap_id_t     id;
  41.222 -    struct sh_st *next;
  41.223 -} sh_t;
  41.224 -
  41.225 -#define SNAP_HASHSZ 1024
  41.226 -sh_t *node_hash[SNAP_HASHSZ];
  41.227 -#define SNAP_HASH(_id) (((int)(_id)->block^(_id)->index)%SNAP_HASHSZ)
  41.228 -
  41.229 -#define SNAPID_EQUAL(_a,_b) \
  41.230 -    (((_a)->block==(_b)->block) && ((_a)->index==(_b)->index))
  41.231 -int sh_check_and_add(snap_id_t *id)
  41.232 -{
  41.233 -    sh_t **s = &node_hash[SNAP_HASH(id)];
  41.234 -    
  41.235 -    while (*s != NULL) {
  41.236 -        if (SNAPID_EQUAL(&((*s)->id), id))
  41.237 -            return 1;
  41.238 -        *s = (*s)->next;
  41.239 -    }
  41.240 -    
  41.241 -    *s = (sh_t *)malloc(sizeof(sh_t));
  41.242 -    (*s)->id = *id;
  41.243 -    (*s)->next = NULL;
  41.244 -    
  41.245 -    return 0;
  41.246 -}
  41.247 -
  41.248 -int main(int argc, char *argv[])
  41.249 -{
  41.250 -    vdi_t *vdi_list[TEST_VDIS];
  41.251 -    snap_id_t id, fork_points[FORK_POINTS];
  41.252 -    int nr_vdis = 0, nr_forks = 0;
  41.253 -    int i, j, r;
  41.254 -    FILE *f;
  41.255 -    char name[VDI_NAME_SZ];
  41.256 -    
  41.257 -    __init_blockstore();
  41.258 -    __init_vdi();
  41.259 -    
  41.260 -    printf("[o] Generating seed VDIs. (%d VDIs)\n", INIT_VDIS);
  41.261 -    
  41.262 -    for (i=0; i<INIT_VDIS; i++) {
  41.263 -        r=rand();
  41.264 -        
  41.265 -        sprintf(name, "VDI Number %d", nr_vdis);
  41.266 -        vdi_list[i] = vdi_create(NULL, name);
  41.267 -        for (j=0; j<(r%INIT_SNAPS); j++)
  41.268 -            vdi_snapshot(vdi_list[i]);
  41.269 -        fork_points[i] = vdi_list[i]->snap;
  41.270 -        nr_vdis++;
  41.271 -        nr_forks++;
  41.272 -    }
  41.273 -    
  41.274 -    printf("[o] Running a random workload. (%d iterations)\n", NR_ITERS);
  41.275 -            
  41.276 -    for (i=0; i<NR_ITERS; i++) {
  41.277 -        r = rand();
  41.278 -        
  41.279 -        if ( r > NEW_FORK ) {
  41.280 -            if ( nr_forks > FORK_POINTS )
  41.281 -                continue;
  41.282 -            id = vdi_list[r%nr_vdis]->snap;
  41.283 -            if ( ( id.block == 0 ) || ( id.index == 0 ) )
  41.284 -                continue;
  41.285 -            id.index--;
  41.286 -            fork_points[nr_forks++] = id;
  41.287 -            
  41.288 -        } else if ( r > NEW_ROOT_VDI ) {
  41.289 -            
  41.290 -            if ( nr_vdis == TEST_VDIS )
  41.291 -                continue;
  41.292 -            
  41.293 -            sprintf(name, "VDI Number %d.", nr_vdis);
  41.294 -            vdi_list[nr_vdis++] = vdi_create(NULL, name);
  41.295 -            
  41.296 -        } else if ( r > NEW_FORK_VDI ) {
  41.297 -            
  41.298 -            if ( nr_vdis == TEST_VDIS )
  41.299 -                continue;
  41.300 -            
  41.301 -            sprintf(name, "VDI Number %d.", nr_vdis);
  41.302 -            vdi_list[nr_vdis++] = vdi_create(&fork_points[r%nr_forks], name);
  41.303 -            
  41.304 -        } else /* SNAPSHOT */ {
  41.305 -            
  41.306 -            vdi_snapshot(vdi_list[r%nr_vdis]);
  41.307 -            
  41.308 -        }
  41.309 -    }
  41.310 -    
  41.311 -    /* now dump it out to a dot file. */
  41.312 -    printf("[o] Dumping state to a dot graph. (%d VDIs)\n", nr_vdis);
  41.313 -    
  41.314 -    f = fopen(GRAPH_DOT_FILE, "w");
  41.315 -    
  41.316 -    /* write graph preamble */
  41.317 -    fprintf(f, "digraph G {\n");
  41.318 -    fprintf(f, "   rankdir=LR\n");
  41.319 -    
  41.320 -    for (i=0; i<nr_vdis; i++) {
  41.321 -        char oldnode[255];
  41.322 -        snap_block_t *blk;
  41.323 -        snap_id_t id = vdi_list[i]->snap;
  41.324 -        int nr_snaps, done=0;
  41.325 -        
  41.326 -        /* add a node for the id */
  41.327 -printf("vdi: %d\n", i);
  41.328 -        fprintf(f, "   n%Ld%d [color=blue,shape=box,label=\"%s\\nb:%Ld\\nidx:%d\"]\n", 
  41.329 -                id.block, id.index, vdi_list[i]->name,
  41.330 -                id.block, id.index);
  41.331 -        sprintf(oldnode, "n%Ld%d", id.block, id.index);
  41.332 -        
  41.333 -        while (id.block != 0) {
  41.334 -            blk = snap_get_block(id.block);
  41.335 -            nr_snaps = blk->hdr.log_entries - (blk->hdr.nr_entries - id.index);
  41.336 -            id = blk->hdr.fork_block;
  41.337 -            
  41.338 -            done = sh_check_and_add(&id);
  41.339 -            
  41.340 -            /* add a node for the fork_id */
  41.341 -            if (!done) {
  41.342 -                fprintf(f, "   n%Ld%d [shape=box,label=\"b:%Ld\\nidx:%d\"]\n", 
  41.343 -                    id.block, id.index,
  41.344 -                    id.block, id.index);
  41.345 -            }
  41.346 -            
  41.347 -            /* add an edge between them */
  41.348 -            fprintf(f, "   n%Ld%d -> %s [label=\"%u snapshots\"]\n",
  41.349 -                    id.block, id.index, oldnode, nr_snaps);
  41.350 -            sprintf(oldnode, "n%Ld%d", id.block, id.index);
  41.351 -            freeblock(blk);
  41.352 -            
  41.353 -            if (done) break;
  41.354 -        }
  41.355 -    }
  41.356 -    
  41.357 -    /* write graph postamble */
  41.358 -    fprintf(f, "}\n");
  41.359 -    fclose(f);
  41.360 -    
  41.361 -    printf("[o] Generating postscript graph. (%s)\n", GRAPH_PS_FILE);
  41.362 -    {
  41.363 -        char cmd[255];
  41.364 -        sprintf(cmd, "dot %s -Tps -o %s", GRAPH_DOT_FILE, GRAPH_PS_FILE);
  41.365 -        system(cmd);
  41.366 -    }
  41.367 -    return 0;
  41.368 -}
  41.369 -
  41.370 -#endif
    42.1 --- a/tools/blktap/parallax/vdi.h	Fri Jun 23 15:26:01 2006 -0600
    42.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    42.3 @@ -1,55 +0,0 @@
    42.4 -#ifndef _VDI_H_
    42.5 -#define _VDI_H_
    42.6 -/**************************************************************************
    42.7 - * 
    42.8 - * vdi.h
    42.9 - *
   42.10 - * Virtual Disk Image (VDI) Interfaces
   42.11 - *
   42.12 - */
   42.13 -
   42.14 -#ifndef __VDI_H__
   42.15 -#define __VDI_H__
   42.16 -
   42.17 -#include "blktaplib.h"
   42.18 -#include "snaplog.h"
   42.19 -
   42.20 -#define VDI_HEIGHT     27 /* Note that these are now hard-coded */
   42.21 -#define VDI_REG_HEIGHT 27 /* in the async lookup code           */
   42.22 -
   42.23 -#define VDI_NAME_SZ 256
   42.24 -
   42.25 -
   42.26 -typedef struct vdi {
   42.27 -    uint64_t         id;               /* unique vdi id -- used by the registry   */
   42.28 -    uint64_t         block;            /* block where this vdi lives (also unique)*/
   42.29 -    uint64_t         radix_root;       /* radix root node for block mappings      */
   42.30 -    snap_id_t   snap;             /* next snapshot slot for this VDI         */
   42.31 -    struct vdi *next;             /* used to hash-chain in blkif.            */
   42.32 -    blkif_vdev_t vdevice;         /* currently mounted as...                 */
   42.33 -    struct radix_lock *radix_lock;/* per-line L1 RW lock for parallel reqs   */
   42.34 -    char        name[VDI_NAME_SZ];/* human readable vdi name                 */
   42.35 -} vdi_t;
   42.36 -
   42.37 -#define VDI_REG_MAGIC   0xff00ff0bb0ff00ffLL
   42.38 -
   42.39 -typedef struct vdi_registry {
   42.40 -    uint64_t     magic;
   42.41 -    uint64_t     nr_vdis;
   42.42 -} vdi_registry_t;
   42.43 -
   42.44 -
   42.45 -int __init_vdi(void);
   42.46 -
   42.47 -vdi_t *vdi_get(uint64_t vdi_id);
   42.48 -void vdi_put(vdi_t *vdi);
   42.49 -vdi_registry_t *get_vdi_registry(void);
   42.50 -vdi_t *vdi_create(snap_id_t *parent_snap, char *name);
   42.51 -uint64_t vdi_lookup_block(vdi_t *vdi, uint64_t vdi_block, int *writable);
   42.52 -void vdi_update_block(vdi_t *vdi, uint64_t vdi_block, uint64_t g_block);
   42.53 -void vdi_snapshot(vdi_t *vdi);
   42.54 -
   42.55 -
   42.56 -#endif /* __VDI_H__ */
   42.57 -
   42.58 -#endif //_VDI_H_
    43.1 --- a/tools/blktap/parallax/vdi_create.c	Fri Jun 23 15:26:01 2006 -0600
    43.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    43.3 @@ -1,52 +0,0 @@
    43.4 -/**************************************************************************
    43.5 - * 
    43.6 - * vdi_create.c
    43.7 - *
    43.8 - * Create a new vdi.
    43.9 - *
   43.10 - */
   43.11 - 
   43.12 -#include <stdio.h>
   43.13 -#include <stdlib.h>
   43.14 -#include <string.h>
   43.15 -#include <sys/time.h>
   43.16 -#include "blockstore.h"
   43.17 -#include "radix.h"
   43.18 -#include "vdi.h"
   43.19 -
   43.20 -int main(int argc, char *argv[])
   43.21 -{
   43.22 -    vdi_t       *vdi;
   43.23 -    char         name[VDI_NAME_SZ] = "";
   43.24 -    snap_id_t    id;
   43.25 -    int          from_snap = 0;
   43.26 -    
   43.27 -    __init_blockstore();
   43.28 -    __init_vdi();
   43.29 -    
   43.30 -    if ( argc == 1 ) {
   43.31 -        printf("usage: %s <VDI Name> [<snap block> <snap idx>]\n", argv[0]);
   43.32 -        exit(-1);
   43.33 -    }
   43.34 -    
   43.35 -    strncpy( name, argv[1], VDI_NAME_SZ);
   43.36 -    name[VDI_NAME_SZ] = '\0';    
   43.37 -    
   43.38 -    if ( argc > 3 ) {
   43.39 -        id.block   = (uint64_t)          atoll(argv[2]);
   43.40 -        id.index   = (unsigned int) atol (argv[3]);
   43.41 -        from_snap  = 1;
   43.42 -    }
   43.43 -    
   43.44 -    vdi = vdi_create( from_snap ? &id : NULL, name);
   43.45 -    
   43.46 -    if ( vdi == NULL ) {
   43.47 -        printf("Failed to create VDI!\n");
   43.48 -        freeblock(vdi);
   43.49 -        exit(-1);
   43.50 -    }
   43.51 -    
   43.52 -    freeblock(vdi);
   43.53 -    
   43.54 -    return (0);
   43.55 -}
    44.1 --- a/tools/blktap/parallax/vdi_fill.c	Fri Jun 23 15:26:01 2006 -0600
    44.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    44.3 @@ -1,81 +0,0 @@
    44.4 -/**************************************************************************
    44.5 - * 
    44.6 - * vdi_fill.c
    44.7 - *
    44.8 - * Hoover a file or device into a vdi.
    44.9 - * You must first create the vdi with vdi_create.
   44.10 - *
   44.11 - */
   44.12 - 
   44.13 -#include <stdio.h>
   44.14 -#include <stdlib.h>
   44.15 -#include <string.h>
   44.16 -#include <sys/types.h>
   44.17 -#include <sys/stat.h>
   44.18 -#include <fcntl.h>
   44.19 -#include <unistd.h>
   44.20 -#include "blockstore.h"
   44.21 -#include "radix.h"
   44.22 -#include "requests-async.h"
   44.23 -#include "vdi.h"
   44.24 -
   44.25 -int main(int argc, char *argv[])
   44.26 -{
   44.27 -    vdi_t       *vdi;
   44.28 -    uint64_t          id;
   44.29 -    int          fd;
   44.30 -    struct stat  st;
   44.31 -    uint64_t          tot_size;
   44.32 -    char         spage[BLOCK_SIZE];
   44.33 -    char        *dpage;
   44.34 -    uint64_t          vblock = 0, count=0;
   44.35 -    
   44.36 -    __init_blockstore();
   44.37 -    init_block_async();
   44.38 -    __init_vdi();
   44.39 -    
   44.40 -    if ( argc < 3 ) {
   44.41 -        printf("usage: %s <VDI id> <filename>\n", argv[0]);
   44.42 -        exit(-1);
   44.43 -    }
   44.44 -        
   44.45 -    id = (uint64_t) atoll(argv[1]);
   44.46 -    
   44.47 -    vdi = vdi_get( id );
   44.48 -    
   44.49 -    if ( vdi == NULL ) {
   44.50 -        printf("Failed to retreive VDI %Ld!\n", id);
   44.51 -        exit(-1);
   44.52 -    }
   44.53 -    
   44.54 -    fd = open(argv[2], O_RDONLY | O_LARGEFILE);
   44.55 -    
   44.56 -    if (fd < 0) {
   44.57 -        printf("Couldn't open %s!\n", argv[2]);
   44.58 -        exit(-1);
   44.59 -    }
   44.60 -    
   44.61 -    if ( fstat(fd, &st) != 0 ) {
   44.62 -        printf("Couldn't stat %s!\n", argv[2]);
   44.63 -        exit(-1);
   44.64 -    }
   44.65 -    
   44.66 -    tot_size = (uint64_t) st.st_size;
   44.67 -    printf("Filling VDI %Ld with %Ld bytes.\n", id, tot_size);
   44.68 -    
   44.69 -    printf("%011Ld blocks total\n", tot_size / BLOCK_SIZE);    
   44.70 -    printf("           ");
   44.71 -    while ( ( count = read(fd, spage, BLOCK_SIZE) ) > 0 ) {
   44.72 -        vdi_write_s(vdi, vblock, spage);
   44.73 -        
   44.74 -        vblock++;
   44.75 -        if ((vblock % 512) == 0)
   44.76 -        printf("\b\b\b\b\b\b\b\b\b\b\b%011Ld", vblock);
   44.77 -        fflush(stdout);
   44.78 -    }
   44.79 -    printf("\n");
   44.80 -    
   44.81 -    freeblock(vdi);
   44.82 -    
   44.83 -    return (0);
   44.84 -}
    45.1 --- a/tools/blktap/parallax/vdi_list.c	Fri Jun 23 15:26:01 2006 -0600
    45.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    45.3 @@ -1,47 +0,0 @@
    45.4 -/**************************************************************************
    45.5 - * 
    45.6 - * vdi_list.c
    45.7 - *
    45.8 - * Print a list of VDIs on the block store.
    45.9 - *
   45.10 - */
   45.11 - 
   45.12 -#include <stdio.h>
   45.13 -#include <stdlib.h>
   45.14 -#include <string.h>
   45.15 -#include <sys/time.h>
   45.16 -#include "blockstore.h"
   45.17 -#include "radix.h"
   45.18 -#include "vdi.h"
   45.19 -
   45.20 -int main(int argc, char *argv[])
   45.21 -{
   45.22 -    vdi_registry_t *reg;
   45.23 -    vdi_t *vdi;
   45.24 -    int i;
   45.25 -    
   45.26 -    __init_blockstore();
   45.27 -    __init_vdi();
   45.28 -    
   45.29 -    reg = get_vdi_registry();
   45.30 -    
   45.31 -    if ( reg == NULL ) {
   45.32 -        printf("couldn't get VDI registry.\n");
   45.33 -        exit(-1);
   45.34 -    }
   45.35 -    
   45.36 -    for (i=0; i < reg->nr_vdis; i++) {
   45.37 -        vdi = vdi_get(i);
   45.38 -        
   45.39 -        if ( vdi != NULL ) {
   45.40 -            
   45.41 -            printf("%10Ld %60s\n", vdi->id, vdi->name);
   45.42 -            freeblock(vdi);
   45.43 -            
   45.44 -        }
   45.45 -    }
   45.46 -    
   45.47 -    freeblock(reg);
   45.48 -    
   45.49 -    return 0;
   45.50 -}
    46.1 --- a/tools/blktap/parallax/vdi_snap.c	Fri Jun 23 15:26:01 2006 -0600
    46.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    46.3 @@ -1,43 +0,0 @@
    46.4 -/**************************************************************************
    46.5 - * 
    46.6 - * vdi_snap.c
    46.7 - *
    46.8 - * Snapshot a vdi.
    46.9 - *
   46.10 - */
   46.11 - 
   46.12 -#include <stdio.h>
   46.13 -#include <stdlib.h>
   46.14 -#include <string.h>
   46.15 -#include <sys/time.h>
   46.16 -#include "blockstore.h"
   46.17 -#include "radix.h"
   46.18 -#include "vdi.h"
   46.19 -
   46.20 -int main(int argc, char *argv[])
   46.21 -{
   46.22 -    vdi_t  *vdi;
   46.23 -    uint64_t     id;
   46.24 -    
   46.25 -    __init_blockstore();
   46.26 -    __init_vdi();
   46.27 -    
   46.28 -    if ( argc == 1 ) {
   46.29 -        printf("usage: %s <VDI id>\n", argv[0]);
   46.30 -        exit(-1);
   46.31 -    }
   46.32 -    
   46.33 -    id = (uint64_t) atoll(argv[1]);
   46.34 -    
   46.35 -    vdi = vdi_get(id);
   46.36 -    
   46.37 -    if ( vdi == NULL ) {
   46.38 -        printf("couldn't find the requested VDI.\n");
   46.39 -        freeblock(vdi);
   46.40 -        exit(-1);
   46.41 -    }
   46.42 -    
   46.43 -    vdi_snapshot(vdi);
   46.44 -    
   46.45 -    return 0;
   46.46 -}
    47.1 --- a/tools/blktap/parallax/vdi_snap_delete.c	Fri Jun 23 15:26:01 2006 -0600
    47.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    47.3 @@ -1,48 +0,0 @@
    47.4 -/**************************************************************************
    47.5 - * 
    47.6 - * vdi_snap_delete.c
    47.7 - *
    47.8 - * Delete a snapshot.
    47.9 - *
   47.10 - * This is not finished:  right now it takes a snap n and calls 
   47.11 - * snap_collapse(n,n+1).
   47.12 - *
   47.13 - * TODO: support for non-consecutive, non-same-block snaps
   47.14 - *       Avoid forking probs.
   47.15 - *
   47.16 - */
   47.17 - 
   47.18 -#include <stdio.h>
   47.19 -#include <stdlib.h>
   47.20 -#include <string.h>
   47.21 -#include <sys/time.h>
   47.22 -#include "blockstore.h"
   47.23 -#include "snaplog.h"
   47.24 -#include "radix.h"
   47.25 -#include "vdi.h"
   47.26 -
   47.27 -int main(int argc, char *argv[])
   47.28 -{
   47.29 -    snap_id_t    id, c_id;
   47.30 -    int ret;
   47.31 -    
   47.32 -    __init_blockstore();
   47.33 -    __init_vdi();
   47.34 -    
   47.35 -    if ( argc != 3 ) {
   47.36 -        printf("usage: %s <snap block> <snap idx>\n", argv[0]);
   47.37 -        exit(-1);
   47.38 -    }
   47.39 -    
   47.40 -    id.block   = (uint64_t)          atoll(argv[1]);
   47.41 -    id.index   = (unsigned int) atol (argv[2]);
   47.42 -    
   47.43 -    c_id = id;
   47.44 -    c_id.index++;
   47.45 -    
   47.46 -    ret = snap_collapse(VDI_HEIGHT, &id, &c_id);
   47.47 -    
   47.48 -    printf("Freed %d blocks.\n", ret);
   47.49 -    
   47.50 -    return 0;
   47.51 -}
    48.1 --- a/tools/blktap/parallax/vdi_snap_list.c	Fri Jun 23 15:26:01 2006 -0600
    48.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    48.3 @@ -1,82 +0,0 @@
    48.4 -/**************************************************************************
    48.5 - * 
    48.6 - * vdi_snap_list.c
    48.7 - *
    48.8 - * Print a list of snapshots for the specified vdi.
    48.9 - *
   48.10 - */
   48.11 - 
   48.12 -#include <stdio.h>
   48.13 -#include <stdlib.h>
   48.14 -#include <string.h>
   48.15 -#include <time.h>
   48.16 -#include <sys/time.h>
   48.17 -#include "blockstore.h"
   48.18 -#include "radix.h"
   48.19 -#include "vdi.h"
   48.20 -
   48.21 -int main(int argc, char *argv[])
   48.22 -{
   48.23 -    vdi_t        *vdi;
   48.24 -    uint64_t           id;
   48.25 -    int           i, max_snaps = -1;
   48.26 -    snap_block_t *blk;
   48.27 -    snap_id_t     sid;
   48.28 -    char         *t;
   48.29 -    
   48.30 -    __init_blockstore();
   48.31 -    __init_vdi();
   48.32 -    
   48.33 -    if ( argc == 1 ) {
   48.34 -        printf("usage: %s <VDI id> [max snaps]\n", argv[0]);
   48.35 -        exit(-1);
   48.36 -    }
   48.37 -    
   48.38 -    id = (uint64_t) atoll(argv[1]);
   48.39 -    
   48.40 -    if ( argc > 2 ) {
   48.41 -        max_snaps = atoi(argv[2]);
   48.42 -    }
   48.43 -    
   48.44 -    vdi = vdi_get(id);
   48.45 -    
   48.46 -    if ( vdi == NULL ) {
   48.47 -        printf("couldn't find the requested VDI.\n");
   48.48 -        freeblock(vdi);
   48.49 -        exit(-1);
   48.50 -    }
   48.51 -    
   48.52 -    sid = vdi->snap;
   48.53 -    sid.index--;
   48.54 -    
   48.55 -    //printf("%8s%4s%21s %12s %1s\n", "Block", "idx", "timestamp", 
   48.56 -    //    "radix root", "d");
   48.57 -    printf("%8s%4s%37s %12s %1s\n", "Block", "idx", "timestamp", 
   48.58 -            "radix root", "d");
   48.59 -     
   48.60 -    while (sid.block != 0) {
   48.61 -        blk = snap_get_block(sid.block);
   48.62 -        for (i = sid.index; i >= 0; i--) {
   48.63 -            if ( max_snaps == 0  ) {
   48.64 -                freeblock(blk);
   48.65 -                goto done;
   48.66 -            }
   48.67 -            t = ctime(&blk->snaps[i].timestamp.tv_sec);
   48.68 -            t[strlen(t)-1] = '\0';
   48.69 -            //printf("%8Ld%4u%14lu.%06lu %12Ld %1s\n",
   48.70 -            printf("%8Ld%4u%30s %06lu %12Ld %1s\n",
   48.71 -                    sid.block, i, 
   48.72 -                    //blk->snaps[i].timestamp.tv_sec,
   48.73 -                    t,
   48.74 -                    blk->snaps[i].timestamp.tv_usec,
   48.75 -                    blk->snaps[i].radix_root,
   48.76 -                    blk->snaps[i].deleted ? "*" : " ");
   48.77 -            if ( max_snaps != -1 ) 
   48.78 -                max_snaps--;
   48.79 -        }
   48.80 -        sid = blk->hdr.parent_block;
   48.81 -        freeblock(blk);
   48.82 -    }
   48.83 -done:            
   48.84 -    return 0;
   48.85 -}
    49.1 --- a/tools/blktap/parallax/vdi_tree.c	Fri Jun 23 15:26:01 2006 -0600
    49.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    49.3 @@ -1,132 +0,0 @@
    49.4 -/**************************************************************************
    49.5 - * 
    49.6 - * vdi_tree.c
    49.7 - *
    49.8 - * Output current vdi tree to dot and postscript.
    49.9 - *
   49.10 - */
   49.11 - 
   49.12 -#include <stdio.h>
   49.13 -#include <stdlib.h>
   49.14 -#include <string.h>
   49.15 -#include <sys/time.h>
   49.16 -#include "blockstore.h"
   49.17 -#include "radix.h"
   49.18 -#include "vdi.h"
   49.19 -
   49.20 -#define GRAPH_DOT_FILE "vdi.dot"
   49.21 -#define GRAPH_PS_FILE  "vdi.ps"
   49.22 -
   49.23 -typedef struct sh_st {
   49.24 -    snap_id_t     id;
   49.25 -    struct sh_st *next;
   49.26 -} sh_t;
   49.27 -
   49.28 -#define SNAP_HASHSZ 1024
   49.29 -sh_t *node_hash[SNAP_HASHSZ];
   49.30 -#define SNAP_HASH(_id) (((int)(_id)->block^(_id)->index)%SNAP_HASHSZ)
   49.31 -
   49.32 -#define SNAPID_EQUAL(_a,_b) \
   49.33 -    (((_a)->block==(_b)->block) && ((_a)->index==(_b)->index))
   49.34 -int sh_check_and_add(snap_id_t *id)
   49.35 -{
   49.36 -    sh_t **s = &node_hash[SNAP_HASH(id)];
   49.37 -    
   49.38 -    while (*s != NULL) {
   49.39 -        if (SNAPID_EQUAL(&((*s)->id), id))
   49.40 -            return 1;
   49.41 -        *s = (*s)->next;
   49.42 -    }
   49.43 -    
   49.44 -    *s = (sh_t *)malloc(sizeof(sh_t));
   49.45 -    (*s)->id = *id;
   49.46 -    (*s)->next = NULL;
   49.47 -    
   49.48 -    return 0;
   49.49 -}
   49.50 -
   49.51 -int main(int argc, char *argv[])
   49.52 -{
   49.53 -    FILE *f;
   49.54 -    char dot_file[255] = GRAPH_DOT_FILE;
   49.55 -    char  ps_file[255] = GRAPH_PS_FILE;
   49.56 -    int nr_vdis = 0, nr_forks = 0;
   49.57 -    vdi_registry_t *reg;
   49.58 -    vdi_t *vdi;
   49.59 -    int i;
   49.60 -    
   49.61 -    __init_blockstore();
   49.62 -    __init_vdi();
   49.63 -    
   49.64 -    reg = get_vdi_registry();
   49.65 -    
   49.66 -    if ( reg == NULL ) {
   49.67 -        printf("couldn't get VDI registry.\n");
   49.68 -        exit(-1);
   49.69 -    }
   49.70 -    
   49.71 -    if ( argc > 1 ) {
   49.72 -        strncpy(ps_file, argv[1], 255);
   49.73 -        ps_file[255] = '\0';
   49.74 -    }
   49.75 -    
   49.76 -    /* now dump it out to a dot file. */
   49.77 -    printf("[o] Dumping state to a dot graph. (%d VDIs)\n", nr_vdis);
   49.78 -    
   49.79 -    f = fopen(dot_file, "w");
   49.80 -    
   49.81 -    /* write graph preamble */
   49.82 -    fprintf(f, "digraph G {\n");
   49.83 -    fprintf(f, "   rankdir=LR\n");
   49.84 -    
   49.85 -    for (i=0; i<reg->nr_vdis; i++) {
   49.86 -        char oldnode[255];
   49.87 -        snap_block_t *blk;
   49.88 -        snap_id_t id;
   49.89 -        int nr_snaps, done=0;
   49.90 -        
   49.91 -        vdi = vdi_get(i);
   49.92 -        id = vdi->snap;
   49.93 -        /* add a node for the id */
   49.94 -printf("vdi: %d\n", i);
   49.95 -        fprintf(f, "   n%Ld%d [color=blue,shape=box,label=\"%s\\nb:%Ld\\nidx:%d\"]\n", 
   49.96 -                id.block, id.index, vdi->name,
   49.97 -                id.block, id.index);
   49.98 -        sprintf(oldnode, "n%Ld%d", id.block, id.index);
   49.99 -        
  49.100 -        while (id.block != 0) {
  49.101 -            blk = snap_get_block(id.block);
  49.102 -            nr_snaps = blk->hdr.log_entries - (blk->hdr.nr_entries - id.index);
  49.103 -            id = blk->hdr.fork_block;
  49.104 -            
  49.105 -            done = sh_check_and_add(&id);
  49.106 -            
  49.107 -            /* add a node for the fork_id */
  49.108 -            if (!done) {
  49.109 -                fprintf(f, "   n%Ld%d [shape=box,label=\"b:%Ld\\nidx:%d\"]\n", 
  49.110 -                    id.block, id.index,
  49.111 -                    id.block, id.index);
  49.112 -            }
  49.113 -            
  49.114 -            /* add an edge between them */
  49.115 -            fprintf(f, "   n%Ld%d -> %s [label=\"%u snapshots\"]\n",
  49.116 -                    id.block, id.index, oldnode, nr_snaps);
  49.117 -            sprintf(oldnode, "n%Ld%d", id.block, id.index);
  49.118 -            freeblock(blk);
  49.119 -            
  49.120 -            if (done) break;
  49.121 -        }
  49.122 -    }
  49.123 -    
  49.124 -    /* write graph postamble */
  49.125 -    fprintf(f, "}\n");
  49.126 -    fclose(f);
  49.127 -    
  49.128 -    printf("[o] Generating postscript graph. (%s)\n", GRAPH_PS_FILE);
  49.129 -    {
  49.130 -        char cmd[255];
  49.131 -        sprintf(cmd, "dot %s -Tps -o %s", dot_file, ps_file);
  49.132 -        system(cmd);
  49.133 -    }
  49.134 -    return 0;
  49.135 -}
    50.1 --- a/tools/blktap/parallax/vdi_unittest.c	Fri Jun 23 15:26:01 2006 -0600
    50.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    50.3 @@ -1,184 +0,0 @@
    50.4 -/**************************************************************************
    50.5 - * 
    50.6 - * vdi_unittest.c
    50.7 - *
    50.8 - * Run a small test workload to ensure that data access through a vdi
    50.9 - * is (at least superficially) correct.
   50.10 - *
   50.11 - */
   50.12 - 
   50.13 -#include <stdio.h>
   50.14 -#include <stdlib.h>
   50.15 -#include <string.h>
   50.16 -#include <sys/types.h>
   50.17 -#include <sys/stat.h>
   50.18 -#include <fcntl.h>
   50.19 -#include <unistd.h>
   50.20 -#include "requests-async.h"
   50.21 -#include "blockstore.h"
   50.22 -#include "radix.h"
   50.23 -#include "vdi.h"
   50.24 -
   50.25 -#define TEST_PAGES  32
   50.26 -static char *zero_page;
   50.27 -static char pages[TEST_PAGES][BLOCK_SIZE];
   50.28 -static int next_page = 0;
   50.29 -
   50.30 -void fill_test_pages(void)
   50.31 -{
   50.32 -    int i, j;
   50.33 -    long *page;
   50.34 -
   50.35 -    for (i=0; i< TEST_PAGES; i++) {
   50.36 -        page = (unsigned long *)pages[i];
   50.37 -        for (j=0; j<(BLOCK_SIZE/4); j++) {
   50.38 -            page[j] = random();
   50.39 -        }
   50.40 -    }
   50.41 -
   50.42 -    zero_page = newblock();
   50.43 -}
   50.44 -
   50.45 -inline uint64_t make_vaddr(uint64_t L1, uint64_t L2, uint64_t L3)
   50.46 -{
   50.47 -    uint64_t ret = L1;
   50.48 -
   50.49 -    ret = (ret << 9) | L2;
   50.50 -    ret = (ret << 9) | L3;
   50.51 -
   50.52 -    return ret;
   50.53 -}
   50.54 -
   50.55 -void touch_block(vdi_t *vdi, uint64_t L1, uint64_t L2, uint64_t L3)
   50.56 -{
   50.57 -    uint64_t vaddr;
   50.58 -    char *page = pages[next_page++];
   50.59 -    char *rpage = NULL;
   50.60 -
   50.61 -    printf("TOUCH (%3Lu, %3Lu, %3Lu)\n", L1, L2, L3);
   50.62 -
   50.63 -    vaddr = make_vaddr(L1, L2, L3);
   50.64 -    vdi_write_s(vdi, vaddr, page);
   50.65 -    rpage = vdi_read_s(vdi, vaddr);
   50.66 -
   50.67 -    if (rpage == NULL) 
   50.68 -    {
   50.69 -        printf( "read %Lu returned NULL\n", vaddr); 
   50.70 -        return; 
   50.71 -    }
   50.72 -
   50.73 -    if (memcmp(page, rpage, BLOCK_SIZE) != 0)
   50.74 -    {
   50.75 -        printf( "read %Lu returned a different page\n", vaddr);
   50.76 -        return;
   50.77 -    }
   50.78 -
   50.79 -    freeblock(rpage);
   50.80 -}
   50.81 -
   50.82 -void test_block(vdi_t *vdi, uint64_t L1, uint64_t L2, uint64_t L3, char *page)
   50.83 -{
   50.84 -    uint64_t vaddr;
   50.85 -    char *rpage = NULL;
   50.86 -
   50.87 -    printf("TEST  (%3Lu, %3Lu, %3Lu)\n", L1, L2, L3);
   50.88 -
   50.89 -    vaddr = make_vaddr(L1, L2, L3);
   50.90 -    rpage = vdi_read_s(vdi, vaddr);
   50.91 -
   50.92 -    if (rpage == NULL) 
   50.93 -    {
   50.94 -        printf( "read %Lu returned NULL\n", vaddr); 
   50.95 -        return; 
   50.96 -    }
   50.97 -
   50.98 -    if (memcmp(page, rpage, BLOCK_SIZE) != 0)
   50.99 -    {
  50.100 -        printf( "read %Lu returned a different page\n", vaddr);
  50.101 -        return;
  50.102 -    }
  50.103 -
  50.104 -    freeblock(rpage);
  50.105 -}
  50.106 -
  50.107 -void coverage_test(vdi_t *vdi)
  50.108 -{
  50.109 -    uint64_t vaddr;
  50.110 -    int i, j, k;
  50.111 -
  50.112 -    /* Do a series of writes and reads to test all paths through the 
  50.113 -     * async radix code.  The radix request code will dump CRC warnings
  50.114 -     * if there are data problems here as well.
  50.115 -     */
  50.116 -
  50.117 -    /* L1 Zero */
  50.118 -    touch_block(vdi, 0, 0, 0);
  50.119 -
  50.120 -    /* L2 Zero */
  50.121 -    i = next_page;
  50.122 -    touch_block(vdi, 0, 1, 0);
  50.123 -
  50.124 -    /* L3 Zero */
  50.125 -    j = next_page;
  50.126 -    touch_block(vdi, 0, 0, 1);
  50.127 -    k = next_page;
  50.128 -    touch_block(vdi, 0, 1, 1);
  50.129 -
  50.130 -    /* Direct write */
  50.131 -    touch_block(vdi, 0, 0, 0);
  50.132 -
  50.133 -    vdi_snapshot(vdi);
  50.134 -
  50.135 -    /* L1 fault */
  50.136 -    touch_block(vdi, 0, 0, 0);
  50.137 -    /* test the read-only branches that should have been copied over. */
  50.138 -    test_block(vdi, 0, 1, 0, pages[i]);
  50.139 -    test_block(vdi, 0, 0, 1, pages[j]);
  50.140 -
  50.141 -    /* L2 fault */
  50.142 -    touch_block(vdi, 0, 1, 0);
  50.143 -    test_block(vdi, 0, 1, 1, pages[k]);
  50.144 -
  50.145 -    /* L3 fault */
  50.146 -    touch_block(vdi, 0, 0, 1);
  50.147 -    
  50.148 -    /* read - L1 zero */
  50.149 -    test_block(vdi, 1, 0, 0, zero_page);
  50.150 -    
  50.151 -    /* read - L2 zero */
  50.152 -    test_block(vdi, 0, 2, 0, zero_page);
  50.153 -
  50.154 -    /* read - L3 zero */
  50.155 -    test_block(vdi, 0, 0, 2, zero_page);
  50.156 -}
  50.157 -
  50.158 -int main(int argc, char *argv[])
  50.159 -{
  50.160 -    vdi_t       *vdi;
  50.161 -    uint64_t          id;
  50.162 -    int          fd;
  50.163 -    struct stat  st;
  50.164 -    uint64_t          tot_size;
  50.165 -    char         spage[BLOCK_SIZE];
  50.166 -    char        *dpage;
  50.167 -    uint64_t          vblock = 0, count=0;
  50.168 -    
  50.169 -    __init_blockstore();
  50.170 -    init_block_async();
  50.171 -    __init_vdi();
  50.172 -        
  50.173 -    vdi = vdi_create( NULL, "UNIT TEST VDI");
  50.174 -    
  50.175 -    if ( vdi == NULL ) {
  50.176 -        printf("Failed to create VDI!\n");
  50.177 -        freeblock(vdi);
  50.178 -        exit(-1);
  50.179 -    }
  50.180 -
  50.181 -    fill_test_pages();
  50.182 -    coverage_test(vdi);
  50.183 -    
  50.184 -    freeblock(vdi);
  50.185 -    
  50.186 -    return (0);
  50.187 -}
    51.1 --- a/tools/blktap/parallax/vdi_validate.c	Fri Jun 23 15:26:01 2006 -0600
    51.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    51.3 @@ -1,97 +0,0 @@
    51.4 -/**************************************************************************
    51.5 - * 
    51.6 - * vdi_validate.c
    51.7 - *
    51.8 - * Intended to sanity-check vm_fill and the underlying vdi code.
    51.9 - *
   51.10 - * Block-by-block compare of a vdi with a file/device on the disk.
   51.11 - *
   51.12 - */
   51.13 - 
   51.14 -#include <stdio.h>
   51.15 -#include <stdlib.h>
   51.16 -#include <string.h>
   51.17 -#include <sys/types.h>
   51.18 -#include <sys/stat.h>
   51.19 -#include <fcntl.h>
   51.20 -#include <unistd.h>
   51.21 -#include "blockstore.h"
   51.22 -#include "radix.h"
   51.23 -#include "vdi.h"
   51.24 -#include "requests-async.h"
   51.25 -
   51.26 -int main(int argc, char *argv[])
   51.27 -{
   51.28 -    vdi_t       *vdi;
   51.29 -    uint64_t          id;
   51.30 -    int          fd;
   51.31 -    struct stat  st;
   51.32 -    uint64_t          tot_size;
   51.33 -    char         spage[BLOCK_SIZE], *dpage;
   51.34 -    char        *vpage;
   51.35 -    uint64_t          vblock = 0, count=0;
   51.36 -    
   51.37 -    __init_blockstore();
   51.38 -    init_block_async();
   51.39 -    __init_vdi();
   51.40 -    
   51.41 -    if ( argc < 3 ) {
   51.42 -        printf("usage: %s <VDI id> <filename>\n", argv[0]);
   51.43 -        exit(-1);
   51.44 -    }
   51.45 -        
   51.46 -    id = (uint64_t) atoll(argv[1]);
   51.47 -    
   51.48 -    vdi = vdi_get( id );
   51.49 -    
   51.50 -    if ( vdi == NULL ) {
   51.51 -        printf("Failed to retreive VDI %Ld!\n", id);
   51.52 -        exit(-1);
   51.53 -    }
   51.54 -    
   51.55 -    fd = open(argv[2], O_RDONLY | O_LARGEFILE);
   51.56 -    
   51.57 -    if (fd < 0) {
   51.58 -        printf("Couldn't open %s!\n", argv[2]);
   51.59 -        exit(-1);
   51.60 -    }
   51.61 -    
   51.62 -    if ( fstat(fd, &st) != 0 ) {
   51.63 -        printf("Couldn't stat %s!\n", argv[2]);
   51.64 -        exit(-1);
   51.65 -    }
   51.66 -    
   51.67 -    tot_size = (uint64_t) st.st_size;
   51.68 -    printf("Testing VDI %Ld (%Ld bytes).\n", id, tot_size);
   51.69 -    
   51.70 -    printf("           ");
   51.71 -    while ( ( count = read(fd, spage, BLOCK_SIZE) ) > 0 ) {
   51.72 -
   51.73 -        dpage = vdi_read_s(vdi, vblock);
   51.74 -
   51.75 -        if (dpage == NULL) {
   51.76 -            printf("\n\nfound an unmapped VDI block (%Ld)\n", vblock);
   51.77 -            exit(0);
   51.78 -        }
   51.79 -
   51.80 -        if (memcmp(spage, dpage, BLOCK_SIZE) != 0) {
   51.81 -            printf("\n\nblocks don't match! (%Ld)\n", vblock);
   51.82 -            exit(0);
   51.83 -        }
   51.84 -        
   51.85 -        freeblock(dpage);
   51.86 -        
   51.87 -        vblock++;
   51.88 -        if ((vblock % 1024) == 0) {
   51.89 -            printf("\b\b\b\b\b\b\b\b\b\b\b%011Ld", vblock);
   51.90 -            fflush(stdout);
   51.91 -        }
   51.92 -    }
   51.93 -    printf("\n");
   51.94 -    
   51.95 -    printf("VDI %Ld looks good!\n", id);
   51.96 -    
   51.97 -    freeblock(vdi);
   51.98 -    
   51.99 -    return (0);
  51.100 -}
    52.1 --- a/tools/blktap/ublkback/Makefile	Fri Jun 23 15:26:01 2006 -0600
    52.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    52.3 @@ -1,41 +0,0 @@
    52.4 -
    52.5 -XEN_ROOT = ../../..
    52.6 -include $(XEN_ROOT)/tools/Rules.mk
    52.7 -
    52.8 -INCLUDES += -I..
    52.9 -
   52.10 -INSTALL            = install
   52.11 -INSTALL_PROG = $(INSTALL) -m0755
   52.12 -IBIN         = ublkback
   52.13 -INSTALL_DIR  = /usr/sbin
   52.14 -
   52.15 -CFLAGS   += -Werror
   52.16 -CFLAGS   += -Wno-unused
   52.17 -CFLAGS   += -fno-strict-aliasing
   52.18 -CFLAGS   += -I $(XEN_LIBXC)
   52.19 -CFLAGS   += $(INCLUDES) -I.
   52.20 -CFLAGS   += -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE
   52.21 -# Get gcc to generate the dependencies for us.
   52.22 -CFLAGS   += -Wp,-MD,.$(@F).d
   52.23 -DEPS     = .*.d
   52.24 -
   52.25 -OBJS     = $(patsubst %.c,%.o,$(SRCS))
   52.26 -
   52.27 -.PHONY: all
   52.28 -all: $(IBIN)
   52.29 -
   52.30 -LINUX_ROOT := $(wildcard $(XEN_ROOT)/linux-2.6.*-xen-sparse)
   52.31 -
   52.32 -.PHONY: install
   52.33 -install:
   52.34 -	$(INSTALL_PROG) $(IBIN) $(DESTDIR)$(INSTALL_DIR)
   52.35 -
   52.36 -.PHONY: clean
   52.37 -clean:
   52.38 -	rm -rf *.o*~ $(DEPS) xen TAGS $(IBIN)
   52.39 -
   52.40 -ublkback: 
   52.41 -	$(CC) $(CFLAGS) -o ublkback -L$(XEN_LIBXC) -L. -L..  \
   52.42 -	      -lblktap -laio ublkback.c ublkbacklib.c -pg
   52.43 -
   52.44 --include $(DEPS)
    53.1 --- a/tools/blktap/ublkback/ublkback.c	Fri Jun 23 15:26:01 2006 -0600
    53.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    53.3 @@ -1,18 +0,0 @@
    53.4 -/* ublkback.c
    53.5 - *
    53.6 - * libaio-based userlevel backend.
    53.7 - */
    53.8 -
    53.9 -#include "blktaplib.h"
   53.10 -#include "ublkbacklib.h"
   53.11 -
   53.12 -
   53.13 -int main(int argc, char *argv[])
   53.14 -{
   53.15 -    ublkback_init();
   53.16 -    
   53.17 -    register_new_blkif_hook(ublkback_new_blkif);
   53.18 -    blktap_listen();
   53.19 -    
   53.20 -    return 0;
   53.21 -}
    54.1 --- a/tools/blktap/ublkback/ublkbacklib.c	Fri Jun 23 15:26:01 2006 -0600
    54.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    54.3 @@ -1,473 +0,0 @@
    54.4 -/* ublkbacklib.c
    54.5 - *
    54.6 - * file/device image-backed block device -- using linux libaio.
    54.7 - * 
    54.8 - * (c) 2004 Andrew Warfield.
    54.9 - *
   54.10 - * Xend has been modified to use an amorfs:[fsid] disk tag.
   54.11 - * This will show up as device type (maj:240,min:0) = 61440.
   54.12 - *
   54.13 - * The fsid is placed in the sec_start field of the disk extent.
   54.14 - *
   54.15 - * NOTE: This doesn't work.  Grrr.
   54.16 - */
   54.17 -
   54.18 -#define _GNU_SOURCE
   54.19 -#define __USE_LARGEFILE64
   54.20 -
   54.21 -#include <stdio.h>
   54.22 -#include <stdlib.h>
   54.23 -#include <fcntl.h>
   54.24 -#include <string.h>
   54.25 -#include <db.h>       
   54.26 -#include <sys/stat.h>
   54.27 -#include <sys/types.h>
   54.28 -#include <sys/poll.h>
   54.29 -#include <unistd.h>
   54.30 -#include <errno.h>
   54.31 -#include <libaio.h>
   54.32 -#include <pthread.h>
   54.33 -#include <time.h>
   54.34 -#include <err.h>
   54.35 -#include "blktaplib.h"
   54.36 -
   54.37 -/* XXXX:  */
   54.38 -/* Current code just mounts this file/device to any requests that come in. */
   54.39 -//#define TMP_IMAGE_FILE_NAME "/dev/sda1"
   54.40 -#define TMP_IMAGE_FILE_NAME "fc3.image"
   54.41 -
   54.42 -#define MAX_REQUESTS            64 /* must be synced with the blkif drivers. */
   54.43 -#define MAX_SEGMENTS_PER_REQ    11
   54.44 -#define SECTOR_SHIFT             9
   54.45 -#define MAX_AIO_REQS   (MAX_REQUESTS * MAX_SEGMENTS_PER_REQ)
   54.46 -
   54.47 -#if 0
   54.48 -#define DPRINTF(_f, _a...) printf ( _f , ## _a )
   54.49 -#else
   54.50 -#define DPRINTF(_f, _a...) ((void)0)
   54.51 -#endif
   54.52 -           
   54.53 -#if 1                                                                        
   54.54 -#define ASSERT(_p) \
   54.55 -    if ( !(_p) ) { printf("Assertion '%s' failed, line %d, file %s", #_p , \
   54.56 -    __LINE__, __FILE__); *(int*)0=0; }
   54.57 -#else
   54.58 -#define ASSERT(_p) ((void)0)
   54.59 -#endif                                                                     
   54.60 -
   54.61 -/* Note on pending_reqs: I assume all reqs are queued before they start to 
   54.62 - * get filled.  so count of 0 is an unused record.
   54.63 - */
   54.64 -typedef struct {
   54.65 -    blkif_request_t  req;
   54.66 -    blkif_t         *blkif;
   54.67 -    int              count;
   54.68 -} pending_req_t;
   54.69 -
   54.70 -static pending_req_t    pending_list[MAX_REQUESTS];
   54.71 -static io_context_t  ctx;
   54.72 -static struct iocb  *iocb_free[MAX_AIO_REQS];
   54.73 -static int           iocb_free_count;
   54.74 -
   54.75 -/* ---[ Notification mecahnism ]--------------------------------------- */
   54.76 -
   54.77 -enum { 
   54.78 -    READ   = 0,
   54.79 -    WRITE  = 1
   54.80 -};
   54.81 -
   54.82 -static int aio_notify[2];
   54.83 -static volatile int aio_listening = 0;
   54.84 -static pthread_mutex_t notifier_sem = PTHREAD_MUTEX_INITIALIZER;
   54.85 -
   54.86 -static struct io_event aio_events[MAX_AIO_REQS];
   54.87 -static int             aio_event_count = 0;
   54.88 -
   54.89 -/* this is commented out in libaio.h for some reason. */
   54.90 -extern int io_queue_wait(io_context_t ctx, struct timespec *timeout);
   54.91 -
   54.92 -static void *notifier_thread(void *arg)
   54.93 -{
   54.94 -    int ret; 
   54.95 -    int msg = 0x00feeb00;
   54.96 -    
   54.97 -    DPRINTF("Notifier thread started.\n");
   54.98 -    for (;;) {
   54.99 -        pthread_mutex_lock(&notifier_sem);
  54.100 -        if ((ret = io_getevents(ctx, 1, MAX_AIO_REQS, aio_events, 0)) > 0) {
  54.101 -            aio_event_count = ret;
  54.102 -            write(aio_notify[WRITE], &msg, sizeof(msg));
  54.103 -        } else {
  54.104 -                printf("[io_queue_wait error! %d]\n", errno);
  54.105 -                pthread_mutex_unlock(&notifier_sem);
  54.106 -        }
  54.107 -    }
  54.108 -}
  54.109 -
  54.110 -/* --- Talking to xenstore: ------------------------------------------- */
  54.111 -
  54.112 -int ublkback_request(blkif_t *blkif, blkif_request_t *req, int batch_done);
  54.113 -int ublkback_response(blkif_t *blkif, blkif_response_t *rsp, int batch_done);
  54.114 -
  54.115 -typedef struct image {
  54.116 -    /* These need to turn into an array/rbtree for multi-disk support. */
  54.117 -    int  fd;
  54.118 -    uint64_t  fsid;
  54.119 -    blkif_vdev_t   vdevice;
  54.120 -    long int size;
  54.121 -    long int secsize;
  54.122 -    long int info;
  54.123 -} image_t;
  54.124 -
  54.125 -long int ublkback_get_size(blkif_t *blkif)
  54.126 -{
  54.127 -    image_t *img = (image_t *)blkif->prv;
  54.128 -    return img->size;
  54.129 -}
  54.130 -
  54.131 -long int ublkback_get_secsize(blkif_t *blkif)
  54.132 -{
  54.133 -    image_t *img = (image_t *)blkif->prv;
  54.134 -    return img->secsize;
  54.135 -}
  54.136 -
  54.137 -unsigned ublkback_get_info(blkif_t *blkif)
  54.138 -{
  54.139 -    image_t *img = (image_t *)blkif->prv;
  54.140 -    return img->info;
  54.141 -}
  54.142 -
  54.143 -static struct blkif_ops ublkback_ops = {
  54.144 -    get_size:    ublkback_get_size,
  54.145 -    get_secsize: ublkback_get_secsize,
  54.146 -    get_info:    ublkback_get_info,
  54.147 -};
  54.148 -
  54.149 -int ublkback_new_blkif(blkif_t *blkif)
  54.150 -{
  54.151 -    image_t *image;
  54.152 -    struct stat stat;
  54.153 -    int ret;
  54.154 -
  54.155 -    image = (image_t *)malloc(sizeof(image_t));
  54.156 -    if (image == NULL) {
  54.157 -        printf("error allocating image record.\n");
  54.158 -        return -ENOMEM;
  54.159 -    }
  54.160 -
  54.161 -    /* Open it. */
  54.162 -    image->fd = open(TMP_IMAGE_FILE_NAME, 
  54.163 -                     O_RDWR | O_DIRECT | O_LARGEFILE);
  54.164 -
  54.165 -    if ((image->fd < 0) && (errno == EINVAL)) {
  54.166 -        /* Maybe O_DIRECT isn't supported. */
  54.167 -        warn("open() failed on '%s', trying again without O_DIRECT",
  54.168 -               TMP_IMAGE_FILE_NAME);
  54.169 -        image->fd = open(TMP_IMAGE_FILE_NAME, O_RDWR | O_LARGEFILE);
  54.170 -    }
  54.171 -
  54.172 -    if (image->fd < 0) {
  54.173 -        warn("Couldn't open image file!");
  54.174 -        free(image);
  54.175 -        return -EINVAL;
  54.176 -    }
  54.177 -
  54.178 -    /* Size it. */
  54.179 -    ret = fstat(image->fd, &stat);
  54.180 -    if (ret != 0) {
  54.181 -        printf("Couldn't stat image in PROBE!");
  54.182 -        return -EINVAL;
  54.183 -    }
  54.184 -    
  54.185 -    image->size = (stat.st_size >> SECTOR_SHIFT);
  54.186 -
  54.187 -    /* TODO: IOCTL to get size of raw device. */
  54.188 -/*
  54.189 -  ret = ioctl(img->fd, BLKGETSIZE, &blksize);
  54.190 -  if (ret != 0) {
  54.191 -  printf("Couldn't ioctl image in PROBE!\n");
  54.192 -  goto err;
  54.193 -  }
  54.194 -*/
  54.195 -    if (image->size == 0)
  54.196 -        image->size =((uint64_t) 16836057);
  54.197 -    image->secsize = 512;
  54.198 -    image->info = 0;
  54.199 -
  54.200 -    /* Register the hooks */
  54.201 -    blkif_register_request_hook(blkif, "Ublkback req.", ublkback_request);
  54.202 -    blkif_register_response_hook(blkif, "Ublkback resp.", ublkback_response);
  54.203 -
  54.204 -
  54.205 -    printf(">X<Created a new blkif! pdev was %ld, but you got %s\n", 
  54.206 -           blkif->pdev, TMP_IMAGE_FILE_NAME);
  54.207 -
  54.208 -    blkif->ops = &ublkback_ops;
  54.209 -    blkif->prv = (void *)image;
  54.210 -
  54.211 -    return 0;
  54.212 -}
  54.213 -
  54.214 -
  54.215 -/* --- Moving the bits: ----------------------------------------------- */
  54.216 -
  54.217 -static int batch_count = 0;
  54.218 -int ublkback_request(blkif_t *blkif, blkif_request_t *req, int batch_done)
  54.219 -{
  54.220 -    int fd;
  54.221 -    uint64_t sector;
  54.222 -    char *spage, *dpage;
  54.223 -    int ret, i, idx;
  54.224 -    blkif_response_t *rsp;
  54.225 -    domid_t dom = ID_TO_DOM(req->id);
  54.226 -    static struct iocb *ioq[MAX_SEGMENTS_PER_REQ*MAX_REQUESTS]; 
  54.227 -    static int io_idx = 0;
  54.228 -    struct iocb *io;
  54.229 -    image_t *img;
  54.230 -
  54.231 -    img = (image_t *)blkif->prv;
  54.232 -    fd = img->fd;
  54.233 -
  54.234 -    switch (req->operation) 
  54.235 -    {
  54.236 -    case BLKIF_OP_WRITE:
  54.237 -    {
  54.238 -        unsigned long size;
  54.239 -
  54.240 -        batch_count++;
  54.241 -
  54.242 -        idx = ID_TO_IDX(req->id);
  54.243 -        ASSERT(pending_list[idx].count == 0);
  54.244 -        memcpy(&pending_list[idx].req, req, sizeof(*req));
  54.245 -        pending_list[idx].count = req->nr_segments;
  54.246 -        pending_list[idx].blkif = blkif;
  54.247 -        
  54.248 -        for (i = 0; i < req->nr_segments; i++) {
  54.249 -            
  54.250 -            sector = req->sector_number + (8*i);
  54.251 -            
  54.252 -            size = req->seg[i].last_sect - req->seg[i].first_sect + 1;
  54.253 -            
  54.254 -            if (req->seg[i].first_sect != 0)
  54.255 -                DPRINTF("iWR: sec_nr: %10llu sec: %10llu (%1lu,%1lu) "
  54.256 -                        "pos: %15lu\n",
  54.257 -                        req->sector_number, sector, 
  54.258 -                        req->seg[i].first_sect, req->seg[i].last_sect,
  54.259 -                        (long)(sector << SECTOR_SHIFT));
  54.260 -                        
  54.261 -            spage  = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i);
  54.262 -            spage += req->seg[i].first_sect << SECTOR_SHIFT;
  54.263 -            
  54.264 -            /*convert size and sector to byte offsets */
  54.265 -            size   <<= SECTOR_SHIFT;
  54.266 -            sector <<= SECTOR_SHIFT;
  54.267 -            
  54.268 -            io = iocb_free[--iocb_free_count];
  54.269 -            io_prep_pwrite(io, fd, spage, size, sector);
  54.270 -            io->data = (void *)idx;
  54.271 -            //ioq[i] = io;
  54.272 -            ioq[io_idx++] = io;
  54.273 -        }
  54.274 -
  54.275 -        if (batch_done) {
  54.276 -            ret = io_submit(ctx, io_idx, ioq);
  54.277 -            batch_count = 0;
  54.278 -            if (ret < 0)
  54.279 -                printf("BADNESS: io_submit error! (%d)\n", errno);
  54.280 -            io_idx = 0;
  54.281 -        }
  54.282 -        
  54.283 -        return BLKTAP_STOLEN;
  54.284 -        
  54.285 -    }
  54.286 -    case BLKIF_OP_READ:
  54.287 -    {
  54.288 -        unsigned long size;
  54.289 -        
  54.290 -        batch_count++;
  54.291 -        idx = ID_TO_IDX(req->id);
  54.292 -        ASSERT(pending_list[idx].count == 0);
  54.293 -        memcpy(&pending_list[idx].req, req, sizeof(*req));
  54.294 -        pending_list[idx].count = req->nr_segments;
  54.295 -        pending_list[idx].blkif = blkif;
  54.296 -        
  54.297 -        for (i = 0; i < req->nr_segments; i++) {
  54.298 -            
  54.299 -            sector  = req->sector_number + (8*i);
  54.300 -            
  54.301 -            size = req->seg[i].last_sect - req->seg[i].first_sect + 1;
  54.302 -
  54.303 -            dpage  = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i);
  54.304 -            dpage += req->seg[i].first_sect << SECTOR_SHIFT;
  54.305 -            
  54.306 -            if (req->seg[i].first_sect != 0)
  54.307 -                DPRINTF("iRD : sec_nr: %10llu sec: %10llu (%1lu,%1lu) "
  54.308 -                        "pos: %15lu dpage: %p\n", 
  54.309 -                        req->sector_number, sector, 
  54.310 -                        req->seg[i].first_sect, req->seg[i].last_sect,
  54.311 -                        (long)(sector << SECTOR_SHIFT), dpage);
  54.312 -            
  54.313 -            /*convert size and sector to byte offsets */
  54.314 -            size   <<= SECTOR_SHIFT;
  54.315 -            sector <<= SECTOR_SHIFT;
  54.316 -            
  54.317 -
  54.318 -            /*
  54.319 -             * NB: Looks like AIO now has non-page aligned support, this path 
  54.320 -             * can probably be removed...  Only really used for hunting
  54.321 -             * superblocks anyway... ;)
  54.322 -             */
  54.323 -            if ( ((unsigned long)dpage % PAGE_SIZE) != 0 ) {
  54.324 -                /* AIO to raw devices must be page aligned, so do this read
  54.325 -                 * synchronously.  The OS is probably just looking for 
  54.326 -                 * a superblock or something, so this won't hurt performance. 
  54.327 -                 */
  54.328 -                int ret;
  54.329 -
  54.330 -                printf("Slow path block read.\n");
  54.331 -                /* Question: do in-progress aio ops modify the file cursor? */
  54.332 -                ret = lseek(fd, sector, SEEK_SET);
  54.333 -                if (ret == (off_t)-1)
  54.334 -                    printf("lseek failed!\n");
  54.335 -                ret = read(fd, dpage, size);
  54.336 -                if (ret < 0)
  54.337 -                    printf("read problem (%d)\n", ret);
  54.338 -                printf("|\n|\n| read: %lld, %lu, %d\n|\n|\n", sector, size, ret);
  54.339 -
  54.340 -                /* not an async request any more... */
  54.341 -                pending_list[idx].count--;
  54.342 -
  54.343 -                rsp = (blkif_response_t *)req;
  54.344 -                rsp->id = req->id;
  54.345 -                rsp->operation = BLKIF_OP_READ;
  54.346 -                rsp->status = BLKIF_RSP_OKAY;
  54.347 -                return BLKTAP_RESPOND;  
  54.348 -                /* Doh -- need to flush aio if this is end-of-batch */
  54.349 -            }
  54.350 -
  54.351 -            io = iocb_free[--iocb_free_count];
  54.352 -            
  54.353 -            io_prep_pread(io, fd, dpage, size, sector);
  54.354 -            io->data = (void *)idx;
  54.355 -            
  54.356 -            ioq[io_idx++] = io;
  54.357 -            //ioq[i] = io;
  54.358 -        }
  54.359 -        
  54.360 -        if (batch_done) {
  54.361 -            ret = io_submit(ctx, io_idx, ioq);
  54.362 -            batch_count = 0;
  54.363 -            if (ret < 0)
  54.364 -                printf("BADNESS: io_submit error! (%d)\n", errno);
  54.365 -            io_idx = 0;
  54.366 -        }
  54.367 -        
  54.368 -        return BLKTAP_STOLEN;
  54.369 -        
  54.370 -    }
  54.371 -    }
  54.372 -    
  54.373 -    printf("Unknown block operation!\n");
  54.374 -err:
  54.375 -    rsp = (blkif_response_t *)req;
  54.376 -    rsp->id = req->id;
  54.377 -    rsp->operation = req->operation;
  54.378 -    rsp->status = BLKIF_RSP_ERROR;
  54.379 -    return BLKTAP_RESPOND;  
  54.380 -}
  54.381 -
  54.382 -
  54.383 -int ublkback_pollhook(int fd)
  54.384 -{
  54.385 -    struct io_event *ep;
  54.386 -    int n, ret, idx;
  54.387 -    blkif_request_t *req;
  54.388 -    blkif_response_t *rsp;
  54.389 -    int responses_queued = 0;
  54.390 -    int pages=0;
  54.391 -    
  54.392 -    for (ep = aio_events; aio_event_count-- > 0; ep++) {
  54.393 -        struct iocb *io = ep->obj;
  54.394 -        idx = (int) ep->data;
  54.395 -        
  54.396 -        if ((idx > MAX_REQUESTS-1) || (pending_list[idx].count == 0)){
  54.397 -            printf("invalid index returned(%u)!\n", idx);
  54.398 -            break;
  54.399 -        }
  54.400 -        
  54.401 -        if ((int)ep->res < 0) 
  54.402 -            printf("***\n***aio request error! (%d,%d)\n***\n", 
  54.403 -                   (int)ep->res, (int)ep->res2);
  54.404 -        
  54.405 -        pending_list[idx].count--;
  54.406 -        iocb_free[iocb_free_count++] = io;
  54.407 -        pages++;
  54.408 -
  54.409 -        if (pending_list[idx].count == 0) {
  54.410 -            blkif_request_t tmp = pending_list[idx].req;
  54.411 -            rsp = (blkif_response_t *)&pending_list[idx].req;
  54.412 -            rsp->id = tmp.id;
  54.413 -            rsp->operation = tmp.operation;
  54.414 -            rsp->status = BLKIF_RSP_OKAY;
  54.415 -            blkif_inject_response(pending_list[idx].blkif, rsp);
  54.416 -            responses_queued++;
  54.417 -        }
  54.418 -    }
  54.419 -
  54.420 -    if (responses_queued) {
  54.421 -        blktap_kick_responses();
  54.422 -    }
  54.423 -    
  54.424 -    read(aio_notify[READ], &idx, sizeof(idx));
  54.425 -    aio_listening = 1;
  54.426 -    pthread_mutex_unlock(&notifier_sem);
  54.427 -    
  54.428 -    return 0;
  54.429 -}
  54.430 -
  54.431 -/* the image library terminates the request stream. _resp is a noop. */
  54.432 -int ublkback_response(blkif_t *blkif, blkif_response_t *rsp, int batch_done)
  54.433 -{   
  54.434 -    return BLKTAP_PASS;
  54.435 -}
  54.436 -
  54.437 -void ublkback_init(void)
  54.438 -{
  54.439 -    int i, rc;
  54.440 -    pthread_t p;
  54.441 -    
  54.442 -    for (i = 0; i < MAX_REQUESTS; i++)
  54.443 -        pending_list[i].count = 0; 
  54.444 -    
  54.445 -    memset(&ctx, 0, sizeof(ctx));
  54.446 -    rc = io_queue_init(MAX_AIO_REQS, &ctx);
  54.447 -    if (rc != 0) {
  54.448 -        printf("queue_init failed! (%d)\n", rc);
  54.449 -        exit(0);
  54.450 -    }
  54.451 -    
  54.452 -    for (i=0; i<MAX_AIO_REQS; i++) {
  54.453 -        if (!(iocb_free[i] = (struct iocb *)malloc(sizeof(struct iocb)))) {
  54.454 -            printf("error allocating iocb array\n");
  54.455 -            exit(0);
  54.456 -        }
  54.457 -        iocb_free_count = i;
  54.458 -    }
  54.459 -    
  54.460 -    rc = pipe(aio_notify);
  54.461 -    if (rc != 0) {
  54.462 -        printf("pipe failed! (%d)\n", errno);
  54.463 -        exit(0);
  54.464 -    }
  54.465 -    
  54.466 -    rc = pthread_create(&p, NULL, notifier_thread, NULL);
  54.467 -    if (rc != 0) {
  54.468 -        printf("pthread_create failed! (%d)\n", errno);
  54.469 -        exit(0);
  54.470 -    }
  54.471 -    
  54.472 -    aio_listening = 1;
  54.473 -    
  54.474 -    blktap_attach_poll(aio_notify[READ], POLLIN, ublkback_pollhook);
  54.475 -}
  54.476 -
    55.1 --- a/tools/blktap/ublkback/ublkbacklib.h	Fri Jun 23 15:26:01 2006 -0600
    55.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    55.3 @@ -1,16 +0,0 @@
    55.4 -/* blkaiolib.h
    55.5 - *
    55.6 - * aio image-backed block device.
    55.7 - * 
    55.8 - * (c) 2004 Andrew Warfield.
    55.9 - *
   55.10 - * Xend has been modified to use an amorfs:[fsid] disk tag.
   55.11 - * This will show up as device type (maj:240,min:0) = 61440.
   55.12 - *
   55.13 - * The fsid is placed in the sec_start field of the disk extent.
   55.14 - */
   55.15 -
   55.16 -int  ublkback_request(blkif_request_t *req, int batch_done);
   55.17 -int  ublkback_response(blkif_response_t *rsp); /* noop */
   55.18 -int  ublkback_new_blkif(blkif_t *blkif);
   55.19 -void ublkback_init(void);
    56.1 --- a/tools/blktap/xenbus.c	Fri Jun 23 15:26:01 2006 -0600
    56.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    56.3 @@ -1,568 +0,0 @@
    56.4 -/*
    56.5 - * xenbus.c
    56.6 - * 
    56.7 - * xenbus interface to the blocktap.
    56.8 - * 
    56.9 - * this handles the top-half of integration with block devices through the
   56.10 - * store -- the tap driver negotiates the device channel etc, while the
   56.11 - * userland tap clinet needs to sort out the disk parameters etc.
   56.12 - * 
   56.13 - * A. Warfield 2005 Based primarily on the blkback and xenbus driver code.  
   56.14 - * Comments there apply here...
   56.15 - */
   56.16 -
   56.17 -#include <stdio.h>
   56.18 -#include <stdlib.h>
   56.19 -#include <string.h>
   56.20 -#include <err.h>
   56.21 -#include <stdarg.h>
   56.22 -#include <errno.h>
   56.23 -#include <xs.h>
   56.24 -#include <sys/types.h>
   56.25 -#include <sys/stat.h>
   56.26 -#include <fcntl.h>
   56.27 -#include <poll.h>
   56.28 -#include "blktaplib.h"
   56.29 -#include "list.h"
   56.30 -
   56.31 -#if 0
   56.32 -#define DPRINTF(_f, _a...) printf ( _f , ## _a )
   56.33 -#else
   56.34 -#define DPRINTF(_f, _a...) ((void)0)
   56.35 -#endif
   56.36 -
   56.37 -/* --- Xenstore / Xenbus helpers ---------------------------------------- */
   56.38 -/*
   56.39 - * These should all be pulled out into the xenstore API.  I'm faulting commands
   56.40 - * in from the xenbus interface as i need them.
   56.41 - */
   56.42 -
   56.43 -
   56.44 -/* Takes tuples of names, scanf-style args, and void **, NULL terminated. */
   56.45 -int xs_gather(struct xs_handle *xs, const char *dir, ...)
   56.46 -{
   56.47 -    va_list ap;
   56.48 -    const char *name;
   56.49 -    char *path;
   56.50 -    int ret = 0;
   56.51 -    
   56.52 -    va_start(ap, dir);
   56.53 -    while (ret == 0 && (name = va_arg(ap, char *)) != NULL) {
   56.54 -        const char *fmt = va_arg(ap, char *);
   56.55 -        void *result = va_arg(ap, void *);
   56.56 -        char *p;
   56.57 -        
   56.58 -        if (asprintf(&path, "%s/%s", dir, name) == -1)
   56.59 -        {
   56.60 -            warn("allocation error in xs_gather!\n");
   56.61 -            ret = ENOMEM;
   56.62 -            break;
   56.63 -        }
   56.64 -        p = xs_read(xs, path, NULL);
   56.65 -        free(path);
   56.66 -        if (p == NULL) {
   56.67 -            ret = ENOENT;
   56.68 -            break;
   56.69 -        }
   56.70 -        if (fmt) {
   56.71 -            if (sscanf(p, fmt, result) == 0)
   56.72 -                ret = EINVAL;
   56.73 -            free(p);
   56.74 -        } else
   56.75 -            *(char **)result = p;
   56.76 -    }
   56.77 -    va_end(ap);
   56.78 -    return ret;
   56.79 -}
   56.80 -
   56.81 -/* Single printf and write: returns -errno or 0. */
   56.82 -int xs_printf(struct xs_handle *h, const char *dir, const char *node, 
   56.83 -                  const char *fmt, ...)
   56.84 -{
   56.85 -        char *buf, *path;
   56.86 -        va_list ap;
   56.87 -        int ret;
   56.88 - 
   56.89 -        va_start(ap, fmt);
   56.90 -        ret = vasprintf(&buf, fmt, ap);
   56.91 -        va_end(ap);
   56.92 - 
   56.93 -        asprintf(&path, "%s/%s", dir, node);
   56.94 -
   56.95 -        if ((path == NULL) || (buf == NULL))
   56.96 -            return 0;
   56.97 -
   56.98 -        ret = xs_write(h, path, buf, strlen(buf)+1);
   56.99 -
  56.100 -        free(buf);
  56.101 -        free(path);
  56.102 -
  56.103 -        return ret;
  56.104 -}
  56.105 -
  56.106 -
  56.107 -int xs_exists(struct xs_handle *h, const char *path)
  56.108 -{
  56.109 -    char **d;
  56.110 -    int num;
  56.111 -
  56.112 -    d = xs_directory(h, path, &num);
  56.113 -    if (d == NULL)
  56.114 -        return 0;
  56.115 -    free(d);
  56.116 -    return 1;
  56.117 -}
  56.118 -
  56.119 -
  56.120 -
  56.121 -/* This assumes that the domain name we are looking for is unique! */
  56.122 -char *get_dom_domid(struct xs_handle *h, const char *name)
  56.123 -{
  56.124 -    char **e, *val, *domid = NULL;
  56.125 -    int num, i, len;
  56.126 -    char *path;
  56.127 -
  56.128 -    e = xs_directory(h, "/local/domain", &num);
  56.129 -
  56.130 -    i=0;
  56.131 -    while (i < num) {
  56.132 -        asprintf(&path, "/local/domain/%s/name", e[i]);
  56.133 -        val = xs_read(h, path, &len);
  56.134 -        free(path);
  56.135 -        if (val == NULL)
  56.136 -            continue;
  56.137 -        if (strcmp(val, name) == 0) {
  56.138 -            /* match! */
  56.139 -            asprintf(&path, "/local/domain/%s/domid", e[i]);
  56.140 -            domid = xs_read(h, path, &len);
  56.141 -            free(val);
  56.142 -            free(path);
  56.143 -            break;
  56.144 -        }
  56.145 -        free(val);
  56.146 -        i++;
  56.147 -    }
  56.148 -
  56.149 -    free(e);
  56.150 -    return domid;
  56.151 -}
  56.152 -
  56.153 -static int strsep_len(const char *str, char c, unsigned int len)
  56.154 -{
  56.155 -    unsigned int i;
  56.156 -    
  56.157 -    for (i = 0; str[i]; i++)
  56.158 -        if (str[i] == c) {
  56.159 -            if (len == 0)
  56.160 -                return i;
  56.161 -            len--;
  56.162 -        }
  56.163 -    return (len == 0) ? i : -ERANGE;
  56.164 -}
  56.165 -
  56.166 -
  56.167 -/* xenbus watches: */     
  56.168 -/* Register callback to watch this node. */
  56.169 -struct xenbus_watch
  56.170 -{
  56.171 -        struct list_head list;
  56.172 -        char *node;
  56.173 -        void (*callback)(struct xs_handle *h, 
  56.174 -                         struct xenbus_watch *, 
  56.175 -                         const  char *node);
  56.176 -};
  56.177 -
  56.178 -static LIST_HEAD(watches);
  56.179 -
  56.180 -/* A little paranoia: we don't just trust token. */
  56.181 -static struct xenbus_watch *find_watch(const char *token)
  56.182 -{
  56.183 -    struct xenbus_watch *i, *cmp;
  56.184 -    
  56.185 -    cmp = (void *)strtoul(token, NULL, 16);
  56.186 -    
  56.187 -    list_for_each_entry(i, &watches, list)
  56.188 -        if (i == cmp)
  56.189 -            return i;
  56.190 -    return NULL;
  56.191 -}
  56.192 -
  56.193 -/* Register callback to watch this node. like xs_watch, return 0 on failure */
  56.194 -int register_xenbus_watch(struct xs_handle *h, struct xenbus_watch *watch)
  56.195 -{
  56.196 -    /* Pointer in ascii is the token. */
  56.197 -    char token[sizeof(watch) * 2 + 1];
  56.198 -    int er;
  56.199 -    
  56.200 -    sprintf(token, "%lX", (long)watch);
  56.201 -    if (find_watch(token)) 
  56.202 -    {
  56.203 -        warn("watch collision!");
  56.204 -        return -EINVAL;
  56.205 -    }
  56.206 -    
  56.207 -    er = xs_watch(h, watch->node, token);
  56.208 -    if (er != 0) {
  56.209 -        list_add(&watch->list, &watches);
  56.210 -    } 
  56.211 -        
  56.212 -    return er;
  56.213 -}
  56.214 -
  56.215 -int unregister_xenbus_watch(struct xs_handle *h, struct xenbus_watch *watch)
  56.216 -{
  56.217 -    char token[sizeof(watch) * 2 + 1];
  56.218 -    int er;
  56.219 -    
  56.220 -    sprintf(token, "%lX", (long)watch);
  56.221 -    if (!find_watch(token))
  56.222 -    {
  56.223 -        warn("no such watch!");
  56.224 -        return -EINVAL;
  56.225 -    }
  56.226 -    
  56.227 -    
  56.228 -    er = xs_unwatch(h, watch->node, token);
  56.229 -    list_del(&watch->list);
  56.230 -    
  56.231 -    if (er == 0)
  56.232 -        warn("XENBUS Failed to release watch %s: %i",
  56.233 -             watch->node, er);
  56.234 -    return 0;
  56.235 -}
  56.236 -
  56.237 -/* Re-register callbacks to all watches. */
  56.238 -void reregister_xenbus_watches(struct xs_handle *h)
  56.239 -{
  56.240 -    struct xenbus_watch *watch;
  56.241 -    char token[sizeof(watch) * 2 + 1];
  56.242 -    
  56.243 -    list_for_each_entry(watch, &watches, list) {
  56.244 -        sprintf(token, "%lX", (long)watch);
  56.245 -        xs_watch(h, watch->node, token);
  56.246 -    }
  56.247 -}
  56.248 -
  56.249 -/* based on watch_thread() */
  56.250 -int xs_fire_next_watch(struct xs_handle *h)
  56.251 -{
  56.252 -    char **res;
  56.253 -    char *token;
  56.254 -    char *node = NULL;
  56.255 -    struct xenbus_watch *w;
  56.256 -    int er;
  56.257 -    unsigned int num;
  56.258 -
  56.259 -    res = xs_read_watch(h, &num);
  56.260 -    if (res == NULL) 
  56.261 -        return -EAGAIN; /* in O_NONBLOCK, read_watch returns 0... */
  56.262 -
  56.263 -    node  = res[XS_WATCH_PATH];
  56.264 -    token = res[XS_WATCH_TOKEN];
  56.265 -
  56.266 -    w = find_watch(token);
  56.267 -    if (!w)
  56.268 -    {
  56.269 -        warn("unregistered watch fired");
  56.270 -        goto done;
  56.271 -    }
  56.272 -    w->callback(h, w, node);
  56.273 -
  56.274 - done:
  56.275 -    free(res);
  56.276 -    return 1;
  56.277 -}
  56.278 -
  56.279 -
  56.280 -
  56.281 -
  56.282 -/* ---------------------------------------------------------------------- */
  56.283 -
  56.284 -struct backend_info
  56.285 -{
  56.286 -    /* our communications channel */
  56.287 -    blkif_t *blkif;
  56.288 -    
  56.289 -    long int frontend_id;
  56.290 -    long int pdev;
  56.291 -    long int readonly;
  56.292 -    
  56.293 -    /* watch back end for changes */
  56.294 -    struct xenbus_watch backend_watch;
  56.295 -    char *backpath;
  56.296 -
  56.297 -    /* watch front end for changes */
  56.298 -    struct xenbus_watch watch;
  56.299 -    char *frontpath;
  56.300 -
  56.301 -    struct list_head list;
  56.302 -};
  56.303 -
  56.304 -static LIST_HEAD(belist);
  56.305 -
  56.306 -static struct backend_info *be_lookup_be(const char *bepath)
  56.307 -{
  56.308 -    struct backend_info *be;
  56.309 -
  56.310 -    list_for_each_entry(be, &belist, list)
  56.311 -        if (strcmp(bepath, be->backpath) == 0)
  56.312 -            return be;
  56.313 -    return (struct backend_info *)NULL;
  56.314 -}
  56.315 -
  56.316 -static int be_exists_be(const char *bepath)
  56.317 -{
  56.318 -    return ( be_lookup_be(bepath) != NULL );
  56.319 -}
  56.320 -
  56.321 -static struct backend_info *be_lookup_fe(const char *fepath)
  56.322 -{
  56.323 -    struct backend_info *be;
  56.324 -
  56.325 -    list_for_each_entry(be, &belist, list)
  56.326 -        if (strcmp(fepath, be->frontpath) == 0)
  56.327 -            return be;
  56.328 -    return (struct backend_info *)NULL;
  56.329 -}
  56.330 -
  56.331 -static int backend_remove(struct xs_handle *h, struct backend_info *be)
  56.332 -{
  56.333 -    /* Turn off watches. */
  56.334 -    if (be->watch.node)
  56.335 -        unregister_xenbus_watch(h, &be->watch);
  56.336 -    if (be->backend_watch.node)
  56.337 -        unregister_xenbus_watch(h, &be->backend_watch);
  56.338 -
  56.339 -    /* Unhook from be list. */
  56.340 -    list_del(&be->list);
  56.341 -
  56.342 -    /* Free everything else. */
  56.343 -    if (be->blkif)
  56.344 -        free_blkif(be->blkif);
  56.345 -    free(be->frontpath);
  56.346 -    free(be->backpath);
  56.347 -    free(be);
  56.348 -    return 0;
  56.349 -}
  56.350 -
  56.351 -static void frontend_changed(struct xs_handle *h, struct xenbus_watch *w, 
  56.352 -                     const char *fepath_im)
  56.353 -{
  56.354 -    struct backend_info *be;
  56.355 -    char *fepath = NULL;
  56.356 -    int er;
  56.357 -
  56.358 -    be = be_lookup_fe(w->node);
  56.359 -    if (be == NULL)
  56.360 -    {
  56.361 -        warn("frontend changed called for nonexistent backend! (%s)", fepath);
  56.362 -        goto fail;
  56.363 -    }
  56.364 -    
  56.365 -    /* If other end is gone, delete ourself. */
  56.366 -    if (w->node && !xs_exists(h, be->frontpath)) {
  56.367 -        DPRINTF("DELETING BE: %s\n", be->backpath);
  56.368 -        backend_remove(h, be);
  56.369 -        return;
  56.370 -    }
  56.371 -
  56.372 -    if (be->blkif == NULL || (be->blkif->state == CONNECTED))
  56.373 -        return;
  56.374 -
  56.375 -    /* Supply the information about the device the frontend needs */
  56.376 -    er = xs_transaction_start(h, be->backpath);
  56.377 -    if (er == 0) {
  56.378 -        warn("starting transaction");
  56.379 -        goto fail;
  56.380 -    }
  56.381 -    
  56.382 -    er = xs_printf(h, be->backpath, "sectors", "%lu",
  56.383 -			    be->blkif->ops->get_size(be->blkif));
  56.384 -    if (er == 0) {
  56.385 -        warn("writing sectors");
  56.386 -        goto fail;
  56.387 -    }
  56.388 -    
  56.389 -    er = xs_printf(h, be->backpath, "info", "%u",
  56.390 -			    be->blkif->ops->get_info(be->blkif));
  56.391 -    if (er == 0) {
  56.392 -        warn("writing info");
  56.393 -        goto fail;
  56.394 -    }
  56.395 -    
  56.396 -    er = xs_printf(h, be->backpath, "sector-size", "%lu",
  56.397 -			    be->blkif->ops->get_secsize(be->blkif));
  56.398 -    if (er == 0) {
  56.399 -        warn("writing sector-size");
  56.400 -        goto fail;
  56.401 -    }
  56.402 -
  56.403 -    be->blkif->state = CONNECTED;
  56.404 -
  56.405 -    xs_transaction_end(h, 0);
  56.406 -
  56.407 -    return;
  56.408 -
  56.409 - fail:
  56.410 -    free(fepath);
  56.411 -}
  56.412 -
  56.413 -
  56.414 -static void backend_changed(struct xs_handle *h, struct xenbus_watch *w, 
  56.415 -                     const char *bepath_im)
  56.416 -{
  56.417 -    struct backend_info *be;
  56.418 -    char *path = NULL, *p;
  56.419 -    int len, er;
  56.420 -    long int pdev = 0, handle;
  56.421 -
  56.422 -    be = be_lookup_be(w->node);
  56.423 -    if (be == NULL)
  56.424 -    {
  56.425 -        warn("backend changed called for nonexistent backend! (%s)", w->node);
  56.426 -        goto fail;
  56.427 -    }
  56.428 -    
  56.429 -    er = xs_gather(h, be->backpath, "physical-device", "%li", &pdev, NULL);
  56.430 -    if (er != 0) 
  56.431 -        goto fail;
  56.432 -
  56.433 -    if (be->pdev && be->pdev != pdev) {
  56.434 -        warn("changing physical-device not supported");
  56.435 -        goto fail;
  56.436 -    }
  56.437 -    be->pdev = pdev;
  56.438 -
  56.439 -    asprintf(&path, "%s/%s", w->node, "read-only");
  56.440 -    if (xs_exists(h, path))
  56.441 -        be->readonly = 1;
  56.442 -
  56.443 -    if (be->blkif == NULL) {
  56.444 -        /* Front end dir is a number, which is used as the handle. */
  56.445 -        p = strrchr(be->frontpath, '/') + 1;
  56.446 -        handle = strtoul(p, NULL, 0);
  56.447 -
  56.448 -        be->blkif = alloc_blkif(be->frontend_id);
  56.449 -        if (be->blkif == NULL) 
  56.450 -            goto fail;
  56.451 -
  56.452 -        er = blkif_init(be->blkif, handle, be->pdev, be->readonly);
  56.453 -        if (er) 
  56.454 -            goto fail;
  56.455 -
  56.456 -        DPRINTF("[BECHG]: ADDED A NEW BLKIF (%s)\n", w->node);
  56.457 -
  56.458 -        /* Pass in NULL node to skip exist test. */
  56.459 -        frontend_changed(h, &be->watch, NULL);
  56.460 -    }
  56.461 -
  56.462 - fail:
  56.463 -    free(path);
  56.464 -}
  56.465 -
  56.466 -static void blkback_probe(struct xs_handle *h, struct xenbus_watch *w, 
  56.467 -                         const char *bepath_im)
  56.468 -{
  56.469 -	struct backend_info *be = NULL;
  56.470 -	char *frontend = NULL, *bepath = NULL;
  56.471 -	int er, len;
  56.472 -
  56.473 -        bepath = strdup(bepath_im);
  56.474 -        if (!bepath)
  56.475 -            return;
  56.476 -        len = strsep_len(bepath, '/', 6);
  56.477 -        if (len < 0) 
  56.478 -            goto free_be;
  56.479 -        
  56.480 -        bepath[len] = '\0'; /*truncate the passed-in string with predjudice. */
  56.481 -
  56.482 -	be = malloc(sizeof(*be));
  56.483 -	if (!be) {
  56.484 -		warn("allocating backend structure");
  56.485 -		goto free_be;
  56.486 -	}
  56.487 -	memset(be, 0, sizeof(*be));
  56.488 -
  56.489 -	frontend = NULL;
  56.490 -	er = xs_gather(h, bepath,
  56.491 -                        "frontend-id", "%li", &be->frontend_id,
  56.492 -                        "frontend", NULL, &frontend,
  56.493 -                        NULL);
  56.494 -	if (er)
  56.495 -		goto free_be;
  56.496 -
  56.497 -	if (strlen(frontend) == 0 || !xs_exists(h, frontend)) {
  56.498 -            /* If we can't get a frontend path and a frontend-id,
  56.499 -             * then our bus-id is no longer valid and we need to
  56.500 -             * destroy the backend device.
  56.501 -             */
  56.502 -            DPRINTF("No frontend (%s)\n", frontend);
  56.503 -            goto free_be;
  56.504 -	}
  56.505 -
  56.506 -        /* Are we already tracking this device? */
  56.507 -        if (be_exists_be(bepath))
  56.508 -            goto free_be;
  56.509 -
  56.510 -        be->backpath = bepath;
  56.511 -	be->backend_watch.node = be->backpath;
  56.512 -	be->backend_watch.callback = backend_changed;
  56.513 -	er = register_xenbus_watch(h, &be->backend_watch);
  56.514 -	if (er == 0) {
  56.515 -		be->backend_watch.node = NULL;
  56.516 -		warn("error adding backend watch on %s", bepath);
  56.517 -		goto free_be;
  56.518 -	}
  56.519 -
  56.520 -	be->frontpath = frontend;
  56.521 -	be->watch.node = be->frontpath;
  56.522 -	be->watch.callback = frontend_changed;
  56.523 -	er = register_xenbus_watch(h, &be->watch);
  56.524 -	if (er == 0) {
  56.525 -		be->watch.node = NULL;
  56.526 -		warn("adding frontend watch on %s", be->frontpath);
  56.527 -		goto free_be;
  56.528 -	}
  56.529 -
  56.530 -        list_add(&be->list, &belist);
  56.531 -
  56.532 -        DPRINTF("[PROBE]: ADDED NEW DEVICE (%s)\n", bepath_im);
  56.533 -
  56.534 -	backend_changed(h, &be->backend_watch, bepath);
  56.535 -	return;
  56.536 -
  56.537 - free_be:
  56.538 -	if (be && (be->backend_watch.node))
  56.539 -            unregister_xenbus_watch(h, &be->backend_watch);
  56.540 -        free(frontend);
  56.541 -        free(bepath);
  56.542 -	free(be);
  56.543 -	return;
  56.544 -}
  56.545 -
  56.546 -
  56.547 -int add_blockdevice_probe_watch(struct xs_handle *h, const char *domname)
  56.548 -{
  56.549 -    char *domid, *path;
  56.550 -    struct xenbus_watch *vbd_watch;
  56.551 -    int er;
  56.552 -
  56.553 -    domid = get_dom_domid(h, domname);
  56.554 -
  56.555 -    DPRINTF("%s: %s\n", domname, (domid != NULL) ? domid : "[ not found! ]");
  56.556 -
  56.557 -    asprintf(&path, "/local/domain/%s/backend/vbd", domid);
  56.558 -    if (path == NULL) 
  56.559 -        return -ENOMEM;
  56.560 -
  56.561 -    vbd_watch = (struct xenbus_watch *)malloc(sizeof(struct xenbus_watch));
  56.562 -    vbd_watch->node     = path;
  56.563 -    vbd_watch->callback = blkback_probe;
  56.564 -    er = register_xenbus_watch(h, vbd_watch);
  56.565 -    if (er == 0) {
  56.566 -        warn("Error adding vbd probe watch %s", path);
  56.567 -        return -EINVAL;
  56.568 -    }
  56.569 -
  56.570 -    return 0;
  56.571 -}
    57.1 --- a/tools/examples/network-bridge	Fri Jun 23 15:26:01 2006 -0600
    57.2 +++ b/tools/examples/network-bridge	Fri Jun 23 15:33:25 2006 -0600
    57.3 @@ -151,30 +151,6 @@ link_exists()
    57.4      fi
    57.5  }
    57.6  
    57.7 -
    57.8 -# Usage: create_bridge bridge
    57.9 -create_bridge () {
   57.10 -    local bridge=$1
   57.11 -
   57.12 -    # Don't create the bridge if it already exists.
   57.13 -    if ! brctl show | grep -q ${bridge} ; then
   57.14 -	brctl addbr ${bridge}
   57.15 -	brctl stp ${bridge} off
   57.16 -	brctl setfd ${bridge} 0
   57.17 -    fi
   57.18 -    ip link set ${bridge} up
   57.19 -}
   57.20 -
   57.21 -# Usage: add_to_bridge bridge dev
   57.22 -add_to_bridge () {
   57.23 -    local bridge=$1
   57.24 -    local dev=$2
   57.25 -    # Don't add $dev to $bridge if it's already on a bridge.
   57.26 -    if ! brctl show | grep -q ${dev} ; then
   57.27 -	brctl addif ${bridge} ${dev}
   57.28 -    fi
   57.29 -}
   57.30 -
   57.31  # Set the default forwarding policy for $dev to drop.
   57.32  # Allow forwarding to the bridge.
   57.33  antispoofing () {
   57.34 @@ -238,14 +214,13 @@ using loopback.nloopbacks=<N> on the dom
   57.35  	fi
   57.36  	ip link set ${netdev} name ${pdev}
   57.37  	ip link set ${vdev} name ${netdev}
   57.38 -	ip link set ${pdev} down arp off
   57.39 -	ip link set ${pdev} addr fe:ff:ff:ff:ff:ff
   57.40 -	ip addr flush ${pdev}
   57.41 +
   57.42 +	setup_bridge_port ${pdev}
   57.43 +	setup_bridge_port ${vif0}
   57.44  	ip link set ${netdev} addr ${mac} arp on
   57.45 -	add_to_bridge ${bridge} ${vif0}
   57.46 +
   57.47  	ip link set ${bridge} up
   57.48 -	ip link set ${vif0} up
   57.49 -	ip link set ${pdev} up
   57.50 +	add_to_bridge  ${bridge} ${vif0}
   57.51  	add_to_bridge2 ${bridge} ${pdev}
   57.52  	do_ifup ${netdev}
   57.53      else
   57.54 @@ -301,6 +276,7 @@ add_to_bridge2() {
   57.55      local maxtries=10
   57.56  
   57.57      echo -n "Waiting for ${dev} to negotiate link."
   57.58 +    ip link set ${dev} up
   57.59      for i in `seq ${maxtries}` ; do
   57.60  	if ifconfig ${dev} | grep -q RUNNING ; then
   57.61  	    break
    58.1 --- a/tools/examples/vif-bridge	Fri Jun 23 15:26:01 2006 -0600
    58.2 +++ b/tools/examples/vif-bridge	Fri Jun 23 15:33:25 2006 -0600
    58.3 @@ -48,16 +48,8 @@ fi
    58.4  
    58.5  case "$command" in
    58.6      online)
    58.7 -        if brctl show | grep -q "$vif"
    58.8 -        then
    58.9 -          log debug "$vif already attached to a bridge"
   58.10 -          exit 0
   58.11 -        fi
   58.12 -
   58.13 -        brctl addif "$bridge" "$vif" ||
   58.14 -          fatal "brctl addif $bridge $vif failed"
   58.15 -
   58.16 -        ifconfig "$vif" up || fatal "ifconfig $vif up failed"
   58.17 +	setup_bridge_port "$vif"
   58.18 +	add_to_bridge "$bridge" "$vif"
   58.19          ;;
   58.20  
   58.21      offline)
    59.1 --- a/tools/examples/xen-network-common.sh	Fri Jun 23 15:26:01 2006 -0600
    59.2 +++ b/tools/examples/xen-network-common.sh	Fri Jun 23 15:33:25 2006 -0600
    59.3 @@ -104,3 +104,48 @@ find_dhcpd_init_file()
    59.4  {
    59.5    first_file -x /etc/init.d/{dhcp3-server,dhcp,dhcpd}
    59.6  }
    59.7 +
    59.8 +# configure interfaces which act as pure bridge ports:
    59.9 +#  - make quiet: no arp, no multicast (ipv6 autoconf)
   59.10 +#  - set mac address to fe:ff:ff:ff:ff:ff
   59.11 +setup_bridge_port() {
   59.12 +    local dev="$1"
   59.13 +
   59.14 +    # take interface down ...
   59.15 +    ip link set ${dev} down
   59.16 +
   59.17 +    # ... and configure it
   59.18 +    ip link set ${dev} arp off
   59.19 +    ip link set ${dev} multicast off
   59.20 +    ip link set ${dev} addr fe:ff:ff:ff:ff:ff
   59.21 +    ip addr flush ${dev}
   59.22 +}
   59.23 +
   59.24 +# Usage: create_bridge bridge
   59.25 +create_bridge () {
   59.26 +    local bridge=$1
   59.27 +
   59.28 +    # Don't create the bridge if it already exists.
   59.29 +    if [ ! -e "/sys/class/net/${bridge}/bridge" ]; then
   59.30 +	brctl addbr ${bridge}
   59.31 +	brctl stp ${bridge} off
   59.32 +	brctl setfd ${bridge} 0
   59.33 +        ip link set ${bridge} arp off
   59.34 +        ip link set ${bridge} multicast off
   59.35 +    fi
   59.36 +    ip link set ${bridge} up
   59.37 +}
   59.38 +
   59.39 +# Usage: add_to_bridge bridge dev
   59.40 +add_to_bridge () {
   59.41 +    local bridge=$1
   59.42 +    local dev=$2
   59.43 +
   59.44 +    # Don't add $dev to $bridge if it's already a port of $bridge.
   59.45 +    if [ -e "/sys/class/net/${bridge}/brif/${dev}" ]; then
   59.46 +	return
   59.47 +    fi
   59.48 +    brctl addif ${bridge} ${dev}
   59.49 +    ip link set ${dev} up
   59.50 +}
   59.51 +
    60.1 --- a/tools/examples/xmexample.hvm	Fri Jun 23 15:26:01 2006 -0600
    60.2 +++ b/tools/examples/xmexample.hvm	Fri Jun 23 15:33:25 2006 -0600
    60.3 @@ -164,3 +164,15 @@ ne2000=0
    60.4  #-----------------------------------------------------------------------------
    60.5  #    start in full screen
    60.6  #full-screen=1   
    60.7 +
    60.8 +
    60.9 +#-----------------------------------------------------------------------------
   60.10 +#   Enable USB support (specific devices specified at runtime through the
   60.11 +#			monitor window)
   60.12 +#usb=1
   60.13 +
   60.14 +#   Enable USB mouse support (only enable one of the following, `mouse' for
   60.15 +#			      PS/2 protocol relative mouse, `tablet' for
   60.16 +#			      absolute mouse)
   60.17 +#usbdevice='mouse'
   60.18 +#usbdevice='tablet'
    61.1 --- a/tools/firmware/acpi/Makefile	Fri Jun 23 15:26:01 2006 -0600
    61.2 +++ b/tools/firmware/acpi/Makefile	Fri Jun 23 15:33:25 2006 -0600
    61.3 @@ -33,17 +33,16 @@ IASL_VER=acpica-unix-20050513
    61.4  IASL_URL=http://developer.intel.com/technology/iapc/acpi/downloads/$(IASL_VER).tar.gz
    61.5  
    61.6  vpath iasl $(PATH)
    61.7 -.PHONY: all
    61.8  all:$(ACPI_BIN)
    61.9  
   61.10  acpi_dsdt.c:acpi_dsdt.asl
   61.11  	$(MAKE) iasl
   61.12 -	iasl -oa -tc acpi_dsdt.asl
   61.13 +	iasl  -tc acpi_dsdt.asl
   61.14  	mv acpi_dsdt.hex acpi_dsdt.c
   61.15  	echo "int DsdtLen=sizeof(AmlCode);" >> acpi_dsdt.c
   61.16  	rm *.aml
   61.17 +#        iasl -oa -tc acpi_dsdt.asl
   61.18  
   61.19 -.PHONY: iasl
   61.20  iasl:
   61.21  	@echo
   61.22  	@echo "ACPI ASL compiler(iasl) is needed"
   61.23 @@ -62,10 +61,8 @@ iasl:
   61.24  $(ACPI_BIN):$(ACPI_GEN)
   61.25  	./$(ACPI_GEN) $(ACPI_BIN)
   61.26  
   61.27 -.PHONY: clean
   61.28  clean:
   61.29  	rm -rf *.o $(ACPI_GEN) $(ACPI_BIN) $(IASL_VER) 
   61.30  	rm -rf  $(IASL_VER).tar.gz
   61.31  
   61.32 -.PHONY: install
   61.33  install: all
    62.1 --- a/tools/firmware/acpi/acpi2_0.h	Fri Jun 23 15:26:01 2006 -0600
    62.2 +++ b/tools/firmware/acpi/acpi2_0.h	Fri Jun 23 15:33:25 2006 -0600
    62.3 @@ -323,7 +323,7 @@ typedef struct {
    62.4  // The physical that acpi table reside in the guest BIOS
    62.5  //#define ACPI_PHYSICAL_ADDRESS 0xE2000
    62.6  #define ACPI_PHYSICAL_ADDRESS 0xEA000
    62.7 -#define ACPI_TABLE_SIZE (2*1024)  //Currently 2K is enough
    62.8 +#define ACPI_TABLE_SIZE (4*1024)  //Currently 4K is enough
    62.9  
   62.10  void
   62.11  AcpiBuildTable(uint8_t* buf);
    63.1 --- a/tools/firmware/acpi/acpi_dsdt.asl	Fri Jun 23 15:26:01 2006 -0600
    63.2 +++ b/tools/firmware/acpi/acpi_dsdt.asl	Fri Jun 23 15:33:25 2006 -0600
    63.3 @@ -20,7 +20,7 @@
    63.4  //**
    63.5  //**
    63.6  
    63.7 -DefinitionBlock ("DSDT.aml", "DSDT", 1, "INTEL ", "XEN     ", 2)
    63.8 +DefinitionBlock ("DSDT.aml", "DSDT", 1, "INTEL","int-xen", 2006)
    63.9  {
   63.10      Name (\PMBS, 0x0C00)
   63.11      Name (\PMLN, 0x08)
   63.12 @@ -29,24 +29,33 @@ DefinitionBlock ("DSDT.aml", "DSDT", 1, 
   63.13      Name (\APCB, 0xFEC00000)
   63.14      Name (\APCL, 0x00010000)
   63.15      Name (\PUID, 0x00)
   63.16 +
   63.17      Scope (\_PR)
   63.18      {
   63.19          Processor (CPU0, 0x00, 0x00000000, 0x00) {}
   63.20          Processor (CPU1, 0x01, 0x00000000, 0x00) {}
   63.21          Processor (CPU2, 0x02, 0x00000000, 0x00) {}
   63.22          Processor (CPU3, 0x03, 0x00000000, 0x00) {}
   63.23 +
   63.24      }
   63.25  
   63.26  /* Poweroff support - ties in with qemu emulation */
   63.27  
   63.28      Name (\_S5, Package (0x04)
   63.29      {
   63.30 -        0x07, 
   63.31 -        0x07, 
   63.32 -        0x00, 
   63.33 +        0x07,
   63.34 +        0x07,
   63.35 +        0x00,
   63.36          0x00
   63.37      })
   63.38  
   63.39 +
   63.40 +      	Name(PICD, 0)	
   63.41 +
   63.42 +	Method(_PIC, 1) { 
   63.43 + 
   63.44 +		Store(Arg0, PICD) 
   63.45 +	}
   63.46      Scope (\_SB)
   63.47      {
   63.48          Device (PCI0)
   63.49 @@ -55,9 +64,20 @@ DefinitionBlock ("DSDT.aml", "DSDT", 1, 
   63.50              Name (_UID, 0x00)
   63.51              Name (_ADR, 0x00)
   63.52              Name (_BBN, 0x00)
   63.53 +            OperationRegion (PIRP, PCI_Config, 0x3c, 0x10)
   63.54 +           Field(PIRP, ByteAcc, NoLock, Preserve){        
   63.55 +          IRQ3,3,
   63.56 +          IRQ5,5,
   63.57 +          IRQ7,7,
   63.58 +          IRQ9,9,
   63.59 +          IRQA,10,
   63.60 +          IRQB,11
   63.61 +         }
   63.62 + 
   63.63              Method (_CRS, 0, NotSerialized)
   63.64              {
   63.65 -                Name (PRT0, ResourceTemplate ()
   63.66 +          
   63.67 +               Name (PRT0, ResourceTemplate ()
   63.68                  {
   63.69  					/* bus number is from 0 - 255*/
   63.70                      WordBusNumber (ResourceConsumer, MinFixed, MaxFixed, SubDecode,
   63.71 @@ -79,75 +99,270 @@ DefinitionBlock ("DSDT.aml", "DSDT", 1, 
   63.72                          0x0FFF,
   63.73                          0x0000,
   63.74                          0x0300)
   63.75 +
   63.76 +                 /* reserve what device model consumed for IDE and acpi pci device            */
   63.77 +                     WordIO (ResourceConsumer, MinFixed, MaxFixed, PosDecode, EntireRange,
   63.78 +                        0x0000,
   63.79 +                        0xc000,
   63.80 +                        0xc01f,
   63.81 +                        0x0000,
   63.82 +                        0x0020)
   63.83 +                 /* reserve what device model consumed for Ethernet controller pci device        */
   63.84 +                     WordIO (ResourceConsumer, MinFixed, MaxFixed, PosDecode, EntireRange,
   63.85 +                        0x0000,
   63.86 +                        0xc020,
   63.87 +                        0xc03f,
   63.88 +                        0x0000,
   63.89 +                        0x0010)
   63.90 +
   63.91                      DWordMemory (ResourceProducer, PosDecode, MinFixed, MaxFixed, Cacheable, ReadOnly,
   63.92                          0x00000000,
   63.93 -                        0x000A0000,
   63.94 +                        0x000c0000,
   63.95                          0x000FFFFF,
   63.96                          0x00000000,
   63.97 -                        0x00060000)
   63.98 +                        0x00030000)
   63.99 +
  63.100 +                 /* reserve what device model consumed for PCI VGA device        */
  63.101 +
  63.102 +                    DWordMemory (ResourceConsumer, PosDecode, MinFixed, MaxFixed, Cacheable, ReadWrite,
  63.103 +                        0x00000000,
  63.104 +                        0xF0000000,
  63.105 +                        0xF1FFFFFF,
  63.106 +                        0x00000000,
  63.107 +                        0x02000000)
  63.108 +                    DWordMemory (ResourceConsumer, PosDecode, MinFixed, MaxFixed, Cacheable, ReadWrite,
  63.109 +                        0x00000000,
  63.110 +                        0xF2000000,
  63.111 +                        0xF2000FFF,
  63.112 +                        0x00000000,
  63.113 +                        0x00001000)
  63.114 +                 /* reserve what device model consumed for Ethernet controller pci device        */
  63.115 +                      DWordMemory (ResourceConsumer, PosDecode, MinFixed, MaxFixed, Cacheable, ReadWrite,
  63.116 +                        0x00000000,
  63.117 +                        0xF2001000,
  63.118 +                        0xF200101F,
  63.119 +                        0x00000000,
  63.120 +                        0x00000020) 
  63.121                  })
  63.122                  Return (PRT0)
  63.123              }
  63.124 -
  63.125 -            Name (AIR0, Package (0x06)
  63.126 -            {
  63.127 -               Package (0x04)
  63.128 -                {
  63.129 -                    0x001FFFFF, 
  63.130 -                    0x02, 
  63.131 -                    0x00, 
  63.132 -                    0x17
  63.133 -                }, 
  63.134 -
  63.135 -                Package (0x04)
  63.136 -                {
  63.137 -                    0x001FFFFF, 
  63.138 -                    0x03, 
  63.139 -                    0x00, 
  63.140 -                    0x13
  63.141 -                }, 
  63.142 -
  63.143 -                Package (0x04)
  63.144 -                {
  63.145 -                    0x001DFFFF, 
  63.146 -                    0x01, 
  63.147 -                    0x00, 
  63.148 -                    0x13
  63.149 -                }, 
  63.150 +	Name(BUFA, ResourceTemplate() {
  63.151 +                IRQ(Level, ActiveLow, Shared) {
  63.152 +                        3,4,5,6,7,10,11,12,14,15}		 
  63.153 +                }) 
  63.154  
  63.155 -                Package (0x04)
  63.156 -                {
  63.157 -                    0x001DFFFF, 
  63.158 -                    0x00, 
  63.159 -                    0x00, 
  63.160 -                    0x10
  63.161 -                }, 
  63.162 +                Name(BUFB, Buffer(){
  63.163 +                0x23, 0x00, 0x00, 0x18,
  63.164 +                0x79, 0})
  63.165  
  63.166 -                Package (0x04)
  63.167 -                {
  63.168 -                    0x001DFFFF, 
  63.169 -                    0x02, 
  63.170 -                    0x00, 
  63.171 -                    0x12
  63.172 -                }, 
  63.173 +                CreateWordField(BUFB, 0x01, IRQV)
  63.174 +		
  63.175 +                Name(BUFC, Buffer(){
  63.176 +                5, 7, 10, 11
  63.177 +                 })
  63.178 +                
  63.179 +                CreateByteField(BUFC, 0x01, PIQA)
  63.180 +                CreateByteField(BUFC, 0x01, PIQB)
  63.181 +                CreateByteField(BUFC, 0x01, PIQC)
  63.182 +                CreateByteField(BUFC, 0x01, PIQD)
  63.183 +                		
  63.184 +		Device(LNKA)	{
  63.185 +                Name(_HID, EISAID("PNP0C0F")) 	// PCI interrupt link
  63.186 +                Name(_UID, 1)
  63.187 +                Method(_STA, 0) {
  63.188 +                               And(PIRA, 0x80, Local0)
  63.189 +                        If(LEqual(Local0, 0x80)) {
  63.190 +                                Return(0x09)	
  63.191 +                                }
  63.192 +                        Else {
  63.193 +                                Return(0x0B)  	
  63.194 +                                }
  63.195 +                        }
  63.196  
  63.197 -                Package (0x04)
  63.198 -                {
  63.199 -                    0x001DFFFF, 
  63.200 -                    0x03, 
  63.201 -                    0x00, 
  63.202 -                    0x17
  63.203 +                Method(_PRS) {
  63.204 +
  63.205 +                        Return(BUFA)
  63.206 +                } // Method(_PRS)
  63.207 +
  63.208 +                Method(_DIS) {
  63.209 +                               Or(PIRA, 0x80, PIRA)
  63.210                  }
  63.211 -            })
  63.212 -            Method (_PRT, 0, NotSerialized)
  63.213 -            {
  63.214 -                Return (AIR0)
  63.215 -            }
  63.216  
  63.217 +                Method(_CRS) {	// current resource setting of link A
  63.218 +                        And(PIRA, 0x0f, Local0)		// low nibble of PIRA, this link's own register (was PIRB)
  63.219 +                        ShiftLeft(0x1, Local0, IRQV)	// IRQV (word field of BUFB) = 1 << Local0
  63.220 +                        Return(BUFB)			 
  63.221 +                } 
  63.222 +
  63.223 +                Method(_SRS, 1) {
  63.224 +                                CreateWordField(ARG0, 0x01, IRQ1)	 
  63.225 +                        FindSetRightBit(IRQ1, Local0)		 
  63.226 +                        Decrement(Local0)			 
  63.227 +                        Store(Local0, PIRA)			 
  63.228 +                 } // Method(_SRS)
  63.229 +        }
  63.230 +
  63.231 +        Device(LNKB)	{
  63.232 +                Name(_HID, EISAID("PNP0C0F")) 	 
  63.233 +                Name(_UID, 2)
  63.234 +                Method(_STA, 0) {
  63.235 +                               And(PIRB, 0x80, Local0)
  63.236 +                        If(LEqual(Local0, 0x80)) {
  63.237 +                                Return(0x09)	 
  63.238 +                                }
  63.239 +                        Else {
  63.240 +                                Return(0x0B)  	 
  63.241 +                                }
  63.242 +                        }
  63.243 +
  63.244 +                Method(_PRS) {
  63.245 +                                Return(BUFA)			 
  63.246 +                } // Method(_PRS)
  63.247 +
  63.248 +                Method(_DIS) {
  63.249 +
  63.250 +                               Or(PIRB, 0x80, PIRB)
  63.251 +                }
  63.252 +
  63.253 +                Method(_CRS) {
  63.254 +                        And(PIRB, 0x0f, Local0)		 
  63.255 +                        ShiftLeft(0x1, Local0, IRQV)	 
  63.256 +                        Return(BUFB)			 
  63.257 +                } // Method(_CRS)
  63.258 +
  63.259 +                Method(_SRS, 1) {
  63.260 +                                CreateWordField(ARG0, 0x01, IRQ1)	 
  63.261 +                        FindSetRightBit(IRQ1, Local0)		 
  63.262 +                        Decrement(Local0)			 
  63.263 +                        Store(Local0, PIRB)			 
  63.264 +                 } // Method(_SRS)
  63.265 +        }
  63.266 +
  63.267 +        Device(LNKC)	{
  63.268 +                Name(_HID, EISAID("PNP0C0F")) 	// PCI interrupt link
  63.269 +                Name(_UID, 3)
  63.270 +                Method(_STA, 0) {
  63.271 +                               And(PIRC, 0x80, Local0)
  63.272 +                        If(LEqual(Local0, 0x80)) {
  63.273 +                                Return(0x09)	 
  63.274 +                        }
  63.275 +                        Else {
  63.276 +                                Return(0x0B)  	 
  63.277 +                        }
  63.278 +                }
  63.279 +
  63.280 +                Method(_PRS) {				 
  63.281 +                        Return(BUFA)			 
  63.282 +                } // Method(_PRS)			 
  63.283 +
  63.284 +                Method(_DIS) {
  63.285 +
  63.286 +                               Or(PIRC, 0x80, PIRC)
  63.287 +                }
  63.288 +
  63.289 +                Method(_CRS) {
  63.290 +                        And(PIRC, 0x0f, Local0)		 
  63.291 +                        ShiftLeft(0x1, Local0, IRQV)	 
  63.292 +                        Return(BUFB)			 
  63.293 +                } // Method(_CRS)
  63.294 +
  63.295 +                Method(_SRS, 1) {
  63.296 +                                CreateWordField(ARG0, 0x01, IRQ1)	 
  63.297 +                        FindSetRightBit(IRQ1, Local0)		 
  63.298 +                        Decrement(Local0)			 
  63.299 +                        Store(Local0, PIRC)			 
  63.300 +                 } // Method(_SRS)
  63.301 +        }
  63.302 +
  63.303 +        Device(LNKD)	{
  63.304 +                Name(_HID, EISAID("PNP0C0F")) 	 
  63.305 +                Name(_UID, 4)
  63.306 +                Method(_STA, 0) {
  63.307 +                               And(PIRD, 0x80, Local0)
  63.308 +                        If(LEqual(Local0, 0x80)) {
  63.309 +                                Return(0x09)	 
  63.310 +                        }
  63.311 +                        Else {
  63.312 +                                Return(0x0B)  	 
  63.313 +                        }
  63.314 +                }
  63.315 +
  63.316 +                Method(_PRS) {				 
  63.317 +                        Return(BUFA)			 
  63.318 +                } // Method(_PRS)			 
  63.319 +
  63.320 +                Method(_DIS) {
  63.321 +                               Or(PIRD, 0x80, PIRD)
  63.322 +                }
  63.323 +
  63.324 +                Method(_CRS) {
  63.325 +                        And(PIRD, 0x0f, Local0)		 
  63.326 +                        ShiftLeft(0x1, Local0, IRQV)	 
  63.327 +                        Return(BUFB)			 
  63.328 +                } // Method(_CRS)
  63.329 +
  63.330 +                Method(_SRS, 1) {
  63.331 +                                CreateWordField(ARG0, 0x01, IRQ1)	 
  63.332 +                        FindSetRightBit(IRQ1, Local0)		 
  63.333 +                        Decrement(Local0)			 
  63.334 +                        Store(Local0, PIRD)			 
  63.335 +                 } // Method(_SRS)
  63.336 +        }
  63.337 +        Method(_PRT,0) {
  63.338 +			If(PICD) {Return(PRTA)}  
  63.339 +			Return (PRTP)  
  63.340 +		} // end _PRT
  63.341 +		
  63.342 +		
  63.343 +        Name(PRTP, Package(){	// table returned by _PRT when PICD is zero; pins go through LNKA-LNKD
  63.344 +                        Package(){0x0000ffff, 0, \_SB.PCI0.LNKA, 0}, 	// Slot 1, INTA -> LNKA
  63.345 +                        Package(){0x0000ffff, 1, \_SB.PCI0.LNKB, 0}, 	// Slot 1, INTB -> LNKB
  63.346 +                        Package(){0x0000ffff, 2, \_SB.PCI0.LNKC, 0}, 	// Slot 1, INTC -> LNKC
  63.347 +                        Package(){0x0000ffff, 3, \_SB.PCI0.LNKD, 0}, 	// Slot 1, INTD -> LNKD
  63.348 +
  63.349 +                        Package(){0x0001ffff, 0, \_SB.PCI0.LNKB, 0}, 	// Slot 2, INTA -> LNKB
  63.350 +                        Package(){0x0001ffff, 1, \_SB.PCI0.LNKC, 0}, 	// Slot 2, INTB -> LNKC
  63.351 +                        Package(){0x0001ffff, 2, \_SB.PCI0.LNKD, 0}, 	// Slot 2, INTC -> LNKD
  63.352 +                        Package(){0x0001ffff, 3, \_SB.PCI0.LNKA, 0}, 	// Slot 2, INTD -> LNKA
  63.353 +                        
  63.354 +                        Package(){0x0002ffff, 0, \_SB.PCI0.LNKC, 0}, 	// Slot 3, INTA -> LNKC
  63.355 +                        Package(){0x0002ffff, 1, \_SB.PCI0.LNKD, 0}, 	// Slot 3, INTB -> LNKD
  63.356 +                        Package(){0x0002ffff, 2, \_SB.PCI0.LNKA, 0}, 	// Slot 3, INTC -> LNKA
  63.357 +                        Package(){0x0002ffff, 3, \_SB.PCI0.LNKB, 0}, 	// Slot 3, INTD -> LNKB
  63.358 +                        
  63.359 +                        Package(){0x0003ffff, 0, \_SB.PCI0.LNKD, 0}, 	// Slot 4, INTA -> LNKD
  63.360 +                        Package(){0x0003ffff, 1, \_SB.PCI0.LNKA, 0}, 	// Slot 4, INTB -> LNKA
  63.361 +                        Package(){0x0003ffff, 2, \_SB.PCI0.LNKB, 0}, 	// Slot 4, INTC -> LNKB
  63.362 +                        Package(){0x0003ffff, 3, \_SB.PCI0.LNKC, 0}, 	// Slot 4, INTD -> LNKC
  63.363 +                        
  63.364 +                        }
  63.365 +            )
  63.366 +	Name(PRTA, Package(){	// table returned by _PRT when PICD is non-zero; fixed interrupt numbers, no link device
  63.367 +                        Package(){0x0001ffff, 0, 0, 5}, 	// Device 1, INTA
  63.368 +
  63.369 +                        Package(){0x0002ffff, 0, 0, 7}, 	// Device 2, INTA
  63.370 +                       
  63.371 +                        Package(){0x0003ffff, 0, 0, 10}, 	// Device 3, INTA
  63.372 +
  63.373 +                        Package(){0x0004ffff, 0, 0, 11}, 	// Device 4, INTA (address was a duplicate of device 3)
  63.374 +                                   
  63.375 +                        
  63.376 +                        }
  63.377 +            )
  63.378 +            
  63.379              Device (ISA)
  63.380              {
  63.381 -                Name (_ADR, 0x00010000) /*TODO, device id, PCI bus num, ...*/
  63.382 -
  63.383 +                Name (_ADR, 0x00000000) /* device id, PCI bus num, ... */
  63.384 + 
  63.385 +		OperationRegion(PIRQ, PCI_Config, 0x60, 0x4)
  63.386 +                        Scope(\) {
  63.387 +                                Field (\_SB.PCI0.ISA.PIRQ, ByteAcc, NoLock, Preserve) {
  63.388 +                                        PIRA, 8,
  63.389 +