ia64/xen-unstable

changeset 5146:de3abc161c24

bitkeeper revision 1.1548 (4294554btfa2GpomqV57KFpxEHsjEA)

Move to Linux's cpumask_t and 'hotplug' multi-processor booting
interfaces. This also brings apic.c and various other files closer to
their Linux 2.6 equivalents, and simplifies the scheduler interfaces a
little (particularly per-cpu and idle-domain initialisation).
Signed-off-by: Keir Fraser <keir@xensource.com>
author kaf24@firebug.cl.cam.ac.uk
date Wed May 25 10:36:59 2005 +0000 (2005-05-25)
parents d61ceec31dfd
children 63d8220a9b26
files xen/arch/ia64/domain.c xen/arch/ia64/xensetup.c xen/arch/x86/acpi/boot.c xen/arch/x86/apic.c xen/arch/x86/cdb.c xen/arch/x86/dom0_ops.c xen/arch/x86/domain.c xen/arch/x86/domain_build.c xen/arch/x86/io_apic.c xen/arch/x86/irq.c xen/arch/x86/microcode.c xen/arch/x86/mtrr/main.c xen/arch/x86/nmi.c xen/arch/x86/setup.c xen/arch/x86/shadow.c xen/arch/x86/smp.c xen/arch/x86/smpboot.c xen/arch/x86/time.c xen/arch/x86/traps.c xen/arch/x86/vmx.c xen/common/ac_timer.c xen/common/dom0_ops.c xen/common/domain.c xen/common/page_alloc.c xen/common/perfc.c xen/common/sched_bvt.c xen/common/sched_sedf.c xen/common/schedule.c xen/common/trace.c xen/include/asm-x86/asm_defns.h xen/include/asm-x86/bitops.h xen/include/asm-x86/div64.h xen/include/asm-x86/flushtlb.h xen/include/asm-x86/irq.h xen/include/asm-x86/processor.h xen/include/xen/bitmap.h xen/include/xen/cpumask.h xen/include/xen/kernel.h xen/include/xen/sched-if.h xen/include/xen/sched.h xen/include/xen/smp.h
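
For readers skimming the diff: the heart of the change is dropping the old smp_num_cpus / smp_boot_cpus() / smp_commence() model in favour of the Linux 2.6 hotplug-style bring-up driven by cpumask_t. Below is a hedged sketch, condensed from the start_of_day() hunk in xen/arch/x86/setup.c further down; it uses the names as they appear in the patch, is not part of the changeset itself, and omits error handling and the surrounding init calls.

    /*
     * Sketch (not part of the changeset): hotplug-style CPU bring-up,
     * condensed from start_of_day() in xen/arch/x86/setup.c.
     */
    static void __init bring_up_secondaries(unsigned int max_cpus)
    {
        unsigned int i;

        smp_prepare_cpus(max_cpus);        /* replaces smp_boot_cpus() */

        /* Not hotplug-capable yet: mark every booted CPU as present. */
        for_each_cpu ( i )
            cpu_set(i, cpu_present_map);

        for_each_present_cpu ( i )
        {
            if ( num_online_cpus() >= max_cpus )
                break;
            if ( !cpu_online(i) )
                __cpu_up(i);               /* boot one AP; it calls in via cpu_callin_map */
        }

        printk("Brought up %ld CPUs\n", (long)num_online_cpus());
        smp_cpus_done(max_cpus);           /* replaces smp_commence()/wait_init_idle */
    }

The old global cpu bitmaps and counters (smp_num_cpus, wait_init_idle) disappear in favour of the cpumask_t operations (cpu_set, cpu_isset, num_online_cpus) introduced in xen/include/xen/cpumask.h by this changeset.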
line diff
     1.1 --- a/xen/arch/ia64/domain.c	Wed May 25 10:32:53 2005 +0000
     1.2 +++ b/xen/arch/ia64/domain.c	Wed May 25 10:36:59 2005 +0000
     1.3 @@ -124,7 +124,6 @@ void startup_cpu_idle_loop(void)
     1.4  {
     1.5  	/* Just some sanity to ensure that the scheduler is set up okay. */
     1.6  	ASSERT(current->domain == IDLE_DOMAIN_ID);
     1.7 -	domain_unpause_by_systemcontroller(current->domain);
     1.8  	raise_softirq(SCHEDULE_SOFTIRQ);
     1.9  	do_softirq();
    1.10  
     2.1 --- a/xen/arch/ia64/xensetup.c	Wed May 25 10:32:53 2005 +0000
     2.2 +++ b/xen/arch/ia64/xensetup.c	Wed May 25 10:36:59 2005 +0000
     2.3 @@ -249,13 +249,11 @@ printk("About to call sort_main_extable(
     2.4      /* Create initial domain 0. */
     2.5  printk("About to call do_createdomain()\n");
     2.6      dom0 = do_createdomain(0, 0);
     2.7 -printk("About to call init_idle_task()\n");
     2.8      init_task.domain = &idle0_domain;
     2.9      init_task.processor = 0;
    2.10  //    init_task.mm = &init_mm;
    2.11      init_task.domain->arch.mm = &init_mm;
    2.12  //    init_task.thread = INIT_THREAD;
    2.13 -    init_idle_task();
    2.14      //arch_do_createdomain(current);
    2.15  #ifdef CLONE_DOMAIN0
    2.16      {
    2.17 @@ -314,7 +312,6 @@ printk("About to call init_trace_bufs()\
    2.18      console_endboot(cmdline && strstr(cmdline, "tty0"));
    2.19  #endif
    2.20  
    2.21 -    domain_unpause_by_systemcontroller(current->domain);
    2.22  #ifdef CLONE_DOMAIN0
    2.23      {
    2.24      int i;
     3.1 --- a/xen/arch/x86/acpi/boot.c	Wed May 25 10:32:53 2005 +0000
     3.2 +++ b/xen/arch/x86/acpi/boot.c	Wed May 25 10:36:59 2005 +0000
     3.3 @@ -34,7 +34,6 @@
     3.4  #include <asm/io_apic.h>
     3.5  #include <asm/apic.h>
     3.6  #include <asm/io.h>
     3.7 -#include <asm/irq.h>
     3.8  #include <asm/mpspec.h>
     3.9  #include <mach_apic.h>
    3.10  #include <mach_mpparse.h>
     4.1 --- a/xen/arch/x86/apic.c	Wed May 25 10:32:53 2005 +0000
     4.2 +++ b/xen/arch/x86/apic.c	Wed May 25 10:36:59 2005 +0000
     4.3 @@ -663,7 +663,7 @@ void (*wait_timer_tick)(void) __initdata
     4.4  
     4.5  #define APIC_DIVISOR 1
     4.6  
     4.7 -static void __setup_APIC_LVTT(unsigned int clocks)
     4.8 +void __setup_APIC_LVTT(unsigned int clocks)
     4.9  {
    4.10      unsigned int lvtt_value, tmp_value, ver;
    4.11  
    4.12 @@ -680,30 +680,33 @@ static void __setup_APIC_LVTT(unsigned i
    4.13      apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR);
    4.14  }
    4.15  
    4.16 -/*
    4.17 - * this is done for every CPU from setup_APIC_clocks() below.
    4.18 - * We setup each local APIC with a zero timeout value for now.
    4.19 - * Unlike Linux, we don't have to wait for slices etc.
    4.20 - */
    4.21 -void setup_APIC_timer(void * data)
    4.22 +static void __init setup_APIC_timer(unsigned int clocks)
    4.23  {
    4.24      unsigned long flags;
    4.25 -    __save_flags(flags);
    4.26 -    __sti();
    4.27 -    __setup_APIC_LVTT(0);
    4.28 -    __restore_flags(flags);
    4.29 +    
    4.30 +    local_irq_save(flags);
    4.31 +
    4.32 +    /*
    4.33 +     * Wait for IRQ0's slice:
    4.34 +     */
    4.35 +    wait_timer_tick();
    4.36 +
    4.37 +    __setup_APIC_LVTT(clocks);
    4.38 +
    4.39 +    local_irq_restore(flags);
    4.40  }
    4.41  
    4.42  /*
    4.43 - * In this function we calibrate APIC bus clocks to the external timer.
    4.44 + * In this function we calibrate APIC bus clocks to the external
    4.45 + * timer. Unfortunately we cannot use jiffies and the timer irq
    4.46 + * to calibrate, since some later bootup code depends on getting
    4.47 + * the first irq? Ugh.
    4.48   *
    4.49 - * As a result we have the Bus Speed and CPU speed in Hz.
    4.50 - * 
    4.51 - * We want to do the calibration only once (for CPU0).  CPUs connected by the
    4.52 - * same APIC bus have the very same bus frequency.
    4.53 - *
    4.54 - * This bit is a bit shoddy since we use the very same periodic timer interrupt
    4.55 - * we try to eliminate to calibrate the APIC. 
    4.56 + * We want to do the calibration only once since we
    4.57 + * want to have local timer irqs syncron. CPUs connected
    4.58 + * by the same APIC bus have the very same bus frequency.
    4.59 + * And we want to have irqs off anyways, no accidental
    4.60 + * APIC irq that way.
    4.61   */
    4.62  
    4.63  int __init calibrate_APIC_clock(void)
    4.64 @@ -780,21 +783,48 @@ int __init calibrate_APIC_clock(void)
    4.65      return result;
    4.66  }
    4.67  
    4.68 -/*
    4.69 - * initialise the APIC timers for all CPUs
    4.70 - * we start with the first and find out processor frequency and bus speed
    4.71 - */
    4.72 -void __init setup_APIC_clocks (void)
    4.73 +
    4.74 +static unsigned int calibration_result;
    4.75 +
    4.76 +void __init setup_boot_APIC_clock(void)
    4.77  {
    4.78 +    apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n");
    4.79      using_apic_timer = 1;
    4.80 -    __cli();
    4.81 -    /* calibrate CPU0 for CPU speed and BUS speed */
    4.82 -    bus_freq = calibrate_APIC_clock();
    4.83 -    /* Now set up the timer for real. */
    4.84 -    setup_APIC_timer((void *)bus_freq);
    4.85 -    __sti();
    4.86 -    /* and update all other cpus */
    4.87 -    smp_call_function(setup_APIC_timer, (void *)bus_freq, 1, 1);
    4.88 +
    4.89 +    local_irq_disable();
    4.90 +    
    4.91 +    calibration_result = calibrate_APIC_clock();
    4.92 +    /*
    4.93 +     * Now set up the timer for real.
    4.94 +     */
    4.95 +    setup_APIC_timer(calibration_result);
    4.96 +    
    4.97 +    local_irq_enable();
    4.98 +}
    4.99 +
   4.100 +void __init setup_secondary_APIC_clock(void)
   4.101 +{
   4.102 +    setup_APIC_timer(calibration_result);
   4.103 +}
   4.104 +
   4.105 +void __init disable_APIC_timer(void)
   4.106 +{
   4.107 +    if (using_apic_timer) {
   4.108 +        unsigned long v;
   4.109 +        
   4.110 +        v = apic_read(APIC_LVTT);
   4.111 +        apic_write_around(APIC_LVTT, v | APIC_LVT_MASKED);
   4.112 +    }
   4.113 +}
   4.114 +
   4.115 +void enable_APIC_timer(void)
   4.116 +{
   4.117 +    if (using_apic_timer) {
   4.118 +        unsigned long v;
   4.119 +        
   4.120 +        v = apic_read(APIC_LVTT);
   4.121 +        apic_write_around(APIC_LVTT, v & ~APIC_LVT_MASKED);
   4.122 +    }
   4.123  }
   4.124  
   4.125  #undef APIC_DIVISOR
   4.126 @@ -885,7 +915,7 @@ asmlinkage void smp_spurious_interrupt(s
   4.127          ack_APIC_irq();
   4.128  
   4.129      /* see sw-dev-man vol 3, chapter 7.4.13.5 */
   4.130 -    printk("spurious APIC interrupt on CPU#%d, should never happen.\n",
   4.131 +    printk(KERN_INFO "spurious APIC interrupt on CPU#%d, should never happen.\n",
   4.132             smp_processor_id());
   4.133  }
   4.134  
   4.135 @@ -914,8 +944,8 @@ asmlinkage void smp_error_interrupt(stru
   4.136         6: Received illegal vector
   4.137         7: Illegal register address
   4.138      */
   4.139 -    printk("APIC error on CPU%d: %02lx(%02lx)\n",
   4.140 -            smp_processor_id(), v, v1);
   4.141 +    printk (KERN_DEBUG "APIC error on CPU%d: %02lx(%02lx)\n",
   4.142 +            smp_processor_id(), v , v1);
   4.143  }
   4.144  
   4.145  /*
   4.146 @@ -940,20 +970,18 @@ int __init APIC_init_uniprocessor (void)
   4.147  
   4.148      connect_bsp_APIC();
   4.149  
   4.150 -#ifdef CONFIG_SMP
   4.151 -    cpu_online_map = 1;
   4.152 -#endif
   4.153      phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid);
   4.154 -    apic_write_around(APIC_ID, boot_cpu_physical_apicid);
   4.155  
   4.156      setup_local_APIC();
   4.157  
   4.158 +    if (nmi_watchdog == NMI_LOCAL_APIC)
   4.159 +        check_nmi_watchdog();
   4.160  #ifdef CONFIG_X86_IO_APIC
   4.161      if (smp_found_config)
   4.162          if (!skip_ioapic_setup && nr_ioapics)
   4.163              setup_IO_APIC();
   4.164  #endif
   4.165 -    setup_APIC_clocks();
   4.166 +    setup_boot_APIC_clock();
   4.167  
   4.168      return 0;
   4.169  }
     5.1 --- a/xen/arch/x86/cdb.c	Wed May 25 10:32:53 2005 +0000
     5.2 +++ b/xen/arch/x86/cdb.c	Wed May 25 10:36:59 2005 +0000
     5.3 @@ -9,7 +9,7 @@
     5.4  #include <xen/lib.h>
     5.5  #include <asm/uaccess.h>
     5.6  #include <xen/serial.h>
     5.7 -#include <asm/irq.h>
     5.8 +#include <xen/irq.h>
     5.9  #include <xen/spinlock.h>
    5.10  #include <asm/debugger.h>
    5.11  #include <xen/init.h>
     6.1 --- a/xen/arch/x86/dom0_ops.c	Wed May 25 10:32:53 2005 +0000
     6.2 +++ b/xen/arch/x86/dom0_ops.c	Wed May 25 10:36:59 2005 +0000
     6.3 @@ -176,8 +176,8 @@ long arch_do_dom0_op(dom0_op_t *op, dom0
     6.4      {
     6.5          dom0_physinfo_t *pi = &op->u.physinfo;
     6.6  
     6.7 -        pi->ht_per_core = opt_noht ? 1 : ht_per_core;
     6.8 -        pi->cores       = smp_num_cpus / pi->ht_per_core;
     6.9 +        pi->ht_per_core = ht_per_core;
    6.10 +        pi->cores       = num_online_cpus() / ht_per_core;
    6.11          pi->total_pages = max_page;
    6.12          pi->free_pages  = avail_domheap_pages();
    6.13          pi->cpu_khz     = cpu_khz;
     7.1 --- a/xen/arch/x86/domain.c	Wed May 25 10:32:53 2005 +0000
     7.2 +++ b/xen/arch/x86/domain.c	Wed May 25 10:36:59 2005 +0000
     7.3 @@ -73,44 +73,31 @@ static void default_idle(void)
     7.4  void idle_loop(void)
     7.5  {
     7.6      int cpu = smp_processor_id();
     7.7 +
     7.8      for ( ; ; )
     7.9      {
    7.10          irq_stat[cpu].idle_timestamp = jiffies;
    7.11 +
    7.12          while ( !softirq_pending(cpu) )
    7.13          {
    7.14              page_scrub_schedule_work();
    7.15              default_idle();
    7.16          }
    7.17 +
    7.18          do_softirq();
    7.19      }
    7.20  }
    7.21  
    7.22 -static void __startup_cpu_idle_loop(struct exec_domain *ed)
    7.23 -{
    7.24 -    /* Signal to boot CPU that we are done. */
    7.25 -    init_idle();
    7.26 -
    7.27 -    /* Start normal idle loop. */
    7.28 -    ed->arch.schedule_tail = continue_idle_task;
    7.29 -    continue_idle_task(ed);
    7.30 -}
    7.31 -
    7.32  void startup_cpu_idle_loop(void)
    7.33  {
    7.34      struct exec_domain *ed = current;
    7.35  
    7.36 -    /* Just some sanity to ensure that the scheduler is set up okay. */
    7.37 -    ASSERT(ed->domain->domain_id == IDLE_DOMAIN_ID);
    7.38 +    ASSERT(is_idle_task(ed->domain));
    7.39      percpu_ctxt[smp_processor_id()].curr_ed = ed;
    7.40      set_bit(smp_processor_id(), &ed->domain->cpuset);
    7.41 -    domain_unpause_by_systemcontroller(ed->domain);
    7.42 +    ed->arch.schedule_tail = continue_idle_task;
    7.43  
    7.44 -    ed->arch.schedule_tail = __startup_cpu_idle_loop;
    7.45 -    raise_softirq(SCHEDULE_SOFTIRQ);
    7.46 -    do_softirq();
    7.47 -
    7.48 -    /* End up in __startup_cpu_idle_loop, not here. */
    7.49 -    BUG();
    7.50 +    idle_loop();
    7.51  }
    7.52  
    7.53  static long no_idt[2];
    7.54 @@ -244,7 +231,7 @@ void arch_do_createdomain(struct exec_do
    7.55  
    7.56      ed->arch.flags = TF_kernel_mode;
    7.57  
    7.58 -    if ( d->domain_id == IDLE_DOMAIN_ID )
    7.59 +    if ( is_idle_task(d) )
    7.60          return;
    7.61  
    7.62      ed->arch.schedule_tail = continue_nonidle_task;
     8.1 --- a/xen/arch/x86/domain_build.c	Wed May 25 10:32:53 2005 +0000
     8.2 +++ b/xen/arch/x86/domain_build.c	Wed May 25 10:36:59 2005 +0000
     8.3 @@ -438,7 +438,7 @@ int construct_dom0(struct domain *d,
     8.4      /* Mask all upcalls... */
     8.5      for ( i = 0; i < MAX_VIRT_CPUS; i++ )
     8.6          d->shared_info->vcpu_data[i].evtchn_upcall_mask = 1;
     8.7 -    d->shared_info->n_vcpu = smp_num_cpus;
     8.8 +    d->shared_info->n_vcpu = num_online_cpus();
     8.9  
    8.10      /* Set up monitor table */
    8.11      update_pagetables(ed);
     9.1 --- a/xen/arch/x86/io_apic.c	Wed May 25 10:32:53 2005 +0000
     9.2 +++ b/xen/arch/x86/io_apic.c	Wed May 25 10:36:59 2005 +0000
     9.3 @@ -2259,7 +2259,7 @@ int ioapic_guest_write(int apicid, int a
     9.4      
     9.5      pin = (address - 0x10) >> 1;
     9.6  
     9.7 -    rte.dest.logical.logical_dest = target_cpus();
     9.8 +    rte.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
     9.9      *(int *)&rte = val;
    9.10  
    9.11      if ( rte.vector >= FIRST_DEVICE_VECTOR )
    10.1 --- a/xen/arch/x86/irq.c	Wed May 25 10:32:53 2005 +0000
    10.2 +++ b/xen/arch/x86/irq.c	Wed May 25 10:36:59 2005 +0000
    10.3 @@ -237,6 +237,7 @@ int pirq_guest_bind(struct exec_domain *
    10.4      irq_guest_action_t *action;
    10.5      unsigned long       flags;
    10.6      int                 rc = 0;
    10.7 +    cpumask_t           cpumask = CPU_MASK_NONE;
    10.8  
    10.9      if ( !IS_CAPABLE_PHYSDEV(d) )
   10.10          return -EPERM;
   10.11 @@ -273,9 +274,9 @@ int pirq_guest_bind(struct exec_domain *
   10.12          desc->handler->startup(irq);
   10.13  
   10.14          /* Attempt to bind the interrupt target to the correct CPU. */
   10.15 +        cpu_set(ed->processor, cpumask);
   10.16          if ( desc->handler->set_affinity != NULL )
   10.17 -            desc->handler->set_affinity(
   10.18 -                irq, apicid_to_phys_cpu_present(ed->processor));
   10.19 +            desc->handler->set_affinity(irq, cpumask);
   10.20      }
   10.21      else if ( !will_share || !action->shareable )
   10.22      {
    11.1 --- a/xen/arch/x86/microcode.c	Wed May 25 10:32:53 2005 +0000
    11.2 +++ b/xen/arch/x86/microcode.c	Wed May 25 10:36:59 2005 +0000
    11.3 @@ -86,7 +86,6 @@
    11.4  #define up(_m) spin_unlock(_m)
    11.5  #define vmalloc(_s) xmalloc_bytes(_s)
    11.6  #define vfree(_p) xfree(_p)
    11.7 -#define num_online_cpus() smp_num_cpus
    11.8  
    11.9  #if 0
   11.10  MODULE_DESCRIPTION("Intel CPU (IA-32) Microcode Update Driver");
    12.1 --- a/xen/arch/x86/mtrr/main.c	Wed May 25 10:32:53 2005 +0000
    12.2 +++ b/xen/arch/x86/mtrr/main.c	Wed May 25 10:36:59 2005 +0000
    12.3 @@ -49,8 +49,6 @@
    12.4  #define down(_m) spin_lock(_m)
    12.5  #define up(_m) spin_unlock(_m)
    12.6  
    12.7 -#define num_booting_cpus() smp_num_cpus
    12.8 -
    12.9  u32 num_var_ranges = 0;
   12.10  
   12.11  unsigned int *usage_table;
    13.1 --- a/xen/arch/x86/nmi.c	Wed May 25 10:32:53 2005 +0000
    13.2 +++ b/xen/arch/x86/nmi.c	Wed May 25 10:36:59 2005 +0000
    13.3 @@ -92,13 +92,16 @@ int __init check_nmi_watchdog (void)
    13.4  
    13.5      printk("Testing NMI watchdog --- ");
    13.6  
    13.7 -    for ( cpu = 0; cpu < smp_num_cpus; cpu++ ) 
    13.8 +    for ( cpu = 0; cpu < NR_CPUS; cpu++ ) 
    13.9          prev_nmi_count[cpu] = nmi_count(cpu);
   13.10 -    __sti();
   13.11 +    local_irq_enable();
   13.12      mdelay((10*1000)/nmi_hz); /* wait 10 ticks */
   13.13  
   13.14 -    for ( cpu = 0; cpu < smp_num_cpus; cpu++ ) 
   13.15 +    for ( cpu = 0; cpu < NR_CPUS; cpu++ ) 
   13.16      {
   13.17 +        if ( !cpu_isset(cpu, cpu_callin_map) && 
   13.18 +             !cpu_isset(cpu, cpu_online_map) )
   13.19 +            continue;
   13.20          if ( nmi_count(cpu) - prev_nmi_count[cpu] <= 5 )
   13.21              printk("CPU#%d stuck. ", cpu);
   13.22          else
   13.23 @@ -277,13 +280,6 @@ void watchdog_enable(void)
   13.24      spin_unlock_irqrestore(&watchdog_lock, flags);
   13.25  }
   13.26  
   13.27 -void touch_nmi_watchdog (void)
   13.28 -{
   13.29 -    int i;
   13.30 -    for (i = 0; i < smp_num_cpus; i++)
   13.31 -        alert_counter[i] = 0;
   13.32 -}
   13.33 -
   13.34  void nmi_watchdog_tick (struct cpu_user_regs * regs)
   13.35  {
   13.36      int sum, cpu = smp_processor_id();
    14.1 --- a/xen/arch/x86/setup.c	Wed May 25 10:32:53 2005 +0000
    14.2 +++ b/xen/arch/x86/setup.c	Wed May 25 10:36:59 2005 +0000
    14.3 @@ -33,6 +33,14 @@ integer_param("xenheap_megabytes", opt_x
    14.4  int opt_noht = 0;
    14.5  boolean_param("noht", opt_noht);
    14.6  
    14.7 +/* opt_nosmp: If true, secondary processors are ignored. */
    14.8 +static int opt_nosmp = 0;
    14.9 +boolean_param("nosmp", opt_nosmp);
   14.10 +
   14.11 +/* maxcpus: maximum number of CPUs to activate. */
   14.12 +static unsigned int max_cpus = NR_CPUS;
   14.13 +integer_param("maxcpus", max_cpus); 
   14.14 +
   14.15  /* opt_watchdog: If true, run a watchdog NMI on each processor. */
   14.16  static int opt_watchdog = 0;
   14.17  boolean_param("watchdog", opt_watchdog);
   14.18 @@ -58,6 +66,9 @@ boolean_param("noapic", skip_ioapic_setu
   14.19  
   14.20  int early_boot = 1;
   14.21  
   14.22 +int ht_per_core = 1;
   14.23 +cpumask_t cpu_present_map;
   14.24 +
   14.25  /* Limits of Xen heap, used to initialise the allocator. */
   14.26  unsigned long xenheap_phys_start, xenheap_phys_end;
   14.27  
   14.28 @@ -67,7 +78,6 @@ extern void trap_init(void);
   14.29  extern void time_init(void);
   14.30  extern void ac_timer_init(void);
   14.31  extern void initialize_keytable();
   14.32 -extern int do_timer_lists_from_pit;
   14.33  
   14.34  extern unsigned long cpu0_stack[];
   14.35  
   14.36 @@ -80,13 +90,10 @@ unsigned long mmu_cr4_features = X86_CR4
   14.37  #endif
   14.38  EXPORT_SYMBOL(mmu_cr4_features);
   14.39  
   14.40 -unsigned long wait_init_idle;
   14.41 -
   14.42  struct exec_domain *idle_task[NR_CPUS] = { &idle0_exec_domain };
   14.43  
   14.44  int acpi_disabled;
   14.45  
   14.46 -int phys_proc_id[NR_CPUS];
   14.47  int logical_proc_id[NR_CPUS];
   14.48  
   14.49  /* Standard macro to see if a specific flag is changeable. */
   14.50 @@ -147,12 +154,11 @@ static void __init init_intel(struct cpu
   14.51      if ( c->x86 == 6 && c->x86_model < 3 && c->x86_mask < 3 )
   14.52          clear_bit(X86_FEATURE_SEP, &c->x86_capability);
   14.53  
   14.54 -#ifdef CONFIG_SMP
   14.55      if ( test_bit(X86_FEATURE_HT, &c->x86_capability) )
   14.56      {
   14.57          u32     eax, ebx, ecx, edx;
   14.58          int     initial_apic_id, siblings, cpu = smp_processor_id();
   14.59 -        
   14.60 +
   14.61          cpuid(1, &eax, &ebx, &ecx, &edx);
   14.62          ht_per_core = siblings = (ebx & 0xff0000) >> 16;
   14.63  
   14.64 @@ -176,7 +182,6 @@ static void __init init_intel(struct cpu
   14.65                     cpu, phys_proc_id[cpu], logical_proc_id[cpu]);
   14.66          }
   14.67      }
   14.68 -#endif
   14.69  
   14.70  #ifdef CONFIG_VMX
   14.71      start_vmx();
   14.72 @@ -292,6 +297,10 @@ void __init identify_cpu(struct cpuinfo_
   14.73      }
   14.74  }
   14.75  
   14.76 +void __init print_cpu_info(struct cpuinfo_x86 *c)
   14.77 +{
   14.78 +    printk("booted.\n");
   14.79 +}
   14.80  
   14.81  unsigned long cpu_initialized;
   14.82  void __init cpu_init(void)
   14.83 @@ -335,8 +344,6 @@ void __init cpu_init(void)
   14.84  
   14.85      /* Install correct page table. */
   14.86      write_ptbase(current);
   14.87 -
   14.88 -    init_idle_task();
   14.89  }
   14.90  
   14.91  int acpi_force;
   14.92 @@ -383,6 +390,8 @@ static void __init do_initcalls(void)
   14.93  
   14.94  static void __init start_of_day(void)
   14.95  {
   14.96 +    int i;
   14.97 +
   14.98      /* Unmap the first page of CPU0's stack. */
   14.99      memguard_guard_stack(cpu0_stack);
  14.100  
  14.101 @@ -421,8 +430,6 @@ static void __init start_of_day(void)
  14.102  
  14.103      init_apic_mappings();
  14.104  
  14.105 -    scheduler_init();	
  14.106 -
  14.107      init_IRQ();
  14.108  
  14.109      trap_init();
  14.110 @@ -431,41 +438,41 @@ static void __init start_of_day(void)
  14.111  
  14.112      arch_init_memory();
  14.113  
  14.114 -    smp_boot_cpus();
  14.115 +    scheduler_init();	
  14.116  
  14.117 -    __sti();
  14.118 +    if ( opt_nosmp )
  14.119 +        max_cpus = 0;
  14.120 +    smp_prepare_cpus(max_cpus);
  14.121 +
  14.122 +    /* We aren't hotplug-capable yet. */
  14.123 +    BUG_ON(!cpus_empty(cpu_present_map));
  14.124 +    for_each_cpu ( i )
  14.125 +        cpu_set(i, cpu_present_map);
  14.126  
  14.127      initialize_keytable();
  14.128  
  14.129      serial_init_stage2();
  14.130  
  14.131 -    if ( !cpu_has_apic )
  14.132 +    ac_timer_init();
  14.133 +
  14.134 +    init_xen_time();
  14.135 +
  14.136 +    for_each_present_cpu ( i )
  14.137      {
  14.138 -        do_timer_lists_from_pit = 1;
  14.139 -        if ( smp_num_cpus != 1 )
  14.140 -            panic("We need local APICs on SMP machines!");
  14.141 +        if ( num_online_cpus() >= max_cpus )
  14.142 +            break;
  14.143 +        if ( !cpu_online(i) )
  14.144 +            __cpu_up(i);
  14.145      }
  14.146  
  14.147 -    ac_timer_init();    /* init accurate timers */
  14.148 -    init_xen_time();	/* initialise the time */
  14.149 -    schedulers_start(); /* start scheduler for each CPU */
  14.150 -
  14.151 -    check_nmi_watchdog();
  14.152 +    printk("Brought up %ld CPUs\n", (long)num_online_cpus());
  14.153 +    smp_cpus_done(max_cpus);
  14.154  
  14.155      do_initcalls();
  14.156  
  14.157 -    wait_init_idle = cpu_online_map;
  14.158 -    clear_bit(smp_processor_id(), &wait_init_idle);
  14.159 -    smp_threads_ready = 1;
  14.160 -    smp_commence(); /* Tell other CPUs that state of the world is stable. */
  14.161 -    while ( wait_init_idle != 0 )
  14.162 -        cpu_relax();
  14.163 +    schedulers_start();
  14.164  
  14.165      watchdog_enable();
  14.166 -
  14.167 -#ifdef CONFIG_X86_64 /* x86_32 uses low mappings when building DOM0. */
  14.168 -    zap_low_mappings();
  14.169 -#endif
  14.170  }
  14.171  
  14.172  #define EARLY_FAIL() for ( ; ; ) __asm__ __volatile__ ( "hlt" )
  14.173 @@ -487,6 +494,8 @@ void __init __start_xen(multiboot_info_t
  14.174      set_current(&idle0_exec_domain);
  14.175      set_processor_id(0);
  14.176  
  14.177 +    smp_prepare_boot_cpu();
  14.178 +
  14.179      /* We initialise the serial devices very early so we can get debugging. */
  14.180      serial_init_stage1();
  14.181  
  14.182 @@ -695,8 +704,8 @@ void __init __start_xen(multiboot_info_t
  14.183      /* Hide UART from DOM0 if we're using it */
  14.184      serial_endboot();
  14.185  
  14.186 -    domain_unpause_by_systemcontroller(current->domain);
  14.187      domain_unpause_by_systemcontroller(dom0);
  14.188 +
  14.189      startup_cpu_idle_loop();
  14.190  }
  14.191  
    15.1 --- a/xen/arch/x86/shadow.c	Wed May 25 10:32:53 2005 +0000
    15.2 +++ b/xen/arch/x86/shadow.c	Wed May 25 10:36:59 2005 +0000
    15.3 @@ -2525,7 +2525,7 @@ void __shadow_sync_all(struct domain *d)
    15.4      // page table page needs to be vcpu private).
    15.5      //
    15.6  #if 0 // this should be enabled for SMP guests...
    15.7 -    flush_tlb_mask(((1 << smp_num_cpus) - 1) & ~(1 << smp_processor_id()));
    15.8 +    flush_tlb_mask(((1<<num_online_cpus()) - 1) & ~(1<<smp_processor_id()));
    15.9  #endif
   15.10      need_flush = 1;
   15.11  
    16.1 --- a/xen/arch/x86/smp.c	Wed May 25 10:32:53 2005 +0000
    16.2 +++ b/xen/arch/x86/smp.c	Wed May 25 10:36:59 2005 +0000
    16.3 @@ -141,7 +141,7 @@ static inline void send_IPI_allbutself(i
    16.4       * If there are no other CPUs in the system then we get an APIC send error 
    16.5       * if we try to broadcast. thus we have to avoid sending IPIs in this case.
    16.6       */
    16.7 -    if ( smp_num_cpus <= 1 )
    16.8 +    if ( num_online_cpus() <= 1 )
    16.9          return;
   16.10  
   16.11      __send_IPI_shortcut(APIC_DEST_ALLBUT, vector);
   16.12 @@ -192,10 +192,10 @@ void new_tlbflush_clock_period(void)
   16.13      ASSERT(local_irq_is_enabled());
   16.14      
   16.15      /* Flush everyone else. We definitely flushed just before entry. */
   16.16 -    if ( smp_num_cpus > 1 )
   16.17 +    if ( num_online_cpus() > 1 )
   16.18      {
   16.19          spin_lock(&flush_lock);
   16.20 -        flush_cpumask  = (1UL << smp_num_cpus) - 1;
   16.21 +        flush_cpumask  = (1UL << num_online_cpus()) - 1;
   16.22          flush_cpumask &= ~(1UL << smp_processor_id());
   16.23          flush_va       = FLUSHVA_ALL;
   16.24          send_IPI_allbutself(INVALIDATE_TLB_VECTOR);
   16.25 @@ -257,7 +257,7 @@ int smp_call_function(
   16.26  
   16.27      ASSERT(local_irq_is_enabled());
   16.28  
   16.29 -    cpuset = ((1UL << smp_num_cpus) - 1) & ~(1UL << smp_processor_id());
   16.30 +    cpuset = ((1UL << num_online_cpus()) - 1) & ~(1UL << smp_processor_id());
   16.31      if ( cpuset == 0 )
   16.32          return 0;
   16.33  
   16.34 @@ -295,7 +295,6 @@ void smp_send_stop(void)
   16.35  {
   16.36      /* Stop all other CPUs in the system. */
   16.37      smp_call_function(stop_this_cpu, NULL, 1, 0);
   16.38 -    smp_num_cpus = 1;
   16.39  
   16.40      local_irq_disable();
   16.41      disable_local_APIC();
    17.1 --- a/xen/arch/x86/smpboot.c	Wed May 25 10:32:53 2005 +0000
    17.2 +++ b/xen/arch/x86/smpboot.c	Wed May 25 10:36:59 2005 +0000
    17.3 @@ -17,7 +17,7 @@
    17.4   *	Fixes
    17.5   *		Felix Koop	:	NR_CPUS used properly
    17.6   *		Jose Renau	:	Handle single CPU case.
    17.7 - *		Alan Cox	:	By repeated request 8) - Total BogoMIP report.
    17.8 + *		Alan Cox	:	By repeated request 8) - Total BogoMIPS report.
    17.9   *		Greg Wright	:	Fix for kernel stacks panic.
   17.10   *		Erich Boleyn	:	MP v1.4 and additional changes.
   17.11   *	Matthias Sattler	:	Changes for 2.1 kernel map.
   17.12 @@ -30,52 +30,51 @@
   17.13   *		Tigran Aivazian	:	fixed "0.00 in /proc/uptime on SMP" bug.
   17.14   *	Maciej W. Rozycki	:	Bits for genuine 82489DX APICs
   17.15   *		Martin J. Bligh	: 	Added support for multi-quad systems
   17.16 - */
   17.17 + *		Dave Jones	:	Report invalid combinations of Athlon CPUs.
   17.18 +*		Rusty Russell	:	Hacked into shape for new "hotplug" boot process. */
   17.19  
   17.20  #include <xen/config.h>
   17.21  #include <xen/init.h>
   17.22 -#include <xen/irq.h>
   17.23 +#include <xen/kernel.h>
   17.24  #include <xen/mm.h>
   17.25 -#include <xen/slab.h>
   17.26 -#include <asm/flushtlb.h>
   17.27 +#include <xen/sched.h>
   17.28 +#include <xen/irq.h>
   17.29 +#include <xen/delay.h>
   17.30  #include <asm/mc146818rtc.h>
   17.31 -#include <asm/smpboot.h>
   17.32 -#include <xen/smp.h>
   17.33 +#include <asm/desc.h>
   17.34 +#include <asm/div64.h>
   17.35  #include <asm/msr.h>
   17.36 -#include <asm/system.h>
   17.37 -#include <asm/mpspec.h>
   17.38 -#include <asm/io_apic.h>
   17.39 -#include <xen/sched.h>
   17.40 -#include <xen/delay.h>
   17.41 -#include <xen/lib.h>
   17.42  #include <mach_apic.h>
   17.43  #include <mach_wakecpu.h>
   17.44 -
   17.45 -/* opt_nosmp: If true, secondary processors are ignored. */
   17.46 -static int opt_nosmp = 0;
   17.47 -boolean_param("nosmp", opt_nosmp);
   17.48 +#include <smpboot_hooks.h>
   17.49  
   17.50 -/* maxcpus: maximum number of CPUs to activate. */
   17.51 -static int max_cpus = -1;
   17.52 -integer_param("maxcpus", max_cpus); 
   17.53 +static int _foo;
   17.54 +#define set_kernel_exec(x,y) (_foo=0)
   17.55 +#define alloc_bootmem_low_pages(x) __va(0x90000) /* trampoline address */
   17.56 +int tainted;
   17.57 +#define TAINT_UNSAFE_SMP 0
   17.58  
   17.59 -/* Total count of live CPUs */
   17.60 -int smp_num_cpus = 1;
   17.61 +/* Set if we find a B stepping CPU */
   17.62 +static int __initdata smp_b_stepping;
   17.63  
   17.64 -/* Number of hyperthreads per core */
   17.65 -int ht_per_core = 1;
   17.66 +/* Number of siblings per CPU package */
   17.67 +int smp_num_siblings = 1;
   17.68 +int phys_proc_id[NR_CPUS]; /* Package ID of each logical CPU */
   17.69 +EXPORT_SYMBOL(phys_proc_id);
   17.70  
   17.71 -/* Bitmask of currently online CPUs */
   17.72 +/* bitmap of online cpus */
   17.73  cpumask_t cpu_online_map;
   17.74  
   17.75  cpumask_t cpu_callin_map;
   17.76  cpumask_t cpu_callout_map;
   17.77 +static cpumask_t smp_commenced_mask;
   17.78  
   17.79  /* Per CPU bogomips and other parameters */
   17.80 -struct cpuinfo_x86 cpu_data[NR_CPUS];
   17.81 +struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
   17.82  
   17.83 -/* Set when the idlers are all forked */
   17.84 -int smp_threads_ready;
   17.85 +u8 x86_cpu_to_apicid[NR_CPUS] =
   17.86 +			{ [0 ... NR_CPUS-1] = 0xff };
   17.87 +EXPORT_SYMBOL(x86_cpu_to_apicid);
   17.88  
   17.89  /*
   17.90   * Trampoline 80x86 program as an array.
   17.91 @@ -84,6 +83,7 @@ int smp_threads_ready;
   17.92  extern unsigned char trampoline_data [];
   17.93  extern unsigned char trampoline_end  [];
   17.94  static unsigned char *trampoline_base;
   17.95 +static int trampoline_exec;
   17.96  
   17.97  /*
   17.98   * Currently trivial. Write the real->protected mode
   17.99 @@ -93,8 +93,8 @@ static unsigned char *trampoline_base;
  17.100  
  17.101  static unsigned long __init setup_trampoline(void)
  17.102  {
  17.103 -    memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data);
  17.104 -    return virt_to_phys(trampoline_base);
  17.105 +	memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data);
  17.106 +	return virt_to_phys(trampoline_base);
  17.107  }
  17.108  
  17.109  /*
  17.110 @@ -103,11 +103,17 @@ static unsigned long __init setup_trampo
  17.111   */
  17.112  void __init smp_alloc_memory(void)
  17.113  {
  17.114 -    /*
  17.115 -     * Has to be in very low memory so we can execute
  17.116 -     * real-mode AP code.
  17.117 -     */
  17.118 -    trampoline_base = __va(0x90000);
  17.119 +	trampoline_base = (void *) alloc_bootmem_low_pages(PAGE_SIZE);
  17.120 +	/*
  17.121 +	 * Has to be in very low memory so we can execute
  17.122 +	 * real-mode AP code.
  17.123 +	 */
  17.124 +	if (__pa(trampoline_base) >= 0x9F000)
  17.125 +		BUG();
  17.126 +	/*
  17.127 +	 * Make the SMP trampoline executable:
  17.128 +	 */
  17.129 +	trampoline_exec = set_kernel_exec((unsigned long)trampoline_base, 1);
  17.130  }
  17.131  
  17.132  /*
  17.133 @@ -115,40 +121,63 @@ void __init smp_alloc_memory(void)
  17.134   * a given CPU
  17.135   */
  17.136  
  17.137 -void __init smp_store_cpu_info(int id)
  17.138 +static void __init smp_store_cpu_info(int id)
  17.139  {
  17.140 -    cpu_data[id] = boot_cpu_data;
  17.141 -    if (id != 0)
  17.142 -        identify_cpu(&cpu_data[id]);
  17.143 -}
  17.144 +	struct cpuinfo_x86 *c = cpu_data + id;
  17.145  
  17.146 -/*
  17.147 - * Architecture specific routine called by the kernel just before init is
  17.148 - * fired off. This allows the BP to have everything in order [we hope].
  17.149 - * At the end of this all the APs will hit the system scheduling and off
  17.150 - * we go. Each AP will load the system gdt's and jump through the kernel
  17.151 - * init into idle(). At this point the scheduler will one day take over
  17.152 - * and give them jobs to do. smp_callin is a standard routine
  17.153 - * we use to track CPUs as they power up.
  17.154 - */
  17.155 +	*c = boot_cpu_data;
  17.156 +	if (id!=0)
  17.157 +		identify_cpu(c);
  17.158 +	/*
  17.159 +	 * Mask B, Pentium, but not Pentium MMX
  17.160 +	 */
  17.161 +	if (c->x86_vendor == X86_VENDOR_INTEL &&
  17.162 +	    c->x86 == 5 &&
  17.163 +	    c->x86_mask >= 1 && c->x86_mask <= 4 &&
  17.164 +	    c->x86_model <= 3)
  17.165 +		/*
  17.166 +		 * Remember we have B step Pentia with bugs
  17.167 +		 */
  17.168 +		smp_b_stepping = 1;
  17.169  
  17.170 -static atomic_t smp_commenced = ATOMIC_INIT(0);
  17.171 +	/*
  17.172 +	 * Certain Athlons might work (for various values of 'work') in SMP
  17.173 +	 * but they are not certified as MP capable.
  17.174 +	 */
  17.175 +	if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) {
  17.176  
  17.177 -void __init smp_commence(void)
  17.178 -{
  17.179 -    /*
  17.180 -     * Lets the callins below out of their loop.
  17.181 -     */
  17.182 -    Dprintk("Setting commenced=1, go go go\n");
  17.183 +		/* Athlon 660/661 is valid. */	
  17.184 +		if ((c->x86_model==6) && ((c->x86_mask==0) || (c->x86_mask==1)))
  17.185 +			goto valid_k7;
  17.186  
  17.187 -    wmb();
  17.188 -    atomic_set(&smp_commenced,1);
  17.189 +		/* Duron 670 is valid */
  17.190 +		if ((c->x86_model==7) && (c->x86_mask==0))
  17.191 +			goto valid_k7;
  17.192 +
  17.193 +		/*
  17.194 +		 * Athlon 662, Duron 671, and Athlon >model 7 have capability bit.
  17.195 +		 * It's worth noting that the A5 stepping (662) of some Athlon XP's
  17.196 +		 * have the MP bit set.
  17.197 +		 * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for more.
  17.198 +		 */
  17.199 +		if (((c->x86_model==6) && (c->x86_mask>=2)) ||
  17.200 +		    ((c->x86_model==7) && (c->x86_mask>=1)) ||
  17.201 +		     (c->x86_model> 7))
  17.202 +			if (cpu_has_mp)
  17.203 +				goto valid_k7;
  17.204 +
  17.205 +		/* If we get here, it's not a certified SMP capable AMD system. */
  17.206 +		tainted |= TAINT_UNSAFE_SMP;
  17.207 +	}
  17.208 +
  17.209 +valid_k7:
  17.210 +	;
  17.211  }
  17.212  
  17.213  /*
  17.214   * TSC synchronization.
  17.215   *
  17.216 - * We first check wether all CPUs have their TSC's synchronized,
  17.217 + * We first check whether all CPUs have their TSC's synchronized,
  17.218   * then we print a warning if not, and always resync.
  17.219   */
  17.220  
  17.221 @@ -159,616 +188,724 @@ static unsigned long long tsc_values[NR_
  17.222  
  17.223  #define NR_LOOPS 5
  17.224  
  17.225 -/*
  17.226 - * accurate 64-bit/32-bit division, expanded to 32-bit divisions and 64-bit
  17.227 - * multiplication. Not terribly optimized but we need it at boot time only
  17.228 - * anyway.
  17.229 - *
  17.230 - * result == a / b
  17.231 - *	== (a1 + a2*(2^32)) / b
  17.232 - *	== a1/b + a2*(2^32/b)
  17.233 - *	== a1/b + a2*((2^32-1)/b) + a2/b + (a2*((2^32-1) % b))/b
  17.234 - *		    ^---- (this multiplication can overflow)
  17.235 - */
  17.236 -
  17.237 -static unsigned long long div64 (unsigned long long a, unsigned long b0)
  17.238 -{
  17.239 -    unsigned int a1, a2;
  17.240 -    unsigned long long res;
  17.241 -
  17.242 -    a1 = ((unsigned int*)&a)[0];
  17.243 -    a2 = ((unsigned int*)&a)[1];
  17.244 -
  17.245 -    res = a1/b0 +
  17.246 -        (unsigned long long)a2 * (unsigned long long)(0xffffffff/b0) +
  17.247 -        a2 / b0 +
  17.248 -        (a2 * (0xffffffff % b0)) / b0;
  17.249 -
  17.250 -    return res;
  17.251 -}
  17.252 -
  17.253  static void __init synchronize_tsc_bp (void)
  17.254  {
  17.255 -    int i;
  17.256 -    unsigned long long t0;
  17.257 -    unsigned long long sum, avg;
  17.258 -    long long delta;
  17.259 -    int buggy = 0;
  17.260 -
  17.261 -    printk("checking TSC synchronization across CPUs: ");
  17.262 -
  17.263 -    atomic_set(&tsc_start_flag, 1);
  17.264 -    wmb();
  17.265 -
  17.266 -    /*
  17.267 -     * We loop a few times to get a primed instruction cache,
  17.268 -     * then the last pass is more or less synchronized and
  17.269 -     * the BP and APs set their cycle counters to zero all at
  17.270 -     * once. This reduces the chance of having random offsets
  17.271 -     * between the processors, and guarantees that the maximum
  17.272 -     * delay between the cycle counters is never bigger than
  17.273 -     * the latency of information-passing (cachelines) between
  17.274 -     * two CPUs.
  17.275 -     */
  17.276 -    for (i = 0; i < NR_LOOPS; i++) {
  17.277 -        /*
  17.278 -         * all APs synchronize but they loop on '== num_cpus'
  17.279 -         */
  17.280 -        while (atomic_read(&tsc_count_start) != smp_num_cpus-1) mb();
  17.281 -        atomic_set(&tsc_count_stop, 0);
  17.282 -        wmb();
  17.283 -        /*
  17.284 -         * this lets the APs save their current TSC:
  17.285 -         */
  17.286 -        atomic_inc(&tsc_count_start);
  17.287 -
  17.288 -        rdtscll(tsc_values[smp_processor_id()]);
  17.289 -        /*
  17.290 -         * We clear the TSC in the last loop:
  17.291 -         */
  17.292 -        if (i == NR_LOOPS-1)
  17.293 -            write_tsc(0, 0);
  17.294 +	int i;
  17.295 +	unsigned long long t0;
  17.296 +	unsigned long long sum, avg;
  17.297 +	long long delta;
  17.298 +	unsigned long one_usec;
  17.299 +	int buggy = 0;
  17.300  
  17.301 -        /*
  17.302 -         * Wait for all APs to leave the synchronization point:
  17.303 -         */
  17.304 -        while (atomic_read(&tsc_count_stop) != smp_num_cpus-1) mb();
  17.305 -        atomic_set(&tsc_count_start, 0);
  17.306 -        wmb();
  17.307 -        atomic_inc(&tsc_count_stop);
  17.308 -    }
  17.309 -
  17.310 -    sum = 0;
  17.311 -    for (i = 0; i < smp_num_cpus; i++) {
  17.312 -        t0 = tsc_values[i];
  17.313 -        sum += t0;
  17.314 -    }
  17.315 -    avg = div64(sum, smp_num_cpus);
  17.316 +	printk(KERN_INFO "checking TSC synchronization across %u CPUs: ", num_booting_cpus());
  17.317  
  17.318 -    sum = 0;
  17.319 -    for (i = 0; i < smp_num_cpus; i++) {
  17.320 -        delta = tsc_values[i] - avg;
  17.321 -        if (delta < 0)
  17.322 -            delta = -delta;
  17.323 -        /*
  17.324 -         * We report bigger than 2 microseconds clock differences.
  17.325 -         */
  17.326 -        if (delta > 2*ticks_per_usec) {
  17.327 -            long realdelta;
  17.328 -            if (!buggy) {
  17.329 -                buggy = 1;
  17.330 -                printk("\n");
  17.331 -            }
  17.332 -            realdelta = div64(delta, ticks_per_usec);
  17.333 -            if (tsc_values[i] < avg)
  17.334 -                realdelta = -realdelta;
  17.335 +	/* convert from kcyc/sec to cyc/usec */
  17.336 +	one_usec = cpu_khz / 1000;
  17.337  
  17.338 -            printk("BIOS BUG: CPU#%d improperly initialized, has %ld usecs TSC skew! FIXED.\n",
  17.339 -                   i, realdelta);
  17.340 -        }
  17.341 +	atomic_set(&tsc_start_flag, 1);
  17.342 +	wmb();
  17.343  
  17.344 -        sum += delta;
  17.345 -    }
  17.346 -    if (!buggy)
  17.347 -        printk("passed.\n");
  17.348 +	/*
  17.349 +	 * We loop a few times to get a primed instruction cache,
  17.350 +	 * then the last pass is more or less synchronized and
  17.351 +	 * the BP and APs set their cycle counters to zero all at
  17.352 +	 * once. This reduces the chance of having random offsets
  17.353 +	 * between the processors, and guarantees that the maximum
  17.354 +	 * delay between the cycle counters is never bigger than
  17.355 +	 * the latency of information-passing (cachelines) between
  17.356 +	 * two CPUs.
  17.357 +	 */
  17.358 +	for (i = 0; i < NR_LOOPS; i++) {
  17.359 +		/*
  17.360 +		 * all APs synchronize but they loop on '== num_cpus'
  17.361 +		 */
  17.362 +		while (atomic_read(&tsc_count_start) != num_booting_cpus()-1)
  17.363 +			mb();
  17.364 +		atomic_set(&tsc_count_stop, 0);
  17.365 +		wmb();
  17.366 +		/*
  17.367 +		 * this lets the APs save their current TSC:
  17.368 +		 */
  17.369 +		atomic_inc(&tsc_count_start);
  17.370 +
  17.371 +		rdtscll(tsc_values[smp_processor_id()]);
  17.372 +		/*
  17.373 +		 * We clear the TSC in the last loop:
  17.374 +		 */
  17.375 +		if (i == NR_LOOPS-1)
  17.376 +			write_tsc(0, 0);
  17.377 +
  17.378 +		/*
  17.379 +		 * Wait for all APs to leave the synchronization point:
  17.380 +		 */
  17.381 +		while (atomic_read(&tsc_count_stop) != num_booting_cpus()-1)
  17.382 +			mb();
  17.383 +		atomic_set(&tsc_count_start, 0);
  17.384 +		wmb();
  17.385 +		atomic_inc(&tsc_count_stop);
  17.386 +	}
  17.387 +
  17.388 +	sum = 0;
  17.389 +	for (i = 0; i < NR_CPUS; i++) {
  17.390 +		if (cpu_isset(i, cpu_callout_map)) {
  17.391 +			t0 = tsc_values[i];
  17.392 +			sum += t0;
  17.393 +		}
  17.394 +	}
  17.395 +	avg = sum;
  17.396 +	do_div(avg, num_booting_cpus());
  17.397 +
  17.398 +	sum = 0;
  17.399 +	for (i = 0; i < NR_CPUS; i++) {
  17.400 +		if (!cpu_isset(i, cpu_callout_map))
  17.401 +			continue;
  17.402 +		delta = tsc_values[i] - avg;
  17.403 +		if (delta < 0)
  17.404 +			delta = -delta;
  17.405 +		/*
  17.406 +		 * We report bigger than 2 microseconds clock differences.
  17.407 +		 */
  17.408 +		if (delta > 2*one_usec) {
  17.409 +			long realdelta;
  17.410 +			if (!buggy) {
  17.411 +				buggy = 1;
  17.412 +				printk("\n");
  17.413 +			}
  17.414 +			realdelta = delta;
  17.415 +			do_div(realdelta, one_usec);
  17.416 +			if (tsc_values[i] < avg)
  17.417 +				realdelta = -realdelta;
  17.418 +
  17.419 +			printk(KERN_INFO "CPU#%d had %ld usecs TSC skew, fixed it up.\n", i, realdelta);
  17.420 +		}
  17.421 +
  17.422 +		sum += delta;
  17.423 +	}
  17.424 +	if (!buggy)
  17.425 +		printk("passed.\n");
  17.426  }
  17.427  
  17.428  static void __init synchronize_tsc_ap (void)
  17.429  {
  17.430 -    int i;
  17.431 -
  17.432 -    /*
  17.433 -     * smp_num_cpus is not necessarily known at the time
  17.434 -     * this gets called, so we first wait for the BP to
  17.435 -     * finish SMP initialization:
  17.436 -     */
  17.437 -    while (!atomic_read(&tsc_start_flag)) mb();
  17.438 +	int i;
  17.439  
  17.440 -    for (i = 0; i < NR_LOOPS; i++) {
  17.441 -        atomic_inc(&tsc_count_start);
  17.442 -        while (atomic_read(&tsc_count_start) != smp_num_cpus) mb();
  17.443 +	/*
  17.444 +	 * Not every cpu is online at the time
  17.445 +	 * this gets called, so we first wait for the BP to
  17.446 +	 * finish SMP initialization:
  17.447 +	 */
  17.448 +	while (!atomic_read(&tsc_start_flag)) mb();
  17.449  
  17.450 -        rdtscll(tsc_values[smp_processor_id()]);
  17.451 -        if (i == NR_LOOPS-1)
  17.452 -            write_tsc(0, 0);
  17.453 +	for (i = 0; i < NR_LOOPS; i++) {
  17.454 +		atomic_inc(&tsc_count_start);
  17.455 +		while (atomic_read(&tsc_count_start) != num_booting_cpus())
  17.456 +			mb();
  17.457  
  17.458 -        atomic_inc(&tsc_count_stop);
  17.459 -        while (atomic_read(&tsc_count_stop) != smp_num_cpus) mb();
  17.460 -    }
  17.461 +		rdtscll(tsc_values[smp_processor_id()]);
  17.462 +		if (i == NR_LOOPS-1)
  17.463 +			write_tsc(0, 0);
  17.464 +
  17.465 +		atomic_inc(&tsc_count_stop);
  17.466 +		while (atomic_read(&tsc_count_stop) != num_booting_cpus()) mb();
  17.467 +	}
  17.468  }
  17.469  #undef NR_LOOPS
  17.470  
  17.471 +extern void calibrate_delay(void);
  17.472 +
  17.473  static atomic_t init_deasserted;
  17.474  
  17.475  void __init smp_callin(void)
  17.476  {
  17.477 -    int cpuid, phys_id, i;
  17.478 -
  17.479 -    /*
  17.480 -     * If waken up by an INIT in an 82489DX configuration
  17.481 -     * we may get here before an INIT-deassert IPI reaches
  17.482 -     * our local APIC.  We have to wait for the IPI or we'll
  17.483 -     * lock up on an APIC access.
  17.484 -     */
  17.485 -    while (!atomic_read(&init_deasserted));
  17.486 -
  17.487 -    /*
  17.488 -     * (This works even if the APIC is not enabled.)
  17.489 -     */
  17.490 -    phys_id = GET_APIC_ID(apic_read(APIC_ID));
  17.491 -    cpuid = smp_processor_id();
  17.492 -    if (test_and_set_bit(cpuid, &cpu_online_map)) {
  17.493 -        printk("huh, phys CPU#%d, CPU#%d already present??\n",
  17.494 -               phys_id, cpuid);
  17.495 -        BUG();
  17.496 -    }
  17.497 -    Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);
  17.498 -
  17.499 -    /*
  17.500 -     * STARTUP IPIs are fragile beasts as they might sometimes
  17.501 -     * trigger some glue motherboard logic. Complete APIC bus
  17.502 -     * silence for 1 second, this overestimates the time the
  17.503 -     * boot CPU is spending to send the up to 2 STARTUP IPIs
  17.504 -     * by a factor of two. This should be enough.
  17.505 -     */
  17.506 +	int cpuid, phys_id, i;
  17.507  
  17.508 -    for ( i = 0; i < 200; i++ )
  17.509 -    {
  17.510 -        if ( test_bit(cpuid, &cpu_callout_map) ) break;
  17.511 -        mdelay(10);
  17.512 -    }
  17.513 -
  17.514 -    if (!test_bit(cpuid, &cpu_callout_map)) {
  17.515 -        printk("BUG: CPU%d started up but did not get a callout!\n",
  17.516 -               cpuid);
  17.517 -        BUG();
  17.518 -    }
  17.519 -
  17.520 -    /*
  17.521 -     * the boot CPU has finished the init stage and is spinning
  17.522 -     * on callin_map until we finish. We are free to set up this
  17.523 -     * CPU, first the APIC. (this is probably redundant on most
  17.524 -     * boards)
  17.525 -     */
  17.526 +	/*
  17.527 +	 * If waken up by an INIT in an 82489DX configuration
  17.528 +	 * we may get here before an INIT-deassert IPI reaches
  17.529 +	 * our local APIC.  We have to wait for the IPI or we'll
  17.530 +	 * lock up on an APIC access.
  17.531 +	 */
  17.532 +	wait_for_init_deassert(&init_deasserted);
  17.533  
  17.534 -    Dprintk("CALLIN, before setup_local_APIC().\n");
  17.535 -
  17.536 -    setup_local_APIC();
  17.537 -
  17.538 -    __sti();
  17.539 -
  17.540 -    Dprintk("Stack at about %p\n",&cpuid);
  17.541 +	/*
  17.542 +	 * (This works even if the APIC is not enabled.)
  17.543 +	 */
  17.544 +	phys_id = GET_APIC_ID(apic_read(APIC_ID));
  17.545 +	cpuid = smp_processor_id();
  17.546 +	if (cpu_isset(cpuid, cpu_callin_map)) {
  17.547 +		printk("huh, phys CPU#%d, CPU#%d already present??\n",
  17.548 +					phys_id, cpuid);
  17.549 +		BUG();
  17.550 +	}
  17.551 +	Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);
  17.552  
  17.553 -    /*
  17.554 -     * Save our processor parameters
  17.555 -     */
  17.556 -    smp_store_cpu_info(cpuid);
  17.557 +	/*
  17.558 +	 * STARTUP IPIs are fragile beasts as they might sometimes
  17.559 +	 * trigger some glue motherboard logic. Complete APIC bus
  17.560 +	 * silence for 1 second, this overestimates the time the
  17.561 +	 * boot CPU is spending to send the up to 2 STARTUP IPIs
  17.562 +	 * by a factor of two. This should be enough.
  17.563 +	 */
  17.564  
  17.565 -    /*
  17.566 -     * Allow the master to continue.
  17.567 -     */
  17.568 -    set_bit(cpuid, &cpu_callin_map);
  17.569 +	/*
  17.570 +	 * Waiting 2s total for startup
  17.571 +	 */
  17.572 +	for (i = 0; i < 200; i++) {
  17.573 +		/*
  17.574 +		 * Has the boot CPU finished it's STARTUP sequence?
  17.575 +		 */
  17.576 +		if (cpu_isset(cpuid, cpu_callout_map))
  17.577 +			break;
  17.578 +		rep_nop();
  17.579 +		mdelay(10);
  17.580 +	}
  17.581  
  17.582 -    /*
  17.583 -     *      Synchronize the TSC with the BP
  17.584 -     */
  17.585 -    synchronize_tsc_ap();
  17.586 +	if (!cpu_isset(cpuid, cpu_callout_map)) {
  17.587 +		printk("BUG: CPU%d started up but did not get a callout!\n",
  17.588 +			cpuid);
  17.589 +		BUG();
  17.590 +	}
  17.591 +
  17.592 +	/*
  17.593 +	 * the boot CPU has finished the init stage and is spinning
  17.594 +	 * on callin_map until we finish. We are free to set up this
  17.595 +	 * CPU, first the APIC. (this is probably redundant on most
  17.596 +	 * boards)
  17.597 +	 */
  17.598 +
  17.599 +	Dprintk("CALLIN, before setup_local_APIC().\n");
  17.600 +	smp_callin_clear_local_apic();
  17.601 +	setup_local_APIC();
  17.602 +	map_cpu_to_logical_apicid();
  17.603 +
  17.604 +#if 0
  17.605 +	/*
  17.606 +	 * Get our bogomips.
  17.607 +	 */
  17.608 +	calibrate_delay();
  17.609 +	Dprintk("Stack at about %p\n",&cpuid);
  17.610 +#endif
  17.611 +
  17.612 +	/*
  17.613 +	 * Save our processor parameters
  17.614 +	 */
  17.615 + 	smp_store_cpu_info(cpuid);
  17.616 +
  17.617 +	disable_APIC_timer();
  17.618 +
  17.619 +	/*
  17.620 +	 * Allow the master to continue.
  17.621 +	 */
  17.622 +	cpu_set(cpuid, cpu_callin_map);
  17.623 +
  17.624 +	/*
  17.625 +	 *      Synchronize the TSC with the BP
  17.626 +	 */
  17.627 +	if (cpu_has_tsc && cpu_khz)
  17.628 +		synchronize_tsc_ap();
  17.629  }
  17.630  
  17.631 -static int cpucount;
  17.632 +int cpucount;
  17.633  
  17.634 -#ifdef __i386__
  17.635 +#ifdef CONFIG_X86_32
  17.636  static void construct_percpu_idt(unsigned int cpu)
  17.637  {
  17.638 -    unsigned char idt_load[10];
  17.639 +	unsigned char idt_load[10];
  17.640  
  17.641 -    idt_tables[cpu] = xmalloc_array(idt_entry_t, IDT_ENTRIES);
  17.642 -    memcpy(idt_tables[cpu], idt_table, IDT_ENTRIES*sizeof(idt_entry_t));
  17.643 +	idt_tables[cpu] = xmalloc_array(idt_entry_t, IDT_ENTRIES);
  17.644 +	memcpy(idt_tables[cpu], idt_table, IDT_ENTRIES*sizeof(idt_entry_t));
  17.645  
  17.646 -    *(unsigned short *)(&idt_load[0]) = (IDT_ENTRIES*sizeof(idt_entry_t))-1;
  17.647 -    *(unsigned long  *)(&idt_load[2]) = (unsigned long)idt_tables[cpu];
  17.648 -    __asm__ __volatile__ ( "lidt %0" : "=m" (idt_load) );
  17.649 +	*(unsigned short *)(&idt_load[0]) = (IDT_ENTRIES*sizeof(idt_entry_t))-1;
  17.650 +	*(unsigned long  *)(&idt_load[2]) = (unsigned long)idt_tables[cpu];
  17.651 +	__asm__ __volatile__ ( "lidt %0" : "=m" (idt_load) );
  17.652  }
  17.653  #endif
  17.654  
  17.655  /*
  17.656   * Activate a secondary processor.
  17.657   */
  17.658 -void __init start_secondary(void)
  17.659 +void __init start_secondary(void *unused)
  17.660  {
  17.661 -    unsigned int cpu = cpucount;
  17.662 -
  17.663 -    extern void percpu_traps_init(void);
  17.664 -    extern void cpu_init(void);
  17.665 -
  17.666 -    set_current(idle_task[cpu]);
  17.667 -    set_processor_id(cpu);
  17.668 -
  17.669 -    percpu_traps_init();
  17.670 +	unsigned int cpu = cpucount;
  17.671  
  17.672 -    cpu_init();
  17.673 -
  17.674 -    smp_callin();
  17.675 +	extern void percpu_traps_init(void);
  17.676 +	extern void cpu_init(void);
  17.677  
  17.678 -    while (!atomic_read(&smp_commenced))
  17.679 -        cpu_relax();
  17.680 +	set_current(idle_task[cpu]);
  17.681 +	set_processor_id(cpu);
  17.682  
  17.683 -#ifdef __i386__
  17.684 -    /*
  17.685 -     * At this point, boot CPU has fully initialised the IDT. It is
  17.686 -     * now safe to make ourselves a private copy.
  17.687 -     */
  17.688 -    construct_percpu_idt(cpu);
  17.689 +	percpu_traps_init();
  17.690 +
  17.691 +	cpu_init();
  17.692 +	smp_callin();
  17.693 +	while (!cpu_isset(smp_processor_id(), smp_commenced_mask))
  17.694 +		rep_nop();
  17.695 +
  17.696 +#ifdef CONFIG_X86_32
  17.697 +	/*
  17.698 +	 * At this point, boot CPU has fully initialised the IDT. It is
  17.699 +	 * now safe to make ourselves a private copy.
  17.700 +	 */
  17.701 +	construct_percpu_idt(cpu);
  17.702  #endif
  17.703  
  17.704 -    local_flush_tlb();
  17.705 +	setup_secondary_APIC_clock();
  17.706 +	enable_APIC_timer();
  17.707  
  17.708 -    startup_cpu_idle_loop();
  17.709 +	/*
  17.710 +	 * low-memory mappings have been cleared, flush them from
  17.711 +	 * the local TLBs too.
  17.712 +	 */
  17.713 +	local_flush_tlb();
  17.714 +	cpu_set(smp_processor_id(), cpu_online_map);
  17.715  
  17.716 -    BUG();
  17.717 +	/* We can take interrupts now: we're officially "up". */
  17.718 +	local_irq_enable();
  17.719 +
  17.720 +	wmb();
  17.721 +	startup_cpu_idle_loop();
  17.722  }
  17.723  
  17.724  extern struct {
  17.725 -    unsigned long esp, ss;
  17.726 +	void * esp;
  17.727 +	unsigned short ss;
  17.728  } stack_start;
  17.729  
  17.730 -/* which physical APIC ID maps to which logical CPU number */
  17.731 -volatile int physical_apicid_2_cpu[MAX_APICID];
  17.732 -/* which logical CPU number maps to which physical APIC ID */
  17.733 -volatile int cpu_2_physical_apicid[NR_CPUS];
  17.734 -
  17.735 -/* which logical APIC ID maps to which logical CPU number */
  17.736 -volatile int logical_apicid_2_cpu[MAX_APICID];
  17.737 -/* which logical CPU number maps to which logical APIC ID */
  17.738 -volatile int cpu_2_logical_apicid[NR_CPUS];
  17.739 +#ifdef CONFIG_NUMA
  17.740  
  17.741 -static inline void init_cpu_to_apicid(void)
  17.742 -/* Initialize all maps between cpu number and apicids */
  17.743 -{
  17.744 -    int apicid, cpu;
  17.745 +/* which logical CPUs are on which nodes */
  17.746 +cpumask_t node_2_cpu_mask[MAX_NUMNODES] =
  17.747 +				{ [0 ... MAX_NUMNODES-1] = CPU_MASK_NONE };
  17.748 +/* which node each logical CPU is on */
  17.749 +int cpu_2_node[NR_CPUS] = { [0 ... NR_CPUS-1] = 0 };
  17.750 +EXPORT_SYMBOL(cpu_2_node);
  17.751  
  17.752 -    for (apicid = 0; apicid < MAX_APICID; apicid++) {
  17.753 -        physical_apicid_2_cpu[apicid] = -1;
  17.754 -        logical_apicid_2_cpu[apicid] = -1;
  17.755 -    }
  17.756 -    for (cpu = 0; cpu < NR_CPUS; cpu++) {
  17.757 -        cpu_2_physical_apicid[cpu] = -1;
  17.758 -        cpu_2_logical_apicid[cpu] = -1;
  17.759 -    }
  17.760 +/* set up a mapping between cpu and node. */
  17.761 +static inline void map_cpu_to_node(int cpu, int node)
  17.762 +{
  17.763 +	printk("Mapping cpu %d to node %d\n", cpu, node);
  17.764 +	cpu_set(cpu, node_2_cpu_mask[node]);
  17.765 +	cpu_2_node[cpu] = node;
  17.766  }
  17.767  
  17.768 -static inline void map_cpu_to_boot_apicid(int cpu, int apicid)
  17.769 -/* 
  17.770 - * set up a mapping between cpu and apicid. Uses logical apicids for multiquad,
  17.771 - * else physical apic ids
  17.772 - */
  17.773 +/* undo a mapping between cpu and node. */
  17.774 +static inline void unmap_cpu_to_node(int cpu)
  17.775  {
  17.776 -    physical_apicid_2_cpu[apicid] = cpu;	
  17.777 -    cpu_2_physical_apicid[cpu] = apicid;
  17.778 +	int node;
  17.779 +
  17.780 +	printk("Unmapping cpu %d from all nodes\n", cpu);
  17.781 +	for (node = 0; node < MAX_NUMNODES; node ++)
  17.782 +		cpu_clear(cpu, node_2_cpu_mask[node]);
  17.783 +	cpu_2_node[cpu] = 0;
  17.784 +}
  17.785 +#else /* !CONFIG_NUMA */
  17.786 +
  17.787 +#define map_cpu_to_node(cpu, node)	({})
  17.788 +#define unmap_cpu_to_node(cpu)	({})
  17.789 +
  17.790 +#endif /* CONFIG_NUMA */
  17.791 +
  17.792 +u8 cpu_2_logical_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
  17.793 +
  17.794 +void map_cpu_to_logical_apicid(void)
  17.795 +{
  17.796 +	int cpu = smp_processor_id();
  17.797 +	int apicid = logical_smp_processor_id();
  17.798 +
  17.799 +	cpu_2_logical_apicid[cpu] = apicid;
  17.800 +	map_cpu_to_node(cpu, apicid_to_node(apicid));
  17.801  }
  17.802  
  17.803 -static inline void unmap_cpu_to_boot_apicid(int cpu, int apicid)
  17.804 -/* 
  17.805 - * undo a mapping between cpu and apicid. Uses logical apicids for multiquad,
  17.806 - * else physical apic ids
  17.807 - */
  17.808 +void unmap_cpu_to_logical_apicid(int cpu)
  17.809  {
  17.810 -    physical_apicid_2_cpu[apicid] = -1;	
  17.811 -    cpu_2_physical_apicid[cpu] = -1;
  17.812 +	cpu_2_logical_apicid[cpu] = BAD_APICID;
  17.813 +	unmap_cpu_to_node(cpu);
  17.814  }
  17.815  
  17.816  #if APIC_DEBUG
  17.817 -static inline void inquire_remote_apic(int apicid)
  17.818 +static inline void __inquire_remote_apic(int apicid)
  17.819  {
  17.820 -    int i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
  17.821 -    char *names[] = { "ID", "VERSION", "SPIV" };
  17.822 -    int timeout, status;
  17.823 -
  17.824 -    printk("Inquiring remote APIC #%d...\n", apicid);
  17.825 -
  17.826 -    for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) {
  17.827 -        printk("... APIC #%d %s: ", apicid, names[i]);
  17.828 -
  17.829 -        /*
  17.830 -         * Wait for idle.
  17.831 -         */
  17.832 -        apic_wait_icr_idle();
  17.833 +	int i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
  17.834 +	char *names[] = { "ID", "VERSION", "SPIV" };
  17.835 +	int timeout, status;
  17.836  
  17.837 -        apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
  17.838 -        apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]);
  17.839 -
  17.840 -        timeout = 0;
  17.841 -        do {
  17.842 -            udelay(100);
  17.843 -            status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK;
  17.844 -        } while (status == APIC_ICR_RR_INPROG && timeout++ < 1000);
  17.845 +	printk("Inquiring remote APIC #%d...\n", apicid);
  17.846  
  17.847 -        switch (status) {
  17.848 -        case APIC_ICR_RR_VALID:
  17.849 -            status = apic_read(APIC_RRR);
  17.850 -            printk("%08x\n", status);
  17.851 -            break;
  17.852 -        default:
  17.853 -            printk("failed\n");
  17.854 -        }
  17.855 -    }
  17.856 +	for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) {
  17.857 +		printk("... APIC #%d %s: ", apicid, names[i]);
  17.858 +
  17.859 +		/*
  17.860 +		 * Wait for idle.
  17.861 +		 */
  17.862 +		apic_wait_icr_idle();
  17.863 +
  17.864 +		apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
  17.865 +		apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]);
  17.866 +
  17.867 +		timeout = 0;
  17.868 +		do {
  17.869 +			udelay(100);
  17.870 +			status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK;
  17.871 +		} while (status == APIC_ICR_RR_INPROG && timeout++ < 1000);
  17.872 +
  17.873 +		switch (status) {
  17.874 +		case APIC_ICR_RR_VALID:
  17.875 +			status = apic_read(APIC_RRR);
  17.876 +			printk("%08x\n", status);
  17.877 +			break;
  17.878 +		default:
  17.879 +			printk("failed\n");
  17.880 +		}
  17.881 +	}
  17.882  }
  17.883  #endif
  17.884  
  17.885 -
  17.886 -static int wakeup_secondary_via_INIT(int phys_apicid, unsigned long start_eip)
  17.887 +#ifdef WAKE_SECONDARY_VIA_NMI
  17.888 +/* 
  17.889 + * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal
  17.890 + * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this
  17.891 + * won't ... remember to clear down the APIC, etc later.
  17.892 + */
  17.893 +static int __init
  17.894 +wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
  17.895  {
  17.896 -    unsigned long send_status = 0, accept_status = 0;
  17.897 -    int maxlvt, timeout, num_starts, j;
  17.898 -
  17.899 -    Dprintk("Asserting INIT.\n");
  17.900 -
  17.901 -    /*
  17.902 -     * Turn INIT on target chip
  17.903 -     */
  17.904 -    apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
  17.905 -
  17.906 -    /*
  17.907 -     * Send IPI
  17.908 -     */
  17.909 -    apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT
  17.910 -                      | APIC_DM_INIT);
  17.911 -
  17.912 -    Dprintk("Waiting for send to finish...\n");
  17.913 -    timeout = 0;
  17.914 -    do {
  17.915 -        Dprintk("+");
  17.916 -        udelay(100);
  17.917 -        send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
  17.918 -    } while (send_status && (timeout++ < 1000));
  17.919 -
  17.920 -    mdelay(10);
  17.921 -
  17.922 -    Dprintk("Deasserting INIT.\n");
  17.923 -
  17.924 -    /* Target chip */
  17.925 -    apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
  17.926 -
  17.927 -    /* Send IPI */
  17.928 -    apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
  17.929 -
  17.930 -    Dprintk("Waiting for send to finish...\n");
  17.931 -    timeout = 0;
  17.932 -    do {
  17.933 -        Dprintk("+");
  17.934 -        udelay(100);
  17.935 -        send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
  17.936 -    } while (send_status && (timeout++ < 1000));
  17.937 -
  17.938 -    atomic_set(&init_deasserted, 1);
  17.939 -
  17.940 -    /*
  17.941 -     * Should we send STARTUP IPIs ?
  17.942 -     *
  17.943 -     * Determine this based on the APIC version.
  17.944 -     * If we don't have an integrated APIC, don't send the STARTUP IPIs.
  17.945 -     */
  17.946 -    if (APIC_INTEGRATED(apic_version[phys_apicid]))
  17.947 -        num_starts = 2;
  17.948 -    else
  17.949 -        num_starts = 0;
  17.950 -
  17.951 -    /*
  17.952 -     * Run STARTUP IPI loop.
  17.953 -     */
  17.954 -    Dprintk("#startup loops: %d.\n", num_starts);
  17.955 +	unsigned long send_status = 0, accept_status = 0;
  17.956 +	int timeout, maxlvt;
  17.957  
  17.958 -    maxlvt = get_maxlvt();
  17.959 -
  17.960 -    for (j = 1; j <= num_starts; j++) {
  17.961 -        Dprintk("Sending STARTUP #%d.\n",j);
  17.962 -
  17.963 -        apic_read_around(APIC_SPIV);
  17.964 -        apic_write(APIC_ESR, 0);
  17.965 -        apic_read(APIC_ESR);
  17.966 -        Dprintk("After apic_write.\n");
  17.967 -
  17.968 -        /*
  17.969 -         * STARTUP IPI
  17.970 -         */
  17.971 -
  17.972 -        /* Target chip */
  17.973 -        apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
  17.974 -
  17.975 -        /* Boot on the stack */
  17.976 -        /* Kick the second */
  17.977 -        apic_write_around(APIC_ICR, APIC_DM_STARTUP
  17.978 -                          | (start_eip >> 12));
  17.979 -
  17.980 -        /*
  17.981 -         * Give the other CPU some time to accept the IPI.
  17.982 -         */
  17.983 -        udelay(300);
  17.984 -
  17.985 -        Dprintk("Startup point 1.\n");
  17.986 +	/* Target chip */
  17.987 +	apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid));
  17.988  
  17.989 -        Dprintk("Waiting for send to finish...\n");
  17.990 -        timeout = 0;
  17.991 -        do {
  17.992 -            Dprintk("+");
  17.993 -            udelay(100);
  17.994 -            send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
  17.995 -        } while (send_status && (timeout++ < 1000));
  17.996 +	/* Boot on the stack */
  17.997 +	/* Kick the second */
  17.998 +	apic_write_around(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL);
  17.999  
 17.1000 -        /*
 17.1001 -         * Give the other CPU some time to accept the IPI.
 17.1002 -         */
 17.1003 -        udelay(200);
 17.1004 -        /*
 17.1005 -         * Due to the Pentium erratum 3AP.
 17.1006 -         */
 17.1007 -        if (maxlvt > 3) {
 17.1008 -            apic_read_around(APIC_SPIV);
 17.1009 -            apic_write(APIC_ESR, 0);
 17.1010 -        }
 17.1011 -        accept_status = (apic_read(APIC_ESR) & 0xEF);
 17.1012 -        if (send_status || accept_status)
 17.1013 -            break;
 17.1014 -    }
 17.1015 -    Dprintk("After Startup.\n");
 17.1016 +	Dprintk("Waiting for send to finish...\n");
 17.1017 +	timeout = 0;
 17.1018 +	do {
 17.1019 +		Dprintk("+");
 17.1020 +		udelay(100);
 17.1021 +		send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
 17.1022 +	} while (send_status && (timeout++ < 1000));
 17.1023  
 17.1024 -    if (send_status)
 17.1025 -        printk("APIC never delivered???\n");
 17.1026 -    if (accept_status)
 17.1027 -        printk("APIC delivery error (%lx).\n", accept_status);
 17.1028 +	/*
 17.1029 +	 * Give the other CPU some time to accept the IPI.
 17.1030 +	 */
 17.1031 +	udelay(200);
 17.1032 +	/*
 17.1033 +	 * Due to the Pentium erratum 3AP.
 17.1034 +	 */
 17.1035 +	maxlvt = get_maxlvt();
 17.1036 +	if (maxlvt > 3) {
 17.1037 +		apic_read_around(APIC_SPIV);
 17.1038 +		apic_write(APIC_ESR, 0);
 17.1039 +	}
 17.1040 +	accept_status = (apic_read(APIC_ESR) & 0xEF);
 17.1041 +	Dprintk("NMI sent.\n");
 17.1042  
 17.1043 -    return (send_status | accept_status);
 17.1044 -}
 17.1045 +	if (send_status)
 17.1046 +		printk("APIC never delivered???\n");
 17.1047 +	if (accept_status)
 17.1048 +		printk("APIC delivery error (%lx).\n", accept_status);
 17.1049  
 17.1050 -extern unsigned long cpu_initialized;
 17.1051 +	return (send_status | accept_status);
 17.1052 +}
 17.1053 +#endif	/* WAKE_SECONDARY_VIA_NMI */
 17.1054  
 17.1055 -static void __init do_boot_cpu (int apicid) 
 17.1056 +#ifdef WAKE_SECONDARY_VIA_INIT
 17.1057 +static int __init
 17.1058 +wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
 17.1059 +{
 17.1060 +	unsigned long send_status = 0, accept_status = 0;
 17.1061 +	int maxlvt, timeout, num_starts, j;
 17.1062 +
 17.1063 +	/*
 17.1064 +	 * Be paranoid about clearing APIC errors.
 17.1065 +	 */
 17.1066 +	if (APIC_INTEGRATED(apic_version[phys_apicid])) {
 17.1067 +		apic_read_around(APIC_SPIV);
 17.1068 +		apic_write(APIC_ESR, 0);
 17.1069 +		apic_read(APIC_ESR);
 17.1070 +	}
 17.1071 +
 17.1072 +	Dprintk("Asserting INIT.\n");
 17.1073 +
 17.1074 +	/*
 17.1075 +	 * Turn INIT on target chip
 17.1076 +	 */
 17.1077 +	apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
 17.1078 +
 17.1079 +	/*
 17.1080 +	 * Send IPI
 17.1081 +	 */
 17.1082 +	apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT
 17.1083 +				| APIC_DM_INIT);
 17.1084 +
 17.1085 +	Dprintk("Waiting for send to finish...\n");
 17.1086 +	timeout = 0;
 17.1087 +	do {
 17.1088 +		Dprintk("+");
 17.1089 +		udelay(100);
 17.1090 +		send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
 17.1091 +	} while (send_status && (timeout++ < 1000));
 17.1092 +
 17.1093 +	mdelay(10);
 17.1094 +
 17.1095 +	Dprintk("Deasserting INIT.\n");
 17.1096 +
 17.1097 +	/* Target chip */
 17.1098 +	apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
 17.1099 +
 17.1100 +	/* Send IPI */
 17.1101 +	apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
 17.1102 +
 17.1103 +	Dprintk("Waiting for send to finish...\n");
 17.1104 +	timeout = 0;
 17.1105 +	do {
 17.1106 +		Dprintk("+");
 17.1107 +		udelay(100);
 17.1108 +		send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
 17.1109 +	} while (send_status && (timeout++ < 1000));
 17.1110 +
 17.1111 +	atomic_set(&init_deasserted, 1);
 17.1112 +
 17.1113 +	/*
 17.1114 +	 * Should we send STARTUP IPIs ?
 17.1115 +	 *
 17.1116 +	 * Determine this based on the APIC version.
 17.1117 +	 * If we don't have an integrated APIC, don't send the STARTUP IPIs.
 17.1118 +	 */
 17.1119 +	if (APIC_INTEGRATED(apic_version[phys_apicid]))
 17.1120 +		num_starts = 2;
 17.1121 +	else
 17.1122 +		num_starts = 0;
 17.1123 +
 17.1124 +	/*
 17.1125 +	 * Run STARTUP IPI loop.
 17.1126 +	 */
 17.1127 +	Dprintk("#startup loops: %d.\n", num_starts);
 17.1128 +
 17.1129 +	maxlvt = get_maxlvt();
 17.1130 +
 17.1131 +	for (j = 1; j <= num_starts; j++) {
 17.1132 +		Dprintk("Sending STARTUP #%d.\n",j);
 17.1133 +		apic_read_around(APIC_SPIV);
 17.1134 +		apic_write(APIC_ESR, 0);
 17.1135 +		apic_read(APIC_ESR);
 17.1136 +		Dprintk("After apic_write.\n");
 17.1137 +
 17.1138 +		/*
 17.1139 +		 * STARTUP IPI
 17.1140 +		 */
 17.1141 +
 17.1142 +		/* Target chip */
 17.1143 +		apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
 17.1144 +
 17.1145 +		/* Boot on the stack */
 17.1146 +		/* Kick the second */
 17.1147 +		apic_write_around(APIC_ICR, APIC_DM_STARTUP
 17.1148 +					| (start_eip >> 12));
 17.1149 +
 17.1150 +		/*
 17.1151 +		 * Give the other CPU some time to accept the IPI.
 17.1152 +		 */
 17.1153 +		udelay(300);
 17.1154 +
 17.1155 +		Dprintk("Startup point 1.\n");
 17.1156 +
 17.1157 +		Dprintk("Waiting for send to finish...\n");
 17.1158 +		timeout = 0;
 17.1159 +		do {
 17.1160 +			Dprintk("+");
 17.1161 +			udelay(100);
 17.1162 +			send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
 17.1163 +		} while (send_status && (timeout++ < 1000));
 17.1164 +
 17.1165 +		/*
 17.1166 +		 * Give the other CPU some time to accept the IPI.
 17.1167 +		 */
 17.1168 +		udelay(200);
 17.1169 +		/*
 17.1170 +		 * Due to the Pentium erratum 3AP.
 17.1171 +		 */
 17.1172 +		if (maxlvt > 3) {
 17.1173 +			apic_read_around(APIC_SPIV);
 17.1174 +			apic_write(APIC_ESR, 0);
 17.1175 +		}
 17.1176 +		accept_status = (apic_read(APIC_ESR) & 0xEF);
 17.1177 +		if (send_status || accept_status)
 17.1178 +			break;
 17.1179 +	}
 17.1180 +	Dprintk("After Startup.\n");
 17.1181 +
 17.1182 +	if (send_status)
 17.1183 +		printk("APIC never delivered???\n");
 17.1184 +	if (accept_status)
 17.1185 +		printk("APIC delivery error (%lx).\n", accept_status);
 17.1186 +
 17.1187 +	return (send_status | accept_status);
 17.1188 +}
 17.1189 +#endif	/* WAKE_SECONDARY_VIA_INIT */
 17.1190 +
 17.1191 +extern cpumask_t cpu_initialized;
 17.1192 +
 17.1193 +static int __init do_boot_cpu(int apicid)
 17.1194  /*
 17.1195   * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
 17.1196   * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
 17.1197 + * Returns zero if CPU booted OK, else error code from wakeup_secondary_cpu.
 17.1198   */
 17.1199  {
 17.1200 -    struct domain *idle;
 17.1201 -    struct exec_domain *ed;
 17.1202 -    unsigned long boot_error = 0;
 17.1203 -    int timeout, cpu;
 17.1204 -    unsigned long start_eip;
 17.1205 -    void *stack;
 17.1206 -
 17.1207 -    cpu = ++cpucount;
 17.1208 -
 17.1209 -    if ( (idle = do_createdomain(IDLE_DOMAIN_ID, cpu)) == NULL )
 17.1210 -        panic("failed 'createdomain' for CPU %d", cpu);
 17.1211 -
 17.1212 -    ed = idle->exec_domain[0];
 17.1213 -
 17.1214 -    set_bit(_DOMF_idle_domain, &idle->domain_flags);
 17.1215 -
 17.1216 -    ed->arch.monitor_table = mk_pagetable(__pa(idle_pg_table));
 17.1217 -
 17.1218 -    map_cpu_to_boot_apicid(cpu, apicid);
 17.1219 -
 17.1220 -    idle_task[cpu] = ed;
 17.1221 -
 17.1222 -    /* start_eip had better be page-aligned! */
 17.1223 -    start_eip = setup_trampoline();
 17.1224 -
 17.1225 -    /* So we see what's up. */
 17.1226 -    printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
 17.1227 -
 17.1228 -    stack = (void *)alloc_xenheap_pages(STACK_ORDER);
 17.1229 -#if defined(__i386__)
 17.1230 -    stack_start.esp = __pa(stack);
 17.1231 -#elif defined(__x86_64__)
 17.1232 -    stack_start.esp = (unsigned long)stack;
 17.1233 -#endif
 17.1234 -    stack_start.esp += STACK_SIZE - sizeof(struct cpu_info);
 17.1235 -
 17.1236 -    /* Debug build: detect stack overflow by setting up a guard page. */
 17.1237 -    memguard_guard_stack(stack);
 17.1238 -
 17.1239 -    /*
 17.1240 -     * This grunge runs the startup process for
 17.1241 -     * the targeted processor.
 17.1242 -     */
 17.1243 -
 17.1244 -    atomic_set(&init_deasserted, 0);
 17.1245 -
 17.1246 -    Dprintk("Setting warm reset code and vector.\n");
 17.1247 -
 17.1248 -    CMOS_WRITE(0xa, 0xf);
 17.1249 -    local_flush_tlb();
 17.1250 -    Dprintk("1.\n");
 17.1251 -    *((volatile unsigned short *) TRAMPOLINE_HIGH) = start_eip >> 4;
 17.1252 -    Dprintk("2.\n");
 17.1253 -    *((volatile unsigned short *) TRAMPOLINE_LOW) = start_eip & 0xf;
 17.1254 -    Dprintk("3.\n");
 17.1255 +	struct domain *idle;
 17.1256 +	struct exec_domain *ed;
 17.1257 +	void *stack;
 17.1258 +	unsigned long boot_error;
 17.1259 +	int timeout, cpu;
 17.1260 +	unsigned long start_eip;
 17.1261 +	unsigned short nmi_high = 0, nmi_low = 0;
 17.1262  
 17.1263 -    /*
 17.1264 -     * Be paranoid about clearing APIC errors.
 17.1265 -     */
 17.1266 -    if ( APIC_INTEGRATED(apic_version[apicid]) )
 17.1267 -    {
 17.1268 -        apic_read_around(APIC_SPIV);
 17.1269 -        apic_write(APIC_ESR, 0);
 17.1270 -        apic_read(APIC_ESR);
 17.1271 -    }
 17.1272 -
 17.1273 -    /*
 17.1274 -     * Status is now clean
 17.1275 -     */
 17.1276 -    boot_error = 0;
 17.1277 -
 17.1278 -    /*
 17.1279 -     * Starting actual IPI sequence...
 17.1280 -     */
 17.1281 -
 17.1282 -    boot_error = wakeup_secondary_via_INIT(apicid, start_eip);
 17.1283 -
 17.1284 -    if (!boot_error) {
 17.1285 -        /*
 17.1286 -         * allow APs to start initializing.
 17.1287 -         */
 17.1288 -        Dprintk("Before Callout %d.\n", cpu);
 17.1289 -        set_bit(cpu, &cpu_callout_map);
 17.1290 -        Dprintk("After Callout %d.\n", cpu);
 17.1291 +	cpu = ++cpucount;
 17.1292  
 17.1293 -        /*
 17.1294 -         * Wait 5s total for a response
 17.1295 -         */
 17.1296 -        for (timeout = 0; timeout < 50000; timeout++) {
 17.1297 -            if (test_bit(cpu, &cpu_callin_map))
 17.1298 -                break;	/* It has booted */
 17.1299 -            udelay(100);
 17.1300 -        }
 17.1301 +	if ( (idle = do_createdomain(IDLE_DOMAIN_ID, cpu)) == NULL )
 17.1302 +		panic("failed 'createdomain' for CPU %d", cpu);
 17.1303  
 17.1304 -        if (test_bit(cpu, &cpu_callin_map)) {
 17.1305 -            /* number CPUs logically, starting from 1 (BSP is 0) */
 17.1306 -            printk("CPU%d has booted.\n", cpu);
 17.1307 -        } else {
 17.1308 -            boot_error= 1;
 17.1309 -            if (*((volatile unsigned int *)phys_to_virt(start_eip))
 17.1310 -                == 0xA5A5A5A5)
 17.1311 +	ed = idle_task[cpu] = idle->exec_domain[0];
 17.1312 +
 17.1313 +	set_bit(_DOMF_idle_domain, &idle->domain_flags);
 17.1314 +
 17.1315 +	ed->arch.monitor_table = mk_pagetable(__pa(idle_pg_table));
 17.1316 +
 17.1317 +	/* start_eip had better be page-aligned! */
 17.1318 +	start_eip = setup_trampoline();
 17.1319 +
 17.1320 +	/* So we see what's up   */
 17.1321 +	printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
 17.1322 +
 17.1323 +	stack = (void *)alloc_xenheap_pages(STACK_ORDER);
 17.1324 +#if defined(__i386__)
 17.1325 +	stack_start.esp = (void *)__pa(stack);
 17.1326 +#elif defined(__x86_64__)
 17.1327 +	stack_start.esp = stack;
 17.1328 +#endif
 17.1329 +	stack_start.esp += STACK_SIZE - sizeof(struct cpu_info);
 17.1330 +
 17.1331 +	/* Debug build: detect stack overflow by setting up a guard page. */
 17.1332 +	memguard_guard_stack(stack);
 17.1333 +
 17.1334 +	/*
 17.1335 +	 * This grunge runs the startup process for
 17.1336 +	 * the targeted processor.
 17.1337 +	 */
 17.1338 +
 17.1339 +	atomic_set(&init_deasserted, 0);
 17.1340 +
 17.1341 +	Dprintk("Setting warm reset code and vector.\n");
 17.1342 +
 17.1343 +	store_NMI_vector(&nmi_high, &nmi_low);
 17.1344 +
 17.1345 +	smpboot_setup_warm_reset_vector(start_eip);
 17.1346 +
 17.1347 +	/*
 17.1348 +	 * Starting actual IPI sequence...
 17.1349 +	 */
 17.1350 +	boot_error = wakeup_secondary_cpu(apicid, start_eip);
 17.1351 +
 17.1352 +	if (!boot_error) {
 17.1353 +		/*
 17.1354 +		 * allow APs to start initializing.
 17.1355 +		 */
 17.1356 +		Dprintk("Before Callout %d.\n", cpu);
 17.1357 +		cpu_set(cpu, cpu_callout_map);
 17.1358 +		Dprintk("After Callout %d.\n", cpu);
 17.1359 +
 17.1360 +		/*
 17.1361 +		 * Wait 5s total for a response
 17.1362 +		 */
 17.1363 +		for (timeout = 0; timeout < 50000; timeout++) {
 17.1364 +			if (cpu_isset(cpu, cpu_callin_map))
 17.1365 +				break;	/* It has booted */
 17.1366 +			udelay(100);
 17.1367 +		}
 17.1368 +
 17.1369 +		if (cpu_isset(cpu, cpu_callin_map)) {
 17.1370 +			/* number CPUs logically, starting from 1 (BSP is 0) */
 17.1371 +			Dprintk("OK.\n");
 17.1372 +			printk("CPU%d: ", cpu);
 17.1373 +			print_cpu_info(&cpu_data[cpu]);
 17.1374 +			Dprintk("CPU has booted.\n");
 17.1375 +		} else {
 17.1376 +			boot_error= 1;
 17.1377 +			if (*((volatile unsigned char *)trampoline_base)
 17.1378 +					== 0xA5)
 17.1379  				/* trampoline started but...? */
 17.1380 -                printk("Stuck ??\n");
 17.1381 -            else
 17.1382 +				printk("Stuck ??\n");
 17.1383 +			else
 17.1384  				/* trampoline code not run */
 17.1385 -                printk("Not responding.\n");
 17.1386 -#if APIC_DEBUG
 17.1387 -            inquire_remote_apic(apicid);
 17.1388 -#endif
 17.1389 -        }
 17.1390 -    }
 17.1391 -    if (boot_error) {
 17.1392 -        /* Try to put things back the way they were before ... */
 17.1393 -        unmap_cpu_to_boot_apicid(cpu, apicid);
 17.1394 -        clear_bit(cpu, &cpu_callout_map); /* was set here (do_boot_cpu()) */
 17.1395 -        clear_bit(cpu, &cpu_initialized); /* was set by cpu_init() */
 17.1396 -        clear_bit(cpu, &cpu_online_map);  /* was set in smp_callin() */
 17.1397 -        cpucount--;
 17.1398 -    }
 17.1399 +				printk("Not responding.\n");
 17.1400 +			inquire_remote_apic(apicid);
 17.1401 +		}
 17.1402 +	}
 17.1403 +	x86_cpu_to_apicid[cpu] = apicid;
 17.1404 +	if (boot_error) {
 17.1405 +		/* Try to put things back the way they were before ... */
 17.1406 +		unmap_cpu_to_logical_apicid(cpu);
 17.1407 +		cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
 17.1408 +		cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
 17.1409 +		cpucount--;
 17.1410 +	}
 17.1411 +
 17.1412 +	/* mark "stuck" area as not stuck */
 17.1413 +	*((volatile unsigned long *)trampoline_base) = 0;
 17.1414 +
 17.1415 +	return boot_error;
 17.1416  }
 17.1417  
 17.1418 +#if 0
 17.1419 +cycles_t cacheflush_time;
 17.1420 +unsigned long cache_decay_ticks;
 17.1421 +
 17.1422 +static void smp_tune_scheduling (void)
 17.1423 +{
 17.1424 +	unsigned long cachesize;       /* kB   */
 17.1425 +	unsigned long bandwidth = 350; /* MB/s */
 17.1426 +	/*
 17.1427 +	 * Rough estimation for SMP scheduling, this is the number of
 17.1428 +	 * cycles it takes for a fully memory-limited process to flush
 17.1429 +	 * the SMP-local cache.
 17.1430 +	 *
 17.1431 +	 * (For a P5 this pretty much means we will choose another idle
 17.1432 +	 *  CPU almost always at wakeup time (this is due to the small
 17.1433 +	 *  L1 cache), on PIIs it's around 50-100 usecs, depending on
 17.1434 +	 *  the cache size)
 17.1435 +	 */
 17.1436 +
 17.1437 +	if (!cpu_khz) {
 17.1438 +		/*
 17.1439 +		 * this basically disables processor-affinity
 17.1440 +		 * scheduling on SMP without a TSC.
 17.1441 +		 */
 17.1442 +		cacheflush_time = 0;
 17.1443 +		return;
 17.1444 +	} else {
 17.1445 +		cachesize = boot_cpu_data.x86_cache_size;
 17.1446 +		if (cachesize == -1) {
 17.1447 +			cachesize = 16; /* Pentiums, 2x8kB cache */
 17.1448 +			bandwidth = 100;
 17.1449 +		}
 17.1450 +
 17.1451 +		cacheflush_time = (cpu_khz>>10) * (cachesize<<10) / bandwidth;
 17.1452 +	}
 17.1453 +
 17.1454 +	cache_decay_ticks = (long)cacheflush_time/cpu_khz + 1;
 17.1455 +
 17.1456 +	printk("per-CPU timeslice cutoff: %ld.%02ld usecs.\n",
 17.1457 +		(long)cacheflush_time/(cpu_khz/1000),
 17.1458 +		((long)cacheflush_time*100/(cpu_khz/1000)) % 100);
 17.1459 +	printk("task migration cache decay timeout: %ld msecs.\n",
 17.1460 +		cache_decay_ticks);
 17.1461 +}
 17.1462 +#else
 17.1463 +#define smp_tune_scheduling() ((void)0)
 17.1464 +#endif
 17.1465  
 17.1466  /*
 17.1467   * Cycle through the processors sending APIC IPIs to boot each.
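
This first smpboot.c hunk replaces the old volatile apicid lookup tables and raw bit operations on plain unsigned long maps with the Linux 2.6 cpumask_t accessors (cpu_set, cpu_clear, cpu_isset), and adds the CONFIG_NUMA cpu-to-node maps. The stand-alone sketch below only models the semantics of those accessors with a single-word bitmask, so the bookkeeping in do_boot_cpu (cpu_callout_map set by the BSP, cpu_callin_map set by the AP, both undone on failure) is easier to follow; the toy_* names are stand-ins, not the real xen/include/xen/cpumask.h interface.

    #include <stdio.h>

    /* Simplified stand-in for cpumask_t: one machine word, so at most 64 CPUs. */
    typedef unsigned long toy_cpumask_t;

    static void toy_cpu_set(int cpu, toy_cpumask_t *mask)   { *mask |=  (1UL << cpu); }
    static void toy_cpu_clear(int cpu, toy_cpumask_t *mask) { *mask &= ~(1UL << cpu); }
    static int  toy_cpu_isset(int cpu, toy_cpumask_t mask)  { return (mask >> cpu) & 1UL; }

    int main(void)
    {
        toy_cpumask_t callout_map = 0, callin_map = 0;

        /* BSP marks an AP as "called out" before waking it... */
        toy_cpu_set(1, &callout_map);

        /* ...and the AP acknowledges by setting itself in the call-in map. */
        toy_cpu_set(1, &callin_map);

        if (toy_cpu_isset(1, callin_map))
            printf("CPU1 has booted\n");

        /* On a failed boot the BSP undoes its own bookkeeping. */
        toy_cpu_clear(1, &callout_map);
        return 0;
    }
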
 17.1468 @@ -776,178 +913,274 @@ static void __init do_boot_cpu (int apic
 17.1469  
 17.1470  static int boot_cpu_logical_apicid;
 17.1471  /* Where the IO area was mapped on multiquad, always 0 otherwise */
 17.1472 -void *xquad_portio = NULL;
 17.1473 -
 17.1474 -void __init smp_boot_cpus(void)
 17.1475 -{
 17.1476 -    int apicid, bit;
 17.1477 -
 17.1478 -    /* Initialize the logical to physical CPU number mapping */
 17.1479 -    init_cpu_to_apicid();
 17.1480 -
 17.1481 -    /*
 17.1482 -     * Setup boot CPU information
 17.1483 -     */
 17.1484 -    smp_store_cpu_info(0); /* Final full version of the data */
 17.1485 -    printk("CPU%d booted\n", 0);
 17.1486 -
 17.1487 -    /*
 17.1488 -     * We have the boot CPU online for sure.
 17.1489 -     */
 17.1490 -    set_bit(0, &cpu_online_map);
 17.1491 -    boot_cpu_logical_apicid = logical_smp_processor_id();
 17.1492 -    map_cpu_to_boot_apicid(0, boot_cpu_apicid);
 17.1493 -
 17.1494 -    /*
 17.1495 -     * If we couldnt find an SMP configuration at boot time,
 17.1496 -     * get out of here now!
 17.1497 -     */
 17.1498 -    if (!smp_found_config || opt_nosmp) {
 17.1499 -        io_apic_irqs = 0;
 17.1500 -        phys_cpu_present_map = physid_mask_of_physid(0);
 17.1501 -        cpu_online_map = 1;
 17.1502 -        smp_num_cpus = 1;
 17.1503 -        if (APIC_init_uniprocessor())
 17.1504 -            printk("Local APIC not detected."
 17.1505 -                   " Using dummy APIC emulation.\n");
 17.1506 -        goto smp_done;
 17.1507 -    }
 17.1508 -
 17.1509 -    /*
 17.1510 -     * Should not be necessary because the MP table should list the boot
 17.1511 -     * CPU too, but we do it for the sake of robustness anyway.
 17.1512 -     */
 17.1513 -    if (!test_bit(boot_cpu_physical_apicid, &phys_cpu_present_map)) {
 17.1514 -        printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
 17.1515 -               boot_cpu_physical_apicid);
 17.1516 -        physid_set(hard_smp_processor_id(), phys_cpu_present_map);
 17.1517 -    }
 17.1518 -
 17.1519 -    /*
 17.1520 -     * If we couldn't find a local APIC, then get out of here now!
 17.1521 -     */
 17.1522 -    if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) &&
 17.1523 -        !test_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability)) {
 17.1524 -        printk("BIOS bug, local APIC #%d not detected!...\n",
 17.1525 -               boot_cpu_physical_apicid);
 17.1526 -        printk("... forcing use of dummy APIC emulation. (tell your hw vendor)\n");
 17.1527 -        io_apic_irqs = 0;
 17.1528 -        phys_cpu_present_map = physid_mask_of_physid(0);
 17.1529 -        cpu_online_map = 1;
 17.1530 -        smp_num_cpus = 1;
 17.1531 -        goto smp_done;
 17.1532 -    }
 17.1533 -
 17.1534 -    verify_local_APIC();
 17.1535 -
 17.1536 -    /*
 17.1537 -     * If SMP should be disabled, then really disable it!
 17.1538 -     */
 17.1539 -    if (!max_cpus) {
 17.1540 -        smp_found_config = 0;
 17.1541 -        printk("SMP mode deactivated, forcing use of dummy APIC emulation.\n");
 17.1542 -        io_apic_irqs = 0;
 17.1543 -        phys_cpu_present_map = physid_mask_of_physid(0);
 17.1544 -        cpu_online_map = 1;
 17.1545 -        smp_num_cpus = 1;
 17.1546 -        goto smp_done;
 17.1547 -    }
 17.1548 -
 17.1549 -    connect_bsp_APIC();
 17.1550 -    setup_local_APIC();
 17.1551 +void *xquad_portio;
 17.1552  
 17.1553 -    if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_physical_apicid)
 17.1554 -        BUG();
 17.1555 -
 17.1556 -    /*
 17.1557 -     * Scan the CPU present map and fire up the other CPUs via do_boot_cpu
 17.1558 -     *
 17.1559 -     * In clustered apic mode, phys_cpu_present_map is a constructed thus:
 17.1560 -     * bits 0-3 are quad0, 4-7 are quad1, etc. A perverse twist on the 
 17.1561 -     * clustered apic ID.
 17.1562 -     */
 17.1563 -    Dprintk("CPU present map: %lx\n", phys_cpu_present_map);
 17.1564 -
 17.1565 -    for (bit = 0; bit < NR_CPUS; bit++) {
 17.1566 -        apicid = cpu_present_to_apicid(bit);
 17.1567 -        /*
 17.1568 -         * Don't even attempt to start the boot CPU!
 17.1569 -         */
 17.1570 -        if (apicid == boot_cpu_apicid)
 17.1571 -            continue;
 17.1572 -
 17.1573 -        /* 
 17.1574 -         * Don't start hyperthreads if option noht requested.
 17.1575 -         */
 17.1576 -        if (opt_noht && (apicid & (ht_per_core - 1)))
 17.1577 -            continue;
 17.1578 -
 17.1579 -        if (!check_apicid_present(bit))
 17.1580 -            continue;
 17.1581 -        if ((max_cpus >= 0) && (max_cpus <= cpucount+1))
 17.1582 -            continue;
 17.1583 -
 17.1584 -        do_boot_cpu(apicid);
 17.1585 -
 17.1586 -        /*
 17.1587 -         * Make sure we unmap all failed CPUs
 17.1588 -         */
 17.1589 -        if ((boot_apicid_to_cpu(apicid) == -1) &&
 17.1590 -            (!check_apicid_present(bit)))
 17.1591 -            printk("CPU #%d not responding - cannot use it.\n",
 17.1592 -                   apicid);
 17.1593 -    }
 17.1594 +cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
 17.1595  
 17.1596 -    /*
 17.1597 -     * Cleanup possible dangling ends...
 17.1598 -     */
 17.1599 -    /*
 17.1600 -     * Install writable page 0 entry to set BIOS data area.
 17.1601 -     */
 17.1602 -    local_flush_tlb();
 17.1603 -
 17.1604 -    /*
 17.1605 -     * Paranoid:  Set warm reset code and vector here back
 17.1606 -     * to default values.
 17.1607 -     */
 17.1608 -    CMOS_WRITE(0, 0xf);
 17.1609 -
 17.1610 -    *((volatile long *) phys_to_virt(0x467)) = 0;
 17.1611 +static void __init smp_boot_cpus(unsigned int max_cpus)
 17.1612 +{
 17.1613 +	int apicid, cpu, bit, kicked;
 17.1614 +#ifdef BOGOMIPS
 17.1615 +	unsigned long bogosum = 0;
 17.1616 +#endif
 17.1617  
 17.1618 -    if (!cpucount) {
 17.1619 -        printk("Error: only one processor found.\n");
 17.1620 -    } else {
 17.1621 -        printk("Total of %d processors activated.\n", cpucount+1);
 17.1622 -    }
 17.1623 -    smp_num_cpus = cpucount + 1;
 17.1624 -
 17.1625 -    Dprintk("Boot done.\n");
 17.1626 +	/*
 17.1627 +	 * Setup boot CPU information
 17.1628 +	 */
 17.1629 +	smp_store_cpu_info(0); /* Final full version of the data */
 17.1630 +	printk("CPU%d: ", 0);
 17.1631 +	print_cpu_info(&cpu_data[0]);
 17.1632  
 17.1633 -    /*
 17.1634 -     * Here we can be sure that there is an IO-APIC in the system. Let's
 17.1635 -     * go and set it up:
 17.1636 -     */
 17.1637 -    if ( nr_ioapics ) setup_IO_APIC();
 17.1638 +	boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
 17.1639 +	boot_cpu_logical_apicid = logical_smp_processor_id();
 17.1640 +	x86_cpu_to_apicid[0] = boot_cpu_physical_apicid;
 17.1641  
 17.1642 -    /* Set up all local APIC timers in the system. */
 17.1643 -    {
 17.1644 -        extern void setup_APIC_clocks(void);
 17.1645 -        setup_APIC_clocks();
 17.1646 -    }
 17.1647 +	/*current_thread_info()->cpu = 0;*/
 17.1648 +	smp_tune_scheduling();
 17.1649 +	cpus_clear(cpu_sibling_map[0]);
 17.1650 +	cpu_set(0, cpu_sibling_map[0]);
 17.1651  
 17.1652 -    /* Synchronize the TSC with the AP(s). */
 17.1653 -    if ( cpucount ) synchronize_tsc_bp();
 17.1654 +	/*
 17.1655 +	 * If we couldn't find an SMP configuration at boot time,
 17.1656 +	 * get out of here now!
 17.1657 +	 */
 17.1658 +	if (!smp_found_config && !acpi_lapic) {
 17.1659 +		printk(KERN_NOTICE "SMP motherboard not detected.\n");
 17.1660 +		smpboot_clear_io_apic_irqs();
 17.1661 +		phys_cpu_present_map = physid_mask_of_physid(0);
 17.1662 +		if (APIC_init_uniprocessor())
 17.1663 +			printk(KERN_NOTICE "Local APIC not detected."
 17.1664 +					   " Using dummy APIC emulation.\n");
 17.1665 +		map_cpu_to_logical_apicid();
 17.1666 +		return;
 17.1667 +	}
 17.1668  
 17.1669 - smp_done:
 17.1670 -    ;
 17.1671 +	/*
 17.1672 +	 * Should not be necessary because the MP table should list the boot
 17.1673 +	 * CPU too, but we do it for the sake of robustness anyway.
 17.1674 +	 * Makes no sense to do this check in clustered apic mode, so skip it
 17.1675 +	 */
 17.1676 +	if (!check_phys_apicid_present(boot_cpu_physical_apicid)) {
 17.1677 +		printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
 17.1678 +				boot_cpu_physical_apicid);
 17.1679 +		physid_set(hard_smp_processor_id(), phys_cpu_present_map);
 17.1680 +	}
 17.1681 +
 17.1682 +	/*
 17.1683 +	 * If we couldn't find a local APIC, then get out of here now!
 17.1684 +	 */
 17.1685 +	if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) && !cpu_has_apic) {
 17.1686 +		printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
 17.1687 +			boot_cpu_physical_apicid);
 17.1688 +		printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n");
 17.1689 +		smpboot_clear_io_apic_irqs();
 17.1690 +		phys_cpu_present_map = physid_mask_of_physid(0);
 17.1691 +		return;
 17.1692 +	}
 17.1693 +
 17.1694 +	verify_local_APIC();
 17.1695 +
 17.1696 +	/*
 17.1697 +	 * If SMP should be disabled, then really disable it!
 17.1698 +	 */
 17.1699 +	if (!max_cpus) {
 17.1700 +		smp_found_config = 0;
 17.1701 +		printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n");
 17.1702 +		smpboot_clear_io_apic_irqs();
 17.1703 +		phys_cpu_present_map = physid_mask_of_physid(0);
 17.1704 +		return;
 17.1705 +	}
 17.1706 +
 17.1707 +	connect_bsp_APIC();
 17.1708 +	setup_local_APIC();
 17.1709 +	map_cpu_to_logical_apicid();
 17.1710 +
 17.1711 +
 17.1712 +	setup_portio_remap();
 17.1713 +
 17.1714 +	/*
 17.1715 +	 * Scan the CPU present map and fire up the other CPUs via do_boot_cpu
 17.1716 +	 *
  17.1717 +	 * In clustered apic mode, phys_cpu_present_map is constructed thus:
 17.1718 +	 * bits 0-3 are quad0, 4-7 are quad1, etc. A perverse twist on the 
 17.1719 +	 * clustered apic ID.
 17.1720 +	 */
 17.1721 +	Dprintk("CPU present map: %lx\n", physids_coerce(phys_cpu_present_map));
 17.1722 +
 17.1723 +	kicked = 1;
 17.1724 +	for (bit = 0; kicked < NR_CPUS && bit < MAX_APICS; bit++) {
 17.1725 +		apicid = cpu_present_to_apicid(bit);
 17.1726 +		/*
 17.1727 +		 * Don't even attempt to start the boot CPU!
 17.1728 +		 */
 17.1729 +		if ((apicid == boot_cpu_apicid) || (apicid == BAD_APICID))
 17.1730 +			continue;
 17.1731 +
 17.1732 +		if (!check_apicid_present(bit))
 17.1733 +			continue;
 17.1734 +		if (max_cpus <= cpucount+1)
 17.1735 +			continue;
 17.1736 +
 17.1737 +		if (do_boot_cpu(apicid))
 17.1738 +			printk("CPU #%d not responding - cannot use it.\n",
 17.1739 +								apicid);
 17.1740 +		else
 17.1741 +			++kicked;
 17.1742 +	}
 17.1743 +
 17.1744 +	/*
 17.1745 +	 * Cleanup possible dangling ends...
 17.1746 +	 */
 17.1747 +	smpboot_restore_warm_reset_vector();
 17.1748 +
 17.1749 +#ifdef BOGOMIPS
 17.1750 +	/*
 17.1751 +	 * Allow the user to impress friends.
 17.1752 +	 */
 17.1753 +	Dprintk("Before bogomips.\n");
 17.1754 +	for (cpu = 0; cpu < NR_CPUS; cpu++)
 17.1755 +		if (cpu_isset(cpu, cpu_callout_map))
 17.1756 +			bogosum += cpu_data[cpu].loops_per_jiffy;
 17.1757 +	printk(KERN_INFO
 17.1758 +		"Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
 17.1759 +		cpucount+1,
 17.1760 +		bogosum/(500000/HZ),
 17.1761 +		(bogosum/(5000/HZ))%100);
 17.1762 +#else
 17.1763 +	printk("Total of %d processors activated.\n", cpucount+1);
 17.1764 +#endif
 17.1765 +	
 17.1766 +	Dprintk("Before bogocount - setting activated=1.\n");
 17.1767 +
 17.1768 +	if (smp_b_stepping)
 17.1769 +		printk(KERN_WARNING "WARNING: SMP operation may be unreliable with B stepping processors.\n");
 17.1770 +
 17.1771 +	/*
 17.1772 +	 * Don't taint if we are running SMP kernel on a single non-MP
 17.1773 +	 * approved Athlon
 17.1774 +	 */
 17.1775 +	if (tainted & TAINT_UNSAFE_SMP) {
 17.1776 +		if (cpucount)
 17.1777 +			printk (KERN_INFO "WARNING: This combination of AMD processors is not suitable for SMP.\n");
 17.1778 +		else
 17.1779 +			tainted &= ~TAINT_UNSAFE_SMP;
 17.1780 +	}
 17.1781 +
 17.1782 +	Dprintk("Boot done.\n");
 17.1783 +
 17.1784 +	/*
 17.1785 +	 * construct cpu_sibling_map[], so that we can tell sibling CPUs
 17.1786 +	 * efficiently.
 17.1787 +	 */
 17.1788 +	for (cpu = 0; cpu < NR_CPUS; cpu++)
 17.1789 +		cpus_clear(cpu_sibling_map[cpu]);
 17.1790 +
 17.1791 +	for (cpu = 0; cpu < NR_CPUS; cpu++) {
 17.1792 +		int siblings = 0;
 17.1793 +		int i;
 17.1794 +		if (!cpu_isset(cpu, cpu_callout_map))
 17.1795 +			continue;
 17.1796 +
 17.1797 +		if (smp_num_siblings > 1) {
 17.1798 +			for (i = 0; i < NR_CPUS; i++) {
 17.1799 +				if (!cpu_isset(i, cpu_callout_map))
 17.1800 +					continue;
 17.1801 +				if (phys_proc_id[cpu] == phys_proc_id[i]) {
 17.1802 +					siblings++;
 17.1803 +					cpu_set(i, cpu_sibling_map[cpu]);
 17.1804 +				}
 17.1805 +			}
 17.1806 +		} else {
 17.1807 +			siblings++;
 17.1808 +			cpu_set(cpu, cpu_sibling_map[cpu]);
 17.1809 +		}
 17.1810 +
 17.1811 +		if (siblings != smp_num_siblings)
 17.1812 +			printk(KERN_WARNING "WARNING: %d siblings found for CPU%d, should be %d\n", siblings, cpu, smp_num_siblings);
 17.1813 +	}
 17.1814 +
 17.1815 +	if (nmi_watchdog == NMI_LOCAL_APIC)
 17.1816 +		check_nmi_watchdog();
 17.1817 +
 17.1818 +	smpboot_setup_io_apic();
 17.1819 +
 17.1820 +	setup_boot_APIC_clock();
 17.1821 +
 17.1822 +	/*
 17.1823 +	 * Synchronize the TSC with the AP
 17.1824 +	 */
 17.1825 +	if (cpu_has_tsc && cpucount && cpu_khz)
 17.1826 +		synchronize_tsc_bp();
 17.1827  }
 17.1828  
 17.1829 -/*
 17.1830 - * Local variables:
 17.1831 - * mode: C
 17.1832 - * c-set-style: "BSD"
 17.1833 - * c-basic-offset: 4
 17.1834 - * tab-width: 4
 17.1835 - * indent-tabs-mode: nil
 17.1836 - * End:
 17.1837 - */
 17.1838 +/* These are wrappers to interface to the new boot process.  Someone
 17.1839 +   who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */
 17.1840 +void __init smp_prepare_cpus(unsigned int max_cpus)
 17.1841 +{
 17.1842 +	smp_boot_cpus(max_cpus);
 17.1843 +}
 17.1844 +
 17.1845 +void __devinit smp_prepare_boot_cpu(void)
 17.1846 +{
 17.1847 +	cpu_set(smp_processor_id(), cpu_online_map);
 17.1848 +	cpu_set(smp_processor_id(), cpu_callout_map);
 17.1849 +}
 17.1850 +
 17.1851 +int __devinit __cpu_up(unsigned int cpu)
 17.1852 +{
 17.1853 +	/* This only works at boot for x86.  See "rewrite" above. */
 17.1854 +	if (cpu_isset(cpu, smp_commenced_mask)) {
 17.1855 +		local_irq_enable();
 17.1856 +		return -ENOSYS;
 17.1857 +	}
 17.1858 +
 17.1859 +	/* In case one didn't come up */
 17.1860 +	if (!cpu_isset(cpu, cpu_callin_map)) {
 17.1861 +		local_irq_enable();
 17.1862 +		return -EIO;
 17.1863 +	}
 17.1864 +
 17.1865 +	local_irq_enable();
 17.1866 +	/* Unleash the CPU! */
 17.1867 +	cpu_set(cpu, smp_commenced_mask);
 17.1868 +	while (!cpu_isset(cpu, cpu_online_map))
 17.1869 +		mb();
 17.1870 +	return 0;
 17.1871 +}
 17.1872 +
 17.1873 +void __init smp_cpus_done(unsigned int max_cpus)
 17.1874 +{
 17.1875 +#ifdef CONFIG_X86_IO_APIC
 17.1876 +	setup_ioapic_dest();
 17.1877 +#endif
 17.1878 +#ifdef CONFIG_X86_64
 17.1879 +	zap_low_mappings();
 17.1880 +#endif
 17.1881 +	/*
 17.1882 +	 * Disable executability of the SMP trampoline:
 17.1883 +	 */
 17.1884 +	set_kernel_exec((unsigned long)trampoline_base, trampoline_exec);
 17.1885 +}
 17.1886 +
 17.1887 +#if 0
 17.1888 +void __init smp_intr_init(void)
 17.1889 +{
 17.1890 +	/*
 17.1891 +	 * IRQ0 must be given a fixed assignment and initialized,
 17.1892 +	 * because it's used before the IO-APIC is set up.
 17.1893 +	 */
 17.1894 +	set_intr_gate(FIRST_DEVICE_VECTOR, interrupt[0]);
 17.1895 +
 17.1896 +	/*
 17.1897 +	 * The reschedule interrupt is a CPU-to-CPU reschedule-helper
 17.1898 +	 * IPI, driven by wakeup.
 17.1899 +	 */
 17.1900 +	set_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
 17.1901 +
 17.1902 +	/* IPI for invalidation */
 17.1903 +	set_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);
 17.1904 +
 17.1905 +	/* IPI for generic function call */
 17.1906 +	set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
 17.1907 +}
 17.1908 +#endif
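
The tail of smpboot.c switches from the single smp_boot_cpus() entry point to the Linux "hotplug" style quartet: smp_prepare_boot_cpu(), smp_prepare_cpus(), one __cpu_up() call per secondary (which sets smp_commenced_mask and waits on cpu_online_map), and smp_cpus_done(). The reduced model below only illustrates the calling order the generic boot code is assumed to follow; the bodies are placeholders and the toy_* names are not part of the patch.

    #include <stdio.h>

    #define TOY_NR_CPUS 4

    /* Reduced model of the new boot flow: prepare, bring up each CPU, finish. */
    static void toy_smp_prepare_boot_cpu(void)
    {
        printf("CPU0: marked online and called out\n");
    }

    static void toy_smp_prepare_cpus(unsigned int max_cpus)
    {
        printf("waking up to %u secondaries via INIT/STARTUP\n", max_cpus - 1);
    }

    static int toy_cpu_up(unsigned int cpu)
    {
        /* The real __cpu_up() sets smp_commenced_mask and spins until the CPU
         * appears in cpu_online_map; here we simply report success. */
        printf("CPU%u: commenced, waiting until online\n", cpu);
        return 0;
    }

    static void toy_smp_cpus_done(unsigned int max_cpus)
    {
        printf("%u CPUs done: IO-APIC destinations set, trampoline locked\n", max_cpus);
    }

    int main(void)
    {
        unsigned int cpu;

        toy_smp_prepare_boot_cpu();
        toy_smp_prepare_cpus(TOY_NR_CPUS);
        for (cpu = 1; cpu < TOY_NR_CPUS; cpu++)
            if (toy_cpu_up(cpu) != 0)
                printf("CPU%u failed to come up\n", cpu);
        toy_smp_cpus_done(TOY_NR_CPUS);
        return 0;
    }
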
    18.1 --- a/xen/arch/x86/time.c	Wed May 25 10:32:53 2005 +0000
    18.2 +++ b/xen/arch/x86/time.c	Wed May 25 10:36:59 2005 +0000
    18.3 @@ -37,7 +37,6 @@ unsigned long cpu_khz;  /* Detected as w
    18.4  unsigned long ticks_per_usec; /* TSC ticks per microsecond. */
    18.5  spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED;
    18.6  int timer_ack = 0;
    18.7 -int do_timer_lists_from_pit = 0;
    18.8  unsigned long volatile jiffies;
    18.9  
   18.10  /* PRIVATE */
   18.11 @@ -91,7 +90,7 @@ void timer_interrupt(int irq, void *dev_
   18.12      write_unlock_irq(&time_lock);
   18.13  
   18.14      /* Rough hack to allow accurate timers to sort-of-work with no APIC. */
   18.15 -    if ( do_timer_lists_from_pit )
   18.16 +    if ( !cpu_has_apic )
   18.17          raise_softirq(AC_TIMER_SOFTIRQ);
   18.18  }
   18.19  
    19.1 --- a/xen/arch/x86/traps.c	Wed May 25 10:32:53 2005 +0000
    19.2 +++ b/xen/arch/x86/traps.c	Wed May 25 10:36:59 2005 +0000
    19.3 @@ -99,6 +99,7 @@ integer_param("debug_stack_lines", debug
    19.4  
    19.5  static inline int kernel_text_address(unsigned long addr)
    19.6  {
    19.7 +    extern char _stext, _etext;
    19.8      if (addr >= (unsigned long) &_stext &&
    19.9          addr <= (unsigned long) &_etext)
   19.10          return 1;
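
The traps.c hunk declares the _stext/_etext linker symbols locally so kernel_text_address() can bound-check an address against the hypervisor text section. A minimal user-space approximation of that range check, with invented numeric bounds standing in for the real linker symbols, is:

    #include <stdio.h>

    /* In the hypervisor these bounds come from `extern char _stext, _etext;`
     * resolved by the linker script; here they are made-up constants. */
    static int toy_kernel_text_address(unsigned long addr,
                                       unsigned long stext, unsigned long etext)
    {
        /* An address counts as kernel text iff it lies within [stext, etext]. */
        return addr >= stext && addr <= etext;
    }

    int main(void)
    {
        unsigned long stext = 0x100000, etext = 0x180000;   /* invented bounds */

        printf("%d\n", toy_kernel_text_address(0x120000, stext, etext)); /* 1 */
        printf("%d\n", toy_kernel_text_address(0x200000, stext, etext)); /* 0 */
        return 0;
    }
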
    20.1 --- a/xen/arch/x86/vmx.c	Wed May 25 10:32:53 2005 +0000
    20.2 +++ b/xen/arch/x86/vmx.c	Wed May 25 10:36:59 2005 +0000
    20.3 @@ -22,10 +22,10 @@
    20.4  #include <xen/lib.h>
    20.5  #include <xen/trace.h>
    20.6  #include <xen/sched.h>
    20.7 +#include <xen/irq.h>
    20.8  #include <xen/softirq.h>
    20.9  #include <asm/current.h>
   20.10  #include <asm/io.h>
   20.11 -#include <asm/irq.h>
   20.12  #include <asm/shadow.h>
   20.13  #include <asm/regs.h>
   20.14  #include <asm/cpufeature.h>
   20.15 @@ -49,7 +49,7 @@ extern long evtchn_send(int lport);
   20.16  extern long do_block(void);
   20.17  void do_nmi(struct cpu_user_regs *, unsigned long);
   20.18  
   20.19 -int start_vmx()
   20.20 +int start_vmx(void)
   20.21  {
   20.22      struct vmcs_struct *vmcs;
   20.23      u32 ecx;
   20.24 @@ -70,12 +70,14 @@ int start_vmx()
   20.25      if (eax & IA32_FEATURE_CONTROL_MSR_LOCK) {
   20.26          if ((eax & IA32_FEATURE_CONTROL_MSR_ENABLE_VMXON) == 0x0) {
   20.27                  printk("VMX disabled by Feature Control MSR.\n");
   20.28 -		return 0;
   20.29 +                return 0;
   20.30          }
   20.31      }
   20.32 -    else 
   20.33 +    else {
   20.34          wrmsr(IA32_FEATURE_CONTROL_MSR, 
   20.35 -              IA32_FEATURE_CONTROL_MSR_LOCK | IA32_FEATURE_CONTROL_MSR_ENABLE_VMXON, 0);
   20.36 +              IA32_FEATURE_CONTROL_MSR_LOCK |
   20.37 +              IA32_FEATURE_CONTROL_MSR_ENABLE_VMXON, 0);
   20.38 +    }
   20.39  
   20.40      set_in_cr4(X86_CR4_VMXE);   /* Enable VMXE */
   20.41  
   20.42 @@ -93,7 +95,7 @@ int start_vmx()
   20.43      return 1;
   20.44  }
   20.45  
   20.46 -void stop_vmx()
   20.47 +void stop_vmx(void)
   20.48  {
   20.49      if (read_cr4() & X86_CR4_VMXE)
   20.50          __vmxoff();
   20.51 @@ -167,7 +169,7 @@ static int vmx_do_page_fault(unsigned lo
   20.52      return result;
   20.53  }
   20.54  
   20.55 -static void vmx_do_no_device_fault() 
   20.56 +static void vmx_do_no_device_fault(void)
   20.57  {
   20.58      unsigned long cr0;
   20.59          
    21.1 --- a/xen/common/ac_timer.c	Wed May 25 10:32:53 2005 +0000
    21.2 +++ b/xen/common/ac_timer.c	Wed May 25 10:36:59 2005 +0000
    21.3 @@ -125,7 +125,7 @@ static int add_entry(struct ac_timer **h
    21.4          struct ac_timer **new_heap = xmalloc_array(struct ac_timer *, limit);
    21.5          if ( new_heap == NULL ) BUG();
    21.6          memcpy(new_heap, heap, (limit>>1)*sizeof(struct ac_timer *));
    21.7 -        for ( i = 0; i < smp_num_cpus; i++ )
    21.8 +        for ( i = 0; i < NR_CPUS; i++ )
    21.9              if ( ac_timers[i].heap == heap )
   21.10                  ac_timers[i].heap = new_heap;
   21.11          xfree(heap);
   21.12 @@ -248,7 +248,7 @@ static void dump_timerq(unsigned char ke
   21.13      printk("Dumping ac_timer queues: NOW=0x%08X%08X\n",
   21.14             (u32)(now>>32), (u32)now); 
   21.15  
   21.16 -    for ( i = 0; i < smp_num_cpus; i++ )
   21.17 +    for_each_online_cpu( i )
   21.18      {
   21.19          printk("CPU[%02d] ", i);
   21.20          spin_lock_irqsave(&ac_timers[i].lock, flags);
   21.21 @@ -270,7 +270,7 @@ void __init ac_timer_init(void)
   21.22  
   21.23      open_softirq(AC_TIMER_SOFTIRQ, ac_timer_softirq_action);
   21.24  
   21.25 -    for ( i = 0; i < smp_num_cpus; i++ )
   21.26 +    for ( i = 0; i < NR_CPUS; i++ )
   21.27      {
   21.28          ac_timers[i].heap = xmalloc_array(
   21.29              struct ac_timer *, DEFAULT_HEAP_LIMIT+1);
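
The ac_timer.c changes split the CPU loops: set-up and heap fix-up now walk every possible slot (NR_CPUS), while the debug dump walks only CPUs that are actually online via for_each_online_cpu(). The sketch below models that distinction with a hand-rolled online bitmap; the real iterator and online map come from xen/cpumask.h.

    #include <stdio.h>

    #define TOY_NR_CPUS 8

    /* Hand-rolled online map; in Xen this is the cpu_online_map cpumask_t. */
    static unsigned long toy_online_map = 0x5;            /* CPUs 0 and 2 online */

    #define toy_cpu_online(c) ((toy_online_map >> (c)) & 1UL)

    int main(void)
    {
        int i;

        /* Set-up style loop: touch every possible CPU slot, online or not. */
        for (i = 0; i < TOY_NR_CPUS; i++)
            printf("allocate timer heap for CPU slot %d\n", i);

        /* Reporting style loop: only CPUs that are actually online,
         * which is what for_each_online_cpu() skips to. */
        for (i = 0; i < TOY_NR_CPUS; i++) {
            if (!toy_cpu_online(i))
                continue;
            printf("dump timer queue of CPU %d\n", i);
        }
        return 0;
    }
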
    22.1 --- a/xen/common/dom0_ops.c	Wed May 25 10:32:53 2005 +0000
    22.2 +++ b/xen/common/dom0_ops.c	Wed May 25 10:36:59 2005 +0000
    22.3 @@ -155,7 +155,7 @@ long do_dom0_op(dom0_op_t *u_dom0_op)
    22.4          unsigned int        pro;
    22.5          domid_t             dom;
    22.6          struct exec_domain *ed;
    22.7 -        unsigned int        i, ht, cnt[NR_CPUS] = { 0 };
    22.8 +        unsigned int        i, cnt[NR_CPUS] = { 0 };
    22.9  
   22.10  
   22.11          dom = op->u.createdomain.domain;
   22.12 @@ -182,9 +182,8 @@ long do_dom0_op(dom0_op_t *u_dom0_op)
   22.13           * domains will all share the second HT of each CPU. Since dom0 is on 
   22.14  	     * CPU 0, we favour high numbered CPUs in the event of a tie.
   22.15           */
   22.16 -        ht = opt_noht ? 1 : ht_per_core;
   22.17 -        pro = ht-1;
   22.18 -        for ( i = pro; i < smp_num_cpus; i += ht )
   22.19 +        pro = ht_per_core - 1;
   22.20 +        for ( i = pro; i < num_online_cpus(); i += ht_per_core )
   22.21              if ( cnt[i] <= cnt[pro] )
   22.22                  pro = i;
   22.23  
   22.24 @@ -269,7 +268,7 @@ long do_dom0_op(dom0_op_t *u_dom0_op)
   22.25          else
   22.26          {
   22.27              /* pick a new cpu from the usable map */
   22.28 -            int new_cpu = (int)find_first_set_bit(cpumap) % smp_num_cpus;
   22.29 +            int new_cpu = (int)find_first_set_bit(cpumap) % num_online_cpus();
   22.30  
   22.31              exec_domain_pause(ed);
   22.32              if ( ed->processor != new_cpu )
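
With opt_noht gone, the DOM0_CREATEDOMAIN placement loop always starts at the last sibling of core 0 and steps by ht_per_core, keeping the CPU with the fewest vcpus and letting "<=" favour higher-numbered CPUs on a tie. A reduced, stand-alone version of that selection, with an invented load array, behaves like this:

    #include <stdio.h>

    /*
     * Reduced model of the placement loop in DOM0_CREATEDOMAIN: start at the
     * last sibling of core 0, step by ht_per_core, and keep the CPU with the
     * fewest vcpus; "<=" favours higher-numbered CPUs on a tie.
     */
    static unsigned int toy_pick_cpu(const unsigned int cnt[], unsigned int ncpus,
                                     unsigned int ht_per_core)
    {
        unsigned int i, pro = ht_per_core - 1;

        for (i = pro; i < ncpus; i += ht_per_core)
            if (cnt[i] <= cnt[pro])
                pro = i;
        return pro;
    }

    int main(void)
    {
        unsigned int cnt[8] = { 3, 1, 2, 1, 0, 2, 0, 1 };  /* invented vcpu counts */

        /* Only odd-numbered CPUs are candidates when ht_per_core == 2. */
        printf("chosen CPU: %u\n", toy_pick_cpu(cnt, 8, 2));
        return 0;
    }
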
    23.1 --- a/xen/common/domain.c	Wed May 25 10:32:53 2005 +0000
    23.2 +++ b/xen/common/domain.c	Wed May 25 10:36:59 2005 +0000
    23.3 @@ -50,7 +50,10 @@ struct domain *do_createdomain(domid_t d
    23.4      INIT_LIST_HEAD(&d->page_list);
    23.5      INIT_LIST_HEAD(&d->xenpage_list);
    23.6  
    23.7 -    if ( (d->domain_id != IDLE_DOMAIN_ID) &&
    23.8 +    if ( d->domain_id == IDLE_DOMAIN_ID )
    23.9 +        set_bit(_DOMF_idle_domain, &d->domain_flags);
   23.10 +
   23.11 +    if ( !is_idle_task(d) &&
   23.12           ((init_event_channels(d) != 0) || (grant_table_create(d) != 0)) )
   23.13      {
   23.14          destroy_event_channels(d);
   23.15 @@ -62,7 +65,7 @@ struct domain *do_createdomain(domid_t d
   23.16      
   23.17      sched_add_domain(ed);
   23.18  
   23.19 -    if ( d->domain_id != IDLE_DOMAIN_ID )
   23.20 +    if ( !is_idle_task(d) )
   23.21      {
   23.22          write_lock(&domlist_lock);
   23.23          pd = &domain_list; /* NB. domain_list maintained in order of dom_id. */
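
do_createdomain() now records the idle property as a domain flag at creation time, so later checks use is_idle_task() instead of comparing against IDLE_DOMAIN_ID everywhere. A toy version of that pattern, with illustrative constants in place of the real _DOMF_idle_domain definitions, is:

    #include <stdio.h>

    #define TOY_IDLE_DOMAIN_ID  0x7FFFu      /* illustrative value only */
    #define TOY_DOMF_IDLE       (1u << 0)    /* stand-in for _DOMF_idle_domain */

    struct toy_domain {
        unsigned int domain_id;
        unsigned int domain_flags;
    };

    /* After the patch, "is this the idle domain?" is a flag test rather than a
     * repeated comparison against IDLE_DOMAIN_ID. */
    static int toy_is_idle_task(const struct toy_domain *d)
    {
        return d->domain_flags & TOY_DOMF_IDLE;
    }

    static void toy_createdomain(struct toy_domain *d, unsigned int id)
    {
        d->domain_id = id;
        d->domain_flags = 0;
        if (id == TOY_IDLE_DOMAIN_ID)
            d->domain_flags |= TOY_DOMF_IDLE;   /* set once, tested everywhere */
    }

    int main(void)
    {
        struct toy_domain idle, dom0;

        toy_createdomain(&idle, TOY_IDLE_DOMAIN_ID);
        toy_createdomain(&dom0, 0);
        printf("idle? %d %d\n", toy_is_idle_task(&idle), toy_is_idle_task(&dom0));
        return 0;
    }
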
    24.1 --- a/xen/common/page_alloc.c	Wed May 25 10:32:53 2005 +0000
    24.2 +++ b/xen/common/page_alloc.c	Wed May 25 10:36:59 2005 +0000
    24.3 @@ -45,8 +45,8 @@ string_param("badpage", opt_badpage);
    24.4  #define round_pgdown(_p)  ((_p)&PAGE_MASK)
    24.5  #define round_pgup(_p)    (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
    24.6  
    24.7 -static spinlock_t page_scrub_lock;
    24.8 -struct list_head page_scrub_list;
    24.9 +static spinlock_t page_scrub_lock = SPIN_LOCK_UNLOCKED;
   24.10 +LIST_HEAD(page_scrub_list);
   24.11  
   24.12  /*********************
   24.13   * ALLOCATION BITMAP
   24.14 @@ -675,8 +675,6 @@ static void page_scrub_softirq(void)
   24.15  
   24.16  static __init int page_scrub_init(void)
   24.17  {
   24.18 -    spin_lock_init(&page_scrub_lock);
   24.19 -    INIT_LIST_HEAD(&page_scrub_list);
   24.20      open_softirq(PAGE_SCRUB_SOFTIRQ, page_scrub_softirq);
   24.21      return 0;
   24.22  }
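
page_alloc.c drops the run-time spin_lock_init()/INIT_LIST_HEAD() calls in favour of static initialisers (SPIN_LOCK_UNLOCKED and LIST_HEAD), so the scrub list is valid before any initcall runs. A minimal model of a self-initialising list head in that style:

    #include <stdio.h>

    /* Minimal doubly-linked list head, mirroring the kernel's struct list_head. */
    struct toy_list_head {
        struct toy_list_head *next, *prev;
    };

    /* Compile-time initialiser in the spirit of LIST_HEAD(page_scrub_list):
     * the head points at itself, so no *_init() call is needed at boot. */
    #define TOY_LIST_HEAD(name) struct toy_list_head name = { &(name), &(name) }

    TOY_LIST_HEAD(toy_page_scrub_list);

    int main(void)
    {
        printf("empty: %d\n", toy_page_scrub_list.next == &toy_page_scrub_list);
        return 0;
    }
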
    25.1 --- a/xen/common/perfc.c	Wed May 25 10:32:53 2005 +0000
    25.2 +++ b/xen/common/perfc.c	Wed May 25 10:36:59 2005 +0000
    25.3 @@ -55,10 +55,11 @@ void perfc_printall(unsigned char key)
    25.4              break;
    25.5          case TYPE_CPU:
    25.6          case TYPE_S_CPU:
    25.7 -            for ( j = sum = 0; j < smp_num_cpus; j++ )
    25.8 +            sum = 0;
    25.9 +            for_each_online_cpu ( j )
   25.10                  sum += atomic_read(&counters[j]);
   25.11              printk("TOTAL[%10d]  ", sum);
   25.12 -            for ( j = 0; j < smp_num_cpus; j++ )
   25.13 +            for_each_online_cpu ( j )
   25.14                  printk("CPU%02d[%10d]  ", j, atomic_read(&counters[j]));
   25.15              counters += NR_CPUS;
   25.16              break;
   25.17 @@ -84,7 +85,7 @@ void perfc_printall(unsigned char key)
   25.18  
   25.19  void perfc_reset(unsigned char key)
   25.20  {
   25.21 -    int i, j, sum;
   25.22 +    int i, j;
   25.23      s_time_t now = NOW();
   25.24      atomic_t *counters = (atomic_t *)&perfcounters;
   25.25  
   25.26 @@ -104,13 +105,13 @@ void perfc_reset(unsigned char key)
   25.27              counters += 1;
   25.28              break;
   25.29          case TYPE_CPU:
   25.30 -            for ( j = sum = 0; j < smp_num_cpus; j++ )
   25.31 +            for ( j = 0; j < NR_CPUS; j++ )
   25.32                  atomic_set(&counters[j],0);
   25.33          case TYPE_S_CPU:
   25.34              counters += NR_CPUS;
   25.35              break;
   25.36          case TYPE_ARRAY:
   25.37 -            for ( j = sum = 0; j < perfc_info[i].nr_elements; j++ )
   25.38 +            for ( j = 0; j < NR_CPUS; j++ )
   25.39                  atomic_set(&counters[j],0);
   25.40          case TYPE_S_ARRAY:
   25.41              counters += perfc_info[i].nr_elements;
   25.42 @@ -146,7 +147,7 @@ static int perfc_copy_info(dom0_perfc_de
   25.43                  break;
   25.44              case TYPE_CPU:
   25.45              case TYPE_S_CPU:
   25.46 -                perfc_d[i].nr_vals = smp_num_cpus;
   25.47 +                perfc_d[i].nr_vals = num_online_cpus();
   25.48                  break;
   25.49              case TYPE_ARRAY:
   25.50              case TYPE_S_ARRAY:
    26.1 --- a/xen/common/sched_bvt.c	Wed May 25 10:32:53 2005 +0000
    26.2 +++ b/xen/common/sched_bvt.c	Wed May 25 10:36:59 2005 +0000
    26.3 @@ -169,14 +169,19 @@ static inline u32 calc_evt(struct exec_d
    26.4  static int bvt_alloc_task(struct exec_domain *ed)
    26.5  {
    26.6      struct domain *d = ed->domain;
    26.7 -    if ( (d->sched_priv == NULL) ) {
    26.8 +
    26.9 +    if ( (d->sched_priv == NULL) )
   26.10 +    {
   26.11          if ( (d->sched_priv = xmalloc(struct bvt_dom_info)) == NULL )
   26.12              return -1;
   26.13          memset(d->sched_priv, 0, sizeof(struct bvt_dom_info));
   26.14      }
   26.15 +
   26.16      ed->sched_priv = &BVT_INFO(d)->ed_inf[ed->vcpu_id];
   26.17 +
   26.18      BVT_INFO(d)->ed_inf[ed->vcpu_id].inf = BVT_INFO(d);
   26.19      BVT_INFO(d)->ed_inf[ed->vcpu_id].exec_domain = ed;
   26.20 +
   26.21      return 0;
   26.22  }
   26.23  
   26.24 @@ -190,6 +195,15 @@ static void bvt_add_task(struct exec_dom
   26.25      ASSERT(inf != NULL);
   26.26      ASSERT(d   != NULL);
   26.27  
   26.28 +    /* Allocate per-CPU context if this is the first domain to be added. */
   26.29 +    if ( CPU_INFO(d->processor) == NULL )
   26.30 +    {
   26.31 +        schedule_data[d->processor].sched_priv = xmalloc(struct bvt_cpu_info);
   26.32 +        BUG_ON(CPU_INFO(d->processor) == NULL);
   26.33 +        INIT_LIST_HEAD(RUNQUEUE(d->processor));
   26.34 +        CPU_SVT(d->processor) = 0;
   26.35 +    }
   26.36 +
   26.37      if ( d->vcpu_id == 0 )
   26.38      {
   26.39          inf->mcu_advance = MCU_ADVANCE;
   26.40 @@ -213,9 +227,11 @@ static void bvt_add_task(struct exec_dom
   26.41  
   26.42      einf->exec_domain = d;
   26.43  
   26.44 -    if ( d->domain->domain_id == IDLE_DOMAIN_ID )
   26.45 +    if ( is_idle_task(d->domain) )
   26.46      {
   26.47          einf->avt = einf->evt = ~0U;
   26.48 +        BUG_ON(__task_on_runqueue(d));
   26.49 +        __add_to_runqueue_head(d);
   26.50      } 
   26.51      else 
   26.52      {
   26.53 @@ -225,20 +241,6 @@ static void bvt_add_task(struct exec_dom
   26.54      }
   26.55  }
   26.56  
   26.57 -static int bvt_init_idle_task(struct exec_domain *ed)
   26.58 -{
   26.59 -    if ( bvt_alloc_task(ed) < 0 )
   26.60 -        return -1;
   26.61 -
   26.62 -    bvt_add_task(ed);
   26.63 -
   26.64 -    set_bit(_VCPUF_running, &ed->vcpu_flags);
   26.65 -    if ( !__task_on_runqueue(ed) )
   26.66 -        __add_to_runqueue_head(ed);
   26.67 -
   26.68 -    return 0;
   26.69 -}
   26.70 -
   26.71  static void bvt_wake(struct exec_domain *ed)
   26.72  {
   26.73      struct bvt_edom_info *einf = EBVT_INFO(ed);
   26.74 @@ -548,36 +550,11 @@ static void bvt_dump_cpu_state(int i)
   26.75      }
   26.76  }
   26.77  
   26.78 -/* Initialise the data structures. */
   26.79 -static int bvt_init_scheduler(void)
   26.80 -{
   26.81 -    int i;
   26.82 -
   26.83 -    for ( i = 0; i < NR_CPUS; i++ )
   26.84 -    {
   26.85 -        schedule_data[i].sched_priv = xmalloc(struct bvt_cpu_info);
   26.86 -       
   26.87 -        if ( schedule_data[i].sched_priv == NULL )
   26.88 -        {
   26.89 -            printk("Failed to allocate BVT scheduler per-CPU memory!\n");
   26.90 -            return -1;
   26.91 -        }
   26.92 -
   26.93 -        INIT_LIST_HEAD(RUNQUEUE(i));
   26.94 -        
   26.95 -        CPU_SVT(i) = 0; /* XXX do I really need to do this? */
   26.96 -    }
   26.97 -
   26.98 -    return 0;
   26.99 -}
  26.100 -
  26.101  struct scheduler sched_bvt_def = {
  26.102      .name     = "Borrowed Virtual Time",
  26.103      .opt_name = "bvt",
  26.104      .sched_id = SCHED_BVT,
  26.105      
  26.106 -    .init_scheduler = bvt_init_scheduler,
  26.107 -    .init_idle_task = bvt_init_idle_task,
  26.108      .alloc_task     = bvt_alloc_task,
  26.109      .add_task       = bvt_add_task,
  26.110      .free_task      = bvt_free_task,
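
sched_bvt.c no longer needs the init_scheduler/init_idle_task hooks: the per-CPU BVT state is allocated lazily by the first bvt_add_task() on each CPU, and the idle vcpu is pushed onto the runqueue at the same point. The sketch below captures only the lazy-allocation part of that pattern; the structure contents and names are placeholders, not the real bvt_cpu_info.

    #include <stdio.h>
    #include <stdlib.h>

    #define TOY_NR_CPUS 4

    struct toy_cpu_info { int svt; };

    /* Stand-in for schedule_data[cpu].sched_priv. */
    static struct toy_cpu_info *toy_sched_priv[TOY_NR_CPUS];

    /*
     * Lazy per-CPU initialisation in the spirit of the patched bvt_add_task():
     * the first vcpu added on a CPU allocates that CPU's scheduler state, so a
     * separate init_scheduler()/init_idle_task() pass is no longer required.
     */
    static struct toy_cpu_info *toy_get_cpu_info(int cpu)
    {
        if (toy_sched_priv[cpu] == NULL) {
            toy_sched_priv[cpu] = calloc(1, sizeof(*toy_sched_priv[cpu]));
            if (toy_sched_priv[cpu] == NULL)
                abort();                /* the real code BUG()s on failure */
            toy_sched_priv[cpu]->svt = 0;   /* CPU_SVT(cpu) = 0 in the patch */
            printf("CPU%d: scheduler state allocated on first use\n", cpu);
        }
        return toy_sched_priv[cpu];
    }

    int main(void)
    {
        toy_get_cpu_info(1);   /* first vcpu on CPU1: allocates */
        toy_get_cpu_info(1);   /* second vcpu on CPU1: reuses   */
        return 0;
    }
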
    27.1 --- a/xen/common/sched_sedf.c	Wed May 25 10:32:53 2005 +0000
    27.2 +++ b/xen/common/sched_sedf.c	Wed May 25 10:36:59 2005 +0000
    27.3 @@ -13,20 +13,18 @@
    27.4  #include <xen/time.h>
    27.5  #include <xen/slab.h>
    27.6  
    27.7 -/*#include <xen/adv_sched_hist.h>*/
    27.8 -
    27.9  /*verbosity settings*/
   27.10  #define SEDFLEVEL 0
   27.11  #define PRINT(_f, _a...)  \
   27.12 -if ((_f)<=SEDFLEVEL) printk(_a );
   27.13 +    if ((_f)<=SEDFLEVEL) printk(_a );
   27.14  
   27.15  #ifndef NDEBUG
   27.16 -	#define SEDF_STATS
   27.17 -	#define CHECK(_p) if ( !(_p) ) \
   27.18 -	{ printk("Check '%s' failed, line %d, file %s\n", #_p , __LINE__,\
   27.19 -	__FILE__);}
   27.20 +#define SEDF_STATS
   27.21 +#define CHECK(_p) if ( !(_p) ) \
   27.22 + { printk("Check '%s' failed, line %d, file %s\n", #_p , __LINE__,\
   27.23 + __FILE__);}
   27.24  #else
   27.25 -	#define CHECK(_p) ((void)0)
   27.26 +#define CHECK(_p) ((void)0)
   27.27  #endif
   27.28  
   27.29  /*various ways of unblocking domains*/
   27.30 @@ -64,72 +62,72 @@ if ((_f)<=SEDFLEVEL) printk(_a );
   27.31  
   27.32  
   27.33  struct sedf_dom_info {
   27.34 -	struct domain		*domain;
   27.35 +    struct domain  *domain;
   27.36  };
   27.37  struct sedf_edom_info
   27.38  {
   27.39 -	struct exec_domain	*exec_domain;
   27.40 -	struct list_head	list;
   27.41 -	struct list_head	extralist[2];
   27.42 -	
   27.43 -	/*Parameters for EDF*/
   27.44 -	s_time_t		period;		/*=(relative deadline)*/
   27.45 -	s_time_t		slice;		/*=worst case execution time*/
   27.46 -	
   27.47 -	/*Advaced Parameters*/
   27.48 -	/*Latency Scaling*/
   27.49 -	s_time_t		period_orig;	
   27.50 -	s_time_t		slice_orig;
   27.51 -	s_time_t		latency;
   27.52 -	
   27.53 -	/*status of domain*/
   27.54 -	int			status;
   27.55 -	/*weights for "Scheduling for beginners/ lazy/ etc." ;)*/
   27.56 -	short			weight;
   27.57 -        short                   extraweight;
   27.58 -        /*Bookkeeping*/
   27.59 -	s_time_t		deadl_abs;
   27.60 -	s_time_t		sched_start_abs;
   27.61 -	s_time_t		cputime;
   27.62 -	/* times the domain un-/blocked */
   27.63 -	s_time_t		block_abs;
   27.64 -	s_time_t		unblock_abs;
   27.65 -	
   27.66 -	/*scores for {util, block penalty}-weighted extratime distribution*/
   27.67 -	int			score[2];	
   27.68 -	s_time_t		short_block_lost_tot;
   27.69 -	
   27.70 -	/*Statistics*/
   27.71 -	s_time_t		extra_time_tot;
   27.72 +    struct exec_domain *exec_domain;
   27.73 +    struct list_head list;
   27.74 +    struct list_head extralist[2];
   27.75 + 
   27.76 +    /*Parameters for EDF*/
   27.77 +    s_time_t  period;  /*=(relative deadline)*/
   27.78 +    s_time_t  slice;  /*=worst case execution time*/
   27.79 + 
   27.80 +    /*Advanced Parameters*/
   27.81 +    /*Latency Scaling*/
   27.82 +    s_time_t  period_orig; 
   27.83 +    s_time_t  slice_orig;
   27.84 +    s_time_t  latency;
   27.85 + 
   27.86 +    /*status of domain*/
   27.87 +    int   status;
   27.88 +    /*weights for "Scheduling for beginners/ lazy/ etc." ;)*/
   27.89 +    short   weight;
   27.90 +    short                   extraweight;
   27.91 +    /*Bookkeeping*/
   27.92 +    s_time_t  deadl_abs;
   27.93 +    s_time_t  sched_start_abs;
   27.94 +    s_time_t  cputime;
   27.95 +    /* times the domain un-/blocked */
   27.96 +    s_time_t  block_abs;
   27.97 +    s_time_t  unblock_abs;
   27.98 + 
   27.99 +    /*scores for {util, block penalty}-weighted extratime distribution*/
  27.100 +    int   score[2]; 
  27.101 +    s_time_t  short_block_lost_tot;
  27.102 + 
  27.103 +    /*Statistics*/
  27.104 +    s_time_t  extra_time_tot;
  27.105  
  27.106  #ifdef SEDF_STATS
  27.107 -	s_time_t		block_time_tot;
  27.108 -	s_time_t		penalty_time_tot;
  27.109 -	int			block_tot;
  27.110 -	int			short_block_tot;
  27.111 -	int			long_block_tot;
  27.112 -	int			short_cont;
  27.113 -	int			pen_extra_blocks;
  27.114 -	int			pen_extra_slices;
  27.115 +    s_time_t  block_time_tot;
  27.116 +    s_time_t  penalty_time_tot;
  27.117 +    int   block_tot;
  27.118 +    int   short_block_tot;
  27.119 +    int   long_block_tot;
  27.120 +    int   short_cont;
  27.121 +    int   pen_extra_blocks;
  27.122 +    int   pen_extra_slices;
  27.123  #endif
  27.124  };
  27.125  
  27.126  struct sedf_cpu_info {
  27.127 -	struct list_head runnableq;
  27.128 -	struct list_head waitq;
  27.129 -	struct list_head extraq[2];
  27.130 +    struct list_head runnableq;
  27.131 +    struct list_head waitq;
  27.132 +    struct list_head extraq[2];
  27.133  };
  27.134  
  27.135 -#define EDOM_INFO(d)		((struct sedf_edom_info *)((d)->sched_priv))
  27.136 -#define CPU_INFO(cpu)	((struct sedf_cpu_info *)schedule_data[cpu].sched_priv)
  27.137 -#define LIST(d)			(&EDOM_INFO(d)->list)
  27.138 -#define EXTRALIST(d,i)		(&(EDOM_INFO(d)->extralist[i]))
  27.139 -#define RUNQ(cpu)   		(&CPU_INFO(cpu)->runnableq)
  27.140 -#define WAITQ(cpu)   		(&CPU_INFO(cpu)->waitq)
  27.141 -#define EXTRAQ(cpu,i)  		(&(CPU_INFO(cpu)->extraq[i]))
  27.142 -#define IDLETASK(cpu)		((struct exec_domain *)schedule_data[cpu].idle)
  27.143 +#define EDOM_INFO(d)  ((struct sedf_edom_info *)((d)->sched_priv))
  27.144 +#define CPU_INFO(cpu) ((struct sedf_cpu_info *)schedule_data[cpu].sched_priv)
  27.145 +#define LIST(d)   (&EDOM_INFO(d)->list)
  27.146 +#define EXTRALIST(d,i)  (&(EDOM_INFO(d)->extralist[i]))
  27.147 +#define RUNQ(cpu)     (&CPU_INFO(cpu)->runnableq)
  27.148 +#define WAITQ(cpu)     (&CPU_INFO(cpu)->waitq)
  27.149 +#define EXTRAQ(cpu,i)    (&(CPU_INFO(cpu)->extraq[i]))
  27.150 +#define IDLETASK(cpu)  ((struct exec_domain *)schedule_data[cpu].idle)
  27.151  
  27.152 -#define PERIOD_BEGIN(inf)	((inf)->deadl_abs - (inf)->period)
  27.153 +#define PERIOD_BEGIN(inf) ((inf)->deadl_abs - (inf)->period)
  27.154  
  27.155  #define MIN(x,y) (((x)<(y))?(x):(y))
  27.156  #define DIV_UP(x,y) (((x) + (y) - 1) / y)
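
The DIV_UP helper defined here is what update_queues(), later in this file, uses to realign a stale deadline: when deadl_abs has fallen behind now, it is advanced by a whole number of periods so that it lands in the future again, on a period boundary. A small self-contained illustration with made-up numbers (not part of the patch):

    #include <stdio.h>
    #include <stdint.h>

    #define DIV_UP(x,y) (((x) + (y) - 1) / (y))

    int main(void)
    {
        int64_t period    = 20;   /* hypothetical period             */
        int64_t deadl_abs = 105;  /* stale absolute deadline         */
        int64_t now       = 173;  /* current time, past the deadline */

        /* Same arithmetic as the "still behind" branch of update_queues(). */
        if (deadl_abs < now)
            deadl_abs += DIV_UP(now - deadl_abs, period) * period;

        /* 105 + ceil(68/20)*20 = 185, which is ahead of now = 173 again. */
        printf("new deadline: %lld\n", (long long)deadl_abs);
        return 0;
    }
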
  27.157 @@ -142,8 +140,8 @@ struct sedf_cpu_info {
  27.158  static void sedf_dump_cpu_state(int i);
  27.159  
  27.160  static inline int extraq_on(struct exec_domain *d, int i) {
  27.161 -	return ((EXTRALIST(d,i)->next != NULL) &&
  27.162 -		(EXTRALIST(d,i)->next != EXTRALIST(d,i)));
  27.163 +    return ((EXTRALIST(d,i)->next != NULL) &&
  27.164 +            (EXTRALIST(d,i)->next != EXTRALIST(d,i)));
  27.165  }
  27.166  
  27.167  static inline void extraq_add_head(struct exec_domain *d, int i)
  27.168 @@ -160,13 +158,13 @@ static inline void extraq_add_tail(struc
  27.169  
  27.170  static inline void extraq_del(struct exec_domain *d, int i)
  27.171  {
  27.172 -	struct list_head *list = EXTRALIST(d,i);
  27.173 -	ASSERT(extraq_on(d,i));
  27.174 -	PRINT(3, "Removing domain %i.%i from L%i extraq\n", d->domain->domain_id,
  27.175 -	   d->vcpu_id, i);	
  27.176 -	list_del(list);
  27.177 -	list->next = NULL;
  27.178 -	ASSERT(!extraq_on(d, i));
  27.179 +    struct list_head *list = EXTRALIST(d,i);
  27.180 +    ASSERT(extraq_on(d,i));
  27.181 +    PRINT(3, "Removing domain %i.%i from L%i extraq\n", d->domain->domain_id,
  27.182 +          d->vcpu_id, i); 
  27.183 +    list_del(list);
  27.184 +    list->next = NULL;
  27.185 +    ASSERT(!extraq_on(d, i));
  27.186  }
  27.187  
  27.188  /* adds a domain to the queue of processes which are aware of extra time. List
  27.189 @@ -176,92 +174,92 @@ static inline void extraq_del(struct exe
  27.190     charging each domain that recieved extratime with an inverse of its weight.
  27.191   */ 
  27.192  static inline void extraq_add_sort_update(struct exec_domain *d, int i, int sub) {
  27.193 -	struct list_head      *cur;
  27.194 -	struct sedf_edom_info *curinf;
  27.195 -	
  27.196 -	ASSERT(!extraq_on(d,i));
  27.197 -	PRINT(3, "Adding domain %i.%i (score= %i, short_pen= %"PRIi64")"
  27.198 -              " to L%i extraq\n",
  27.199 -              d->domain->domain_id, d->vcpu_id, EDOM_INFO(d)->score[i],
  27.200 -              EDOM_INFO(d)->short_block_lost_tot, i);	
  27.201 -	/*iterate through all elements to find our "hole" and on our way
  27.202 -	  update all the other scores*/
  27.203 -	list_for_each(cur,EXTRAQ(d->processor,i)){
  27.204 -		curinf = list_entry(cur,struct sedf_edom_info,extralist[i]);
  27.205 -		curinf->score[i] -= sub;
  27.206 -		if (EDOM_INFO(d)->score[i] < curinf->score[i])
  27.207 -	 		break;
  27.208 -		else
  27.209 -			PRINT(4,"\tbehind domain %i.%i (score= %i)\n",
  27.210 -			      curinf->exec_domain->domain->domain_id,
  27.211 -			      curinf->exec_domain->vcpu_id, curinf->score[i]);
  27.212 -	}
  27.213 -	/*cur now contains the element, before which we'll enqueue*/
  27.214 -	PRINT(3, "\tlist_add to %p\n", cur->prev);
  27.215 -	list_add(EXTRALIST(d,i),cur->prev);
  27.216 -	
  27.217 -	/*continue updating the extraq*/
  27.218 -	if ((cur != EXTRAQ(d->processor,i)) && sub)
  27.219 -		for (cur = cur->next; cur != EXTRAQ(d->processor,i);
  27.220 -		     cur = cur-> next) {
  27.221 -			curinf = list_entry(cur,struct sedf_edom_info,
  27.222 -				extralist[i]);
  27.223 -			curinf->score[i] -= sub;
  27.224 -			PRINT(4, "\tupdating domain %i.%i (score= %u)\n",
  27.225 -			      curinf->exec_domain->domain->domain_id, 
  27.226 -			      curinf->exec_domain->vcpu_id, curinf->score[i]);
  27.227 -		}
  27.228 -	ASSERT(extraq_on(d,i));
  27.229 +    struct list_head      *cur;
  27.230 +    struct sedf_edom_info *curinf;
  27.231 + 
  27.232 +    ASSERT(!extraq_on(d,i));
  27.233 +    PRINT(3, "Adding domain %i.%i (score= %i, short_pen= %"PRIi64")"
  27.234 +          " to L%i extraq\n",
  27.235 +          d->domain->domain_id, d->vcpu_id, EDOM_INFO(d)->score[i],
  27.236 +          EDOM_INFO(d)->short_block_lost_tot, i); 
  27.237 +    /*iterate through all elements to find our "hole" and on our way
  27.238 +      update all the other scores*/
  27.239 +    list_for_each(cur,EXTRAQ(d->processor,i)){
  27.240 +        curinf = list_entry(cur,struct sedf_edom_info,extralist[i]);
  27.241 +        curinf->score[i] -= sub;
  27.242 +        if (EDOM_INFO(d)->score[i] < curinf->score[i])
  27.243 +            break;
  27.244 +        else
  27.245 +            PRINT(4,"\tbehind domain %i.%i (score= %i)\n",
  27.246 +                  curinf->exec_domain->domain->domain_id,
  27.247 +                  curinf->exec_domain->vcpu_id, curinf->score[i]);
  27.248 +    }
  27.249 +    /*cur now contains the element, before which we'll enqueue*/
  27.250 +    PRINT(3, "\tlist_add to %p\n", cur->prev);
  27.251 +    list_add(EXTRALIST(d,i),cur->prev);
  27.252 + 
  27.253 +    /*continue updating the extraq*/
  27.254 +    if ((cur != EXTRAQ(d->processor,i)) && sub)
  27.255 +        for (cur = cur->next; cur != EXTRAQ(d->processor,i);
  27.256 +             cur = cur-> next) {
  27.257 +            curinf = list_entry(cur,struct sedf_edom_info,
  27.258 +                                extralist[i]);
  27.259 +            curinf->score[i] -= sub;
  27.260 +            PRINT(4, "\tupdating domain %i.%i (score= %u)\n",
  27.261 +                  curinf->exec_domain->domain->domain_id, 
  27.262 +                  curinf->exec_domain->vcpu_id, curinf->score[i]);
  27.263 +        }
  27.264 +    ASSERT(extraq_on(d,i));
  27.265  }
  27.266  static inline void extraq_check(struct exec_domain *d) {
  27.267 -	if (extraq_on(d, EXTRA_UTIL_Q)) {
  27.268 -		PRINT(2,"Dom %i.%i is on L1 extraQ\n",d->domain->domain_id, d->vcpu_id);
  27.269 -		if (!(EDOM_INFO(d)->status & EXTRA_AWARE) &&
  27.270 -		    !extra_runs(EDOM_INFO(d))) {
  27.271 -			extraq_del(d, EXTRA_UTIL_Q);
  27.272 -			PRINT(2,"Removed dom %i.%i from L1 extraQ\n",
  27.273 -			      d->domain->domain_id, d->vcpu_id);
  27.274 -		}
  27.275 -	} else {
  27.276 -		PRINT(2,"Dom %i.%i is NOT on L1 extraQ\n",d->domain->domain_id,
  27.277 -		      d->vcpu_id);
  27.278 -		if ((EDOM_INFO(d)->status & EXTRA_AWARE) && sedf_runnable(d))
  27.279 -		{
  27.280 -			#if (EXTRA == EXTRA_ROUNDR)
  27.281 -			extraq_add_tail(d, EXTRA_UTIL_Q);
  27.282 -			#elif (EXTRA == EXTRA_SLICE_WEIGHT || \
  27.283 -			       EXTRA == EXTRA_BLOCK_WEIGHT)
  27.284 -			extraq_add_sort_update(d, EXTRA_UTIL_Q, 0);
  27.285 -			#elif
  27.286 -			;
  27.287 -			#endif
  27.288 -			PRINT(2,"Added dom %i.%i to L1 extraQ\n",d->domain->domain_id,
  27.289 -			      d->vcpu_id);
  27.290 -		}
  27.291 -	}
  27.292 +    if (extraq_on(d, EXTRA_UTIL_Q)) {
  27.293 +        PRINT(2,"Dom %i.%i is on L1 extraQ\n",d->domain->domain_id, d->vcpu_id);
  27.294 +        if (!(EDOM_INFO(d)->status & EXTRA_AWARE) &&
  27.295 +            !extra_runs(EDOM_INFO(d))) {
  27.296 +            extraq_del(d, EXTRA_UTIL_Q);
  27.297 +            PRINT(2,"Removed dom %i.%i from L1 extraQ\n",
  27.298 +                  d->domain->domain_id, d->vcpu_id);
  27.299 +        }
  27.300 +    } else {
  27.301 +        PRINT(2,"Dom %i.%i is NOT on L1 extraQ\n",d->domain->domain_id,
  27.302 +              d->vcpu_id);
  27.303 +        if ((EDOM_INFO(d)->status & EXTRA_AWARE) && sedf_runnable(d))
  27.304 +        {
  27.305 +#if (EXTRA == EXTRA_ROUNDR)
  27.306 +            extraq_add_tail(d, EXTRA_UTIL_Q);
  27.307 +#elif (EXTRA == EXTRA_SLICE_WEIGHT || \
  27.308 +          EXTRA == EXTRA_BLOCK_WEIGHT)
  27.309 +            extraq_add_sort_update(d, EXTRA_UTIL_Q, 0);
  27.310 +#elif
  27.311 +            ;
  27.312 +#endif
  27.313 +            PRINT(2,"Added dom %i.%i to L1 extraQ\n",d->domain->domain_id,
  27.314 +                  d->vcpu_id);
  27.315 +        }
  27.316 +    }
  27.317  }
  27.318  
  27.319  static inline void extraq_check_add_unblocked(struct exec_domain *d, 
  27.320 -    int priority) {
  27.321 -	struct sedf_edom_info *inf = EDOM_INFO(d);
  27.322 -	if (inf->status & EXTRA_AWARE) 
  27.323 -	#if (EXTRA == EXTRA_ROUNDR)
  27.324 -		if (priority)
  27.325 -			extraq_add_head(d,EXTRA_UTIL_Q);
  27.326 -		else
  27.327 -			extraq_add_tail(d,EXTRA_UTIL_Q);
  27.328 -	#elif (EXTRA == EXTRA_SLICE_WEIGHT \
  27.329 -	    || EXTRA == EXTRA_BLOCK_WEIGHT)
  27.330 -		/*put in on the weighted extraq, 
  27.331 -		  without updating any scores*/
  27.332 -		extraq_add_sort_update(d, EXTRA_UTIL_Q, 0);
  27.333 -	#else
  27.334 -		;
  27.335 -	#endif
  27.336 +                                              int priority) {
  27.337 +    struct sedf_edom_info *inf = EDOM_INFO(d);
  27.338 +    if (inf->status & EXTRA_AWARE) 
  27.339 +#if (EXTRA == EXTRA_ROUNDR)
  27.340 +        if (priority)
  27.341 +            extraq_add_head(d,EXTRA_UTIL_Q);
  27.342 +        else
  27.343 +            extraq_add_tail(d,EXTRA_UTIL_Q);
  27.344 +#elif (EXTRA == EXTRA_SLICE_WEIGHT \
  27.345 +     || EXTRA == EXTRA_BLOCK_WEIGHT)
  27.346 +    /*put in on the weighted extraq, 
  27.347 +    without updating any scores*/
  27.348 +    extraq_add_sort_update(d, EXTRA_UTIL_Q, 0);
  27.349 +#else
  27.350 +    ;
  27.351 +#endif
  27.352  }
  27.353  
  27.354  static inline int __task_on_queue(struct exec_domain *d) {
  27.355 -	return (((LIST(d))->next != NULL) && (LIST(d)->next != LIST(d)));
  27.356 +    return (((LIST(d))->next != NULL) && (LIST(d)->next != LIST(d)));
  27.357  }
  27.358  static inline void __del_from_queue(struct exec_domain *d)
  27.359  {
  27.360 @@ -277,41 +275,41 @@ static inline void __del_from_queue(stru
  27.361  typedef int(*list_comparer)(struct list_head* el1, struct list_head* el2);
  27.362  
  27.363  static inline void list_insert_sort(struct list_head *list,
  27.364 -    struct list_head *element, list_comparer comp) {
  27.365 -	struct list_head     *cur;
  27.366 -	/*iterate through all elements to find our "hole"*/
  27.367 -	list_for_each(cur,list){
  27.368 -		if (comp(element, cur) < 0)
  27.369 -	 		break;
  27.370 -	}
  27.371 -	/*cur now contains the element, before which we'll enqueue*/
  27.372 -	PRINT(3,"\tlist_add to %p\n",cur->prev);
  27.373 -	list_add(element, cur->prev);
  27.374 +                                    struct list_head *element, list_comparer comp) {
  27.375 +    struct list_head     *cur;
  27.376 +    /*iterate through all elements to find our "hole"*/
  27.377 +    list_for_each(cur,list){
  27.378 +        if (comp(element, cur) < 0)
  27.379 +            break;
  27.380 +    }
  27.381 +    /*cur now contains the element, before which we'll enqueue*/
  27.382 +    PRINT(3,"\tlist_add to %p\n",cur->prev);
  27.383 +    list_add(element, cur->prev);
  27.384  }  
  27.385  #define DOMAIN_COMPARER(name, field, comp1, comp2)          \
  27.386  int name##_comp(struct list_head* el1, struct list_head* el2) \
  27.387  {                                                           \
  27.388 -	struct sedf_edom_info *d1, *d2;                     \
  27.389 -	d1 = list_entry(el1,struct sedf_edom_info, field);  \
  27.390 -	d2 = list_entry(el2,struct sedf_edom_info, field);  \
  27.391 -	if ((comp1) == (comp2))                             \
  27.392 -		return 0;                                   \
  27.393 -	if ((comp1) < (comp2))                              \
  27.394 -		return -1;                                  \
  27.395 -	else                                                \
  27.396 -		return 1;                                   \
  27.397 + struct sedf_edom_info *d1, *d2;                     \
  27.398 + d1 = list_entry(el1,struct sedf_edom_info, field);  \
  27.399 + d2 = list_entry(el2,struct sedf_edom_info, field);  \
  27.400 + if ((comp1) == (comp2))                             \
  27.401 +  return 0;                                   \
  27.402 + if ((comp1) < (comp2))                              \
  27.403 +  return -1;                                  \
  27.404 + else                                                \
  27.405 +  return 1;                                   \
  27.406  }
  27.407  /* adds a domain to the queue of processes which wait for the beginning of the
  27.408     next period; this list is therefore sortet by this time, which is simply
  27.409     absol. deadline - period
  27.410   */ 
  27.411  DOMAIN_COMPARER(waitq, list, PERIOD_BEGIN(d1), PERIOD_BEGIN(d2))
  27.412 -static inline void __add_to_waitqueue_sort(struct exec_domain *d) {
  27.413 -	ASSERT(!__task_on_queue(d));
  27.414 -	PRINT(3,"Adding domain %i.%i (bop= %"PRIu64") to waitq\n",
  27.415 -              d->domain->domain_id, d->vcpu_id, PERIOD_BEGIN(EDOM_INFO(d)));
  27.416 -	list_insert_sort(WAITQ(d->processor), LIST(d), waitq_comp);
  27.417 -	ASSERT(__task_on_queue(d));
  27.418 +    static inline void __add_to_waitqueue_sort(struct exec_domain *d) {
  27.419 +    ASSERT(!__task_on_queue(d));
  27.420 +    PRINT(3,"Adding domain %i.%i (bop= %"PRIu64") to waitq\n",
  27.421 +          d->domain->domain_id, d->vcpu_id, PERIOD_BEGIN(EDOM_INFO(d)));
  27.422 +    list_insert_sort(WAITQ(d->processor), LIST(d), waitq_comp);
  27.423 +    ASSERT(__task_on_queue(d));
  27.424  }
  27.425  
  27.426  /* adds a domain to the queue of processes which have started their current
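For reference, the comparer declared just above by DOMAIN_COMPARER(waitq, list, PERIOD_BEGIN(d1), PERIOD_BEGIN(d2)) expands to roughly the following; this is shown only to make the generated three-way comparison explicit and is not new code in this changeset:

    int waitq_comp(struct list_head* el1, struct list_head* el2)
    {
        struct sedf_edom_info *d1, *d2;
        d1 = list_entry(el1, struct sedf_edom_info, list);
        d2 = list_entry(el2, struct sedf_edom_info, list);
        if (PERIOD_BEGIN(d1) == PERIOD_BEGIN(d2))
            return 0;
        if (PERIOD_BEGIN(d1) < PERIOD_BEGIN(d2))
            return -1;
        else
            return 1;
    }

list_insert_sort() then walks the wait queue and inserts the new element in front of the first entry that compares greater, so the queue stays ordered by PERIOD_BEGIN, i.e. absolute deadline minus period.
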
  27.427 @@ -320,247 +318,228 @@ static inline void __add_to_waitqueue_so
  27.428     task will run. As we are implementing EDF, this list is sorted by deadlines.
  27.429   */ 
  27.430  DOMAIN_COMPARER(runq, list, d1->deadl_abs, d2->deadl_abs)
  27.431 -static inline void __add_to_runqueue_sort(struct exec_domain *d) {
  27.432 -	PRINT(3,"Adding domain %i.%i (deadl= %"PRIu64") to runq\n",
  27.433 -              d->domain->domain_id, d->vcpu_id, EDOM_INFO(d)->deadl_abs);
  27.434 -	list_insert_sort(RUNQ(d->processor), LIST(d), runq_comp);
  27.435 -}
  27.436 -
  27.437 -/* Initialises the queues */
  27.438 -static int sedf_init_scheduler() {
  27.439 -	int i;
  27.440 -	PRINT(2,"sedf_init_scheduler was called\n");
  27.441 -	
  27.442 -	for ( i = 0; i < NR_CPUS; i++ ) {
  27.443 -		schedule_data[i].sched_priv = 
  27.444 -			xmalloc(struct sedf_cpu_info);
  27.445 -		if ( schedule_data[i].sched_priv == NULL )
  27.446 -			return -1;
  27.447 -		INIT_LIST_HEAD(WAITQ(i));
  27.448 -		INIT_LIST_HEAD(RUNQ(i));
  27.449 -		INIT_LIST_HEAD(EXTRAQ(i,EXTRA_PEN_Q));
  27.450 -		INIT_LIST_HEAD(EXTRAQ(i,EXTRA_UTIL_Q));
  27.451 -	}
  27.452 -	return 0;   
  27.453 +    static inline void __add_to_runqueue_sort(struct exec_domain *d) {
  27.454 +    PRINT(3,"Adding domain %i.%i (deadl= %"PRIu64") to runq\n",
  27.455 +          d->domain->domain_id, d->vcpu_id, EDOM_INFO(d)->deadl_abs);
  27.456 +    list_insert_sort(RUNQ(d->processor), LIST(d), runq_comp);
  27.457  }
  27.458  
  27.459  /* Allocates memory for per domain private scheduling data*/
  27.460  static int sedf_alloc_task(struct exec_domain *d) {
  27.461 -	PRINT(2,"sedf_alloc_task was called, domain-id %i.%i\n",d->domain->domain_id,
  27.462 -	      d->vcpu_id);
  27.463 -	if (d->domain->sched_priv == NULL) {
  27.464 -		if ((d->domain->sched_priv = 
  27.465 -		     xmalloc(struct sedf_dom_info)) == NULL )
  27.466 -		return -1;
  27.467 -		memset(d->domain->sched_priv, 0, sizeof(struct sedf_dom_info));
  27.468 -	}
  27.469 -	if ((d->sched_priv = xmalloc(struct sedf_edom_info)) == NULL )
  27.470 -		return -1;
  27.471 -	memset(d->sched_priv, 0, sizeof(struct sedf_edom_info));
  27.472 -	return 0;
  27.473 +    PRINT(2,"sedf_alloc_task was called, domain-id %i.%i\n",d->domain->domain_id,
  27.474 +          d->vcpu_id);
  27.475 +    if (d->domain->sched_priv == NULL) {
  27.476 +        if ((d->domain->sched_priv = 
  27.477 +             xmalloc(struct sedf_dom_info)) == NULL )
  27.478 +            return -1;
  27.479 +        memset(d->domain->sched_priv, 0, sizeof(struct sedf_dom_info));
  27.480 +    }
  27.481 +    if ((d->sched_priv = xmalloc(struct sedf_edom_info)) == NULL )
  27.482 +        return -1;
  27.483 +    memset(d->sched_priv, 0, sizeof(struct sedf_edom_info));
  27.484 +    return 0;
  27.485  }
  27.486  
  27.487  /* Setup the sedf_dom_info */
  27.488  static void sedf_add_task(struct exec_domain *d)
  27.489  {
  27.490 -	struct sedf_edom_info *inf = EDOM_INFO(d);
  27.491 -	inf->exec_domain = d;
  27.492 -	
  27.493 -	PRINT(2,"sedf_add_task was called, domain-id %i.%i\n",d->domain->domain_id,
  27.494 -	      d->vcpu_id);
  27.495 -	      
  27.496 -	if (d->domain->domain_id==0) {
  27.497 -		/*set dom0 to something useful to boot the machine*/
  27.498 -		inf->period    = MILLISECS(20);
  27.499 -		inf->slice     = MILLISECS(15);
  27.500 -		inf->latency   = 0;
  27.501 -		inf->deadl_abs = 0;
  27.502 -		inf->status     = EXTRA_NONE | SEDF_ASLEEP;/*EXTRA_AWARE; */
  27.503 -	}
  27.504 -	else {
  27.505 -		/*other domains run in best effort mode*/
  27.506 -		inf->period    = WEIGHT_PERIOD;
  27.507 -		inf->slice     = 0;
  27.508 -		inf->deadl_abs = 0;
  27.509 -		inf->latency   = 0;
  27.510 -		inf->status     = EXTRA_AWARE | SEDF_ASLEEP;
  27.511 -		inf->extraweight = 1;
  27.512 -	}
  27.513 -	inf->period_orig = inf->period; inf->slice_orig = inf->slice;
  27.514 -	INIT_LIST_HEAD(&(inf->list));
  27.515 -	INIT_LIST_HEAD(&(inf->extralist[EXTRA_PEN_Q]));
  27.516 -	INIT_LIST_HEAD(&(inf->extralist[EXTRA_UTIL_Q]));
  27.517 -	
  27.518 -	if (d->domain->domain_id != IDLE_DOMAIN_ID) {
  27.519 -		extraq_check(d);
  27.520 -	}
  27.521 +    struct sedf_edom_info *inf = EDOM_INFO(d);
  27.522 +    inf->exec_domain = d;
  27.523 + 
  27.524 +    PRINT(2,"sedf_add_task was called, domain-id %i.%i\n",d->domain->domain_id,
  27.525 +          d->vcpu_id);
  27.526 +
  27.527 +    /* Allocate per-CPU context if this is the first domain to be added. */
  27.528 +    if ( schedule_data[d->processor].sched_priv == NULL )
  27.529 +    {
  27.530 +        schedule_data[d->processor].sched_priv = 
  27.531 +            xmalloc(struct sedf_cpu_info);
  27.532 +        BUG_ON(schedule_data[d->processor].sched_priv == NULL);
  27.533 +        INIT_LIST_HEAD(WAITQ(d->processor));
  27.534 +        INIT_LIST_HEAD(RUNQ(d->processor));
  27.535 +        INIT_LIST_HEAD(EXTRAQ(d->processor,EXTRA_PEN_Q));
  27.536 +        INIT_LIST_HEAD(EXTRAQ(d->processor,EXTRA_UTIL_Q));
  27.537 +    }
  27.538 +       
  27.539 +    if (d->domain->domain_id==0) {
  27.540 +        /*set dom0 to something useful to boot the machine*/
  27.541 +        inf->period    = MILLISECS(20);
  27.542 +        inf->slice     = MILLISECS(15);
  27.543 +        inf->latency   = 0;
  27.544 +        inf->deadl_abs = 0;
  27.545 +        inf->status     = EXTRA_NONE | SEDF_ASLEEP;/*EXTRA_AWARE; */
  27.546 +    } else {
  27.547 +        /*other domains run in best effort mode*/
  27.548 +        inf->period    = WEIGHT_PERIOD;
  27.549 +        inf->slice     = 0;
  27.550 +        inf->deadl_abs = 0;
  27.551 +        inf->latency   = 0;
  27.552 +        inf->status     = EXTRA_AWARE | SEDF_ASLEEP;
  27.553 +        inf->extraweight = 1;
  27.554 +    }
  27.555 +    inf->period_orig = inf->period; inf->slice_orig = inf->slice;
  27.556 +    INIT_LIST_HEAD(&(inf->list));
  27.557 +    INIT_LIST_HEAD(&(inf->extralist[EXTRA_PEN_Q]));
  27.558 +    INIT_LIST_HEAD(&(inf->extralist[EXTRA_UTIL_Q]));
  27.559 + 
  27.560 +    if (!is_idle_task(d->domain)) {
  27.561 +        extraq_check(d);
  27.562 +    } else {
  27.563 +        EDOM_INFO(d)->deadl_abs = 0;
  27.564 +        EDOM_INFO(d)->status &= ~SEDF_ASLEEP;
  27.565 +    }
  27.566  }
  27.567  
  27.568  /* Frees memory used by domain info */
  27.569  static void sedf_free_task(struct domain *d)
  27.570  {
  27.571 -	int i;
  27.572 -	PRINT(2,"sedf_free_task was called, domain-id %i\n",d->domain_id);
  27.573 -	ASSERT(d->sched_priv != NULL);
  27.574 -	xfree(d->sched_priv);
  27.575 -	
  27.576 -	for (i = 0; i < MAX_VIRT_CPUS; i++)
  27.577 -        	if ( d->exec_domain[i] ) {
  27.578 -			ASSERT(d->exec_domain[i]->sched_priv != NULL);
  27.579 -			xfree(d->exec_domain[i]->sched_priv);
  27.580 -		}
  27.581 -}
  27.582 -
  27.583 -/* Initialises idle task */
  27.584 -static int sedf_init_idle_task(struct exec_domain *d) {
  27.585 -	PRINT(2,"sedf_init_idle_task was called, domain-id %i.%i\n",
  27.586 -	      d->domain->domain_id, d->vcpu_id);
  27.587 -	if ( sedf_alloc_task(d) < 0 )
  27.588 -		return -1;
  27.589 -	
  27.590 -	sedf_add_task(d);
  27.591 -	EDOM_INFO(d)->deadl_abs = 0;
  27.592 -	EDOM_INFO(d)->status &= ~SEDF_ASLEEP;
  27.593 -	set_bit(_VCPUF_running, &d->vcpu_flags);
  27.594 -	/*the idle task doesn't have to turn up on any list...*/
  27.595 -	return 0;
  27.596 +    int i;
  27.597 +    PRINT(2,"sedf_free_task was called, domain-id %i\n",d->domain_id);
  27.598 +    ASSERT(d->sched_priv != NULL);
  27.599 +    xfree(d->sched_priv);
  27.600 + 
  27.601 +    for (i = 0; i < MAX_VIRT_CPUS; i++)
  27.602 +        if ( d->exec_domain[i] ) {
  27.603 +            ASSERT(d->exec_domain[i]->sched_priv != NULL);
  27.604 +            xfree(d->exec_domain[i]->sched_priv);
  27.605 +        }
  27.606  }
  27.607  
  27.608  /* handles the rescheduling, bookkeeping of domains running in their realtime-time :)*/
  27.609  static inline void desched_edf_dom (s_time_t now, struct exec_domain* d) {
  27.610 -	struct sedf_edom_info* inf = EDOM_INFO(d);
  27.611 -	/*current domain is running in real time mode*/
  27.612 -	
  27.613 -	ASSERT(__task_on_queue(d));
  27.614 -	/*update the domains cputime*/
  27.615 -	inf->cputime += now - inf->sched_start_abs;
  27.616 +    struct sedf_edom_info* inf = EDOM_INFO(d);
  27.617 +    /*current domain is running in real time mode*/
  27.618 + 
  27.619 +    ASSERT(__task_on_queue(d));
  27.620 +    /*update the domains cputime*/
  27.621 +    inf->cputime += now - inf->sched_start_abs;
  27.622  
  27.623 -	/*scheduling decisions, which don't remove the running domain
  27.624 -	  from the runq*/
  27.625 -	if ((inf->cputime < inf->slice) && sedf_runnable(d))
  27.626 -		return;
  27.627 -		
  27.628 -	__del_from_queue(d);
  27.629 -		
  27.630 -	/*manage bookkeeping (i.e. calculate next deadline,
  27.631 -	  memorize overun-time of slice) of finished domains*/
  27.632 -	if (inf->cputime >= inf->slice) {
  27.633 -		inf->cputime -= inf->slice;
  27.634 -		
  27.635 -		if (inf->period < inf->period_orig) {
  27.636 -			/*this domain runs in latency scaling or burst mode*/
  27.637 -			#if (UNBLOCK == UNBLOCK_BURST)
  27.638 -			/*if we are runnig in burst scaling wait for two periods
  27.639 -			  before scaling periods up again*/ 
  27.640 -			if (now - inf->unblock_abs >= 2 * inf->period)
  27.641 -			#endif
  27.642 -			{
  27.643 -				inf->period *= 2; inf->slice *= 2;
  27.644 -				if ((inf->period > inf->period_orig) ||
  27.645 -				    (inf->slice > inf->slice_orig)) {
  27.646 -					/*reset slice & period*/
  27.647 -					inf->period = inf->period_orig;
  27.648 -					inf->slice = inf->slice_orig;
  27.649 -				}
  27.650 -			}
  27.651 -		}
  27.652 -		/*set next deadline*/
  27.653 -		inf->deadl_abs += inf->period;
  27.654 -	}
  27.655 -	
  27.656 -	/*add a runnable domain to the waitqueue*/
  27.657 -	if (sedf_runnable(d))
  27.658 -		__add_to_waitqueue_sort(d);
  27.659 -	else {
  27.660 -		/*we have a blocked realtime task -> remove it from exqs too*/
  27.661 -		#if (EXTRA > EXTRA_OFF)
  27.662 -		#if (EXTRA == EXTRA_BLOCK_WEIGHT)
  27.663 -		if (extraq_on(d, EXTRA_PEN_Q)) extraq_del(d, EXTRA_PEN_Q);
  27.664 -		#endif
  27.665 -		if (extraq_on(d, EXTRA_UTIL_Q)) extraq_del(d, EXTRA_UTIL_Q);
  27.666 -		#endif
  27.667 -	}
  27.668 -	ASSERT(EQ(sedf_runnable(d), __task_on_queue(d)));
  27.669 -	ASSERT(IMPLY(extraq_on(d, EXTRA_UTIL_Q) || extraq_on(d, EXTRA_PEN_Q), 
  27.670 -	  sedf_runnable(d)));
  27.671 +    /*scheduling decisions, which don't remove the running domain
  27.672 +      from the runq*/
  27.673 +    if ((inf->cputime < inf->slice) && sedf_runnable(d))
  27.674 +        return;
  27.675 +  
  27.676 +    __del_from_queue(d);
  27.677 +  
  27.678 +    /*manage bookkeeping (i.e. calculate next deadline,
  27.679 +      memorize overun-time of slice) of finished domains*/
  27.680 +    if (inf->cputime >= inf->slice) {
  27.681 +        inf->cputime -= inf->slice;
  27.682 +  
  27.683 +        if (inf->period < inf->period_orig) {
  27.684 +            /*this domain runs in latency scaling or burst mode*/
  27.685 +#if (UNBLOCK == UNBLOCK_BURST)
  27.686 +            /*if we are runnig in burst scaling wait for two periods
  27.687 +              before scaling periods up again*/ 
  27.688 +            if (now - inf->unblock_abs >= 2 * inf->period)
  27.689 +#endif
  27.690 +            {
  27.691 +                inf->period *= 2; inf->slice *= 2;
  27.692 +                if ((inf->period > inf->period_orig) ||
  27.693 +                    (inf->slice > inf->slice_orig)) {
  27.694 +                    /*reset slice & period*/
  27.695 +                    inf->period = inf->period_orig;
  27.696 +                    inf->slice = inf->slice_orig;
  27.697 +                }
  27.698 +            }
  27.699 +        }
  27.700 +        /*set next deadline*/
  27.701 +        inf->deadl_abs += inf->period;
  27.702 +    }
  27.703 + 
  27.704 +    /*add a runnable domain to the waitqueue*/
  27.705 +    if (sedf_runnable(d))
  27.706 +        __add_to_waitqueue_sort(d);
  27.707 +    else {
  27.708 +        /*we have a blocked realtime task -> remove it from exqs too*/
  27.709 +#if (EXTRA > EXTRA_OFF)
  27.710 +#if (EXTRA == EXTRA_BLOCK_WEIGHT)
  27.711 +        if (extraq_on(d, EXTRA_PEN_Q)) extraq_del(d, EXTRA_PEN_Q);
  27.712 +#endif
  27.713 +        if (extraq_on(d, EXTRA_UTIL_Q)) extraq_del(d, EXTRA_UTIL_Q);
  27.714 +#endif
  27.715 +    }
  27.716 +    ASSERT(EQ(sedf_runnable(d), __task_on_queue(d)));
  27.717 +    ASSERT(IMPLY(extraq_on(d, EXTRA_UTIL_Q) || extraq_on(d, EXTRA_PEN_Q), 
  27.718 +                 sedf_runnable(d)));
  27.719  }
  27.720  
  27.721  /* Update all elements on the queues */
  27.722  static inline void update_queues(s_time_t now, struct list_head* runq, 
  27.723 -struct list_head* waitq) {
  27.724 -	struct list_head     *cur,*tmp;
  27.725 -	struct sedf_edom_info *curinf;
  27.726 -	
  27.727 -	PRINT(3,"Updating waitq..\n");
  27.728 -	/*check for the first elements of the waitqueue, whether their
  27.729 -	  next period has already started*/
  27.730 -	list_for_each_safe(cur, tmp, waitq) {
  27.731 -		curinf = list_entry(cur, struct sedf_edom_info, list);
  27.732 -		PRINT(4,"\tLooking @ dom %i.%i\n",
  27.733 -		      curinf->exec_domain->domain->domain_id, curinf->exec_domain->vcpu_id);
  27.734 -		if (PERIOD_BEGIN(curinf) <= now) {
  27.735 -			__del_from_queue(curinf->exec_domain);
  27.736 -			__add_to_runqueue_sort(curinf->exec_domain);
  27.737 -		}
  27.738 -		else
  27.739 -			break;
  27.740 -	}
  27.741 -	
  27.742 -	PRINT(3,"Updating runq..\n");
  27.743 -	/*process the runq, find domains that are on
  27.744 -	  the runqueue which shouldn't be there*/
  27.745 -	list_for_each_safe(cur, tmp, runq) {
  27.746 -		curinf = list_entry(cur,struct sedf_edom_info,list);
  27.747 -		PRINT(4,"\tLooking @ dom %i.%i\n",
  27.748 -		      curinf->exec_domain->domain->domain_id, curinf->exec_domain->vcpu_id);
  27.749 -		if (unlikely(curinf->slice == 0)) {
  27.750 -			/*ignore domains with empty slice*/
  27.751 -			PRINT(4,"\tUpdating zero-slice domain %i.%i\n",
  27.752 -			      curinf->exec_domain->domain->domain_id,
  27.753 -			      curinf->exec_domain->vcpu_id);
  27.754 -			__del_from_queue(curinf->exec_domain);
  27.755 -			
  27.756 -			/*move them to their next period*/
  27.757 -			curinf->deadl_abs += curinf->period;
  27.758 -			/*and put them back into the queue*/
  27.759 -			__add_to_waitqueue_sort(curinf->exec_domain);
  27.760 -			continue;
  27.761 -		}
  27.762 -		if (unlikely((curinf->deadl_abs < now) ||
  27.763 -			(curinf->cputime > curinf->slice))) {
  27.764 -			/*we missed the deadline or the slice was
  27.765 -				already finished... might hapen because
  27.766 -				of dom_adj.*/
  27.767 -			PRINT(4,"\tDomain %i.%i exceeded it's deadline/"
  27.768 -				"slice (%"PRIu64" / %"PRIu64") now: %"PRIu64
  27.769 -				" cputime: %"PRIu64"\n",
  27.770 -				curinf->exec_domain->domain->domain_id,
  27.771 -				curinf->exec_domain->vcpu_id,
  27.772 -				curinf->deadl_abs, curinf->slice, now,
  27.773 -				curinf->cputime);
  27.774 -			__del_from_queue(curinf->exec_domain);
  27.775 -			/*common case: we miss one period!*/
  27.776 -			curinf->deadl_abs += curinf->period;
  27.777 -			
  27.778 -			/*if we are still behind: modulo arithmetic,
  27.779 -				force deadline to be in future and
  27.780 -				aligned to period borders!*/
  27.781 -			if (unlikely(curinf->deadl_abs < now))
  27.782 -				curinf->deadl_abs += 
  27.783 -					DIV_UP(now - curinf->deadl_abs,
  27.784 -					curinf->period) * curinf->period;
  27.785 -			ASSERT(curinf->deadl_abs > now);
  27.786 -			/*give a fresh slice*/
  27.787 -			curinf->cputime = 0;
  27.788 -			if (PERIOD_BEGIN(curinf) > now)
  27.789 -				__add_to_waitqueue_sort(curinf->exec_domain);
  27.790 -			else
  27.791 -				__add_to_runqueue_sort(curinf->exec_domain);
  27.792 -		}
  27.793 -		else
  27.794 -			break;
  27.795 -	}
  27.796 -	PRINT(3,"done updating the queues\n");
  27.797 +                                 struct list_head* waitq) {
  27.798 +    struct list_head     *cur,*tmp;
  27.799 +    struct sedf_edom_info *curinf;
  27.800 + 
  27.801 +    PRINT(3,"Updating waitq..\n");
  27.802 +    /*check for the first elements of the waitqueue, whether their
  27.803 +      next period has already started*/
  27.804 +    list_for_each_safe(cur, tmp, waitq) {
  27.805 +        curinf = list_entry(cur, struct sedf_edom_info, list);
  27.806 +        PRINT(4,"\tLooking @ dom %i.%i\n",
  27.807 +              curinf->exec_domain->domain->domain_id, curinf->exec_domain->vcpu_id);
  27.808 +        if (PERIOD_BEGIN(curinf) <= now) {
  27.809 +            __del_from_queue(curinf->exec_domain);
  27.810 +            __add_to_runqueue_sort(curinf->exec_domain);
  27.811 +        }
  27.812 +        else
  27.813 +            break;
  27.814 +    }
  27.815 + 
  27.816 +    PRINT(3,"Updating runq..\n");
  27.817 +    /*process the runq, find domains that are on
  27.818 +      the runqueue which shouldn't be there*/
  27.819 +    list_for_each_safe(cur, tmp, runq) {
  27.820 +        curinf = list_entry(cur,struct sedf_edom_info,list);
  27.821 +        PRINT(4,"\tLooking @ dom %i.%i\n",
  27.822 +              curinf->exec_domain->domain->domain_id, curinf->exec_domain->vcpu_id);
  27.823 +        if (unlikely(curinf->slice == 0)) {
  27.824 +            /*ignore domains with empty slice*/
  27.825 +            PRINT(4,"\tUpdating zero-slice domain %i.%i\n",
  27.826 +                  curinf->exec_domain->domain->domain_id,
  27.827 +                  curinf->exec_domain->vcpu_id);
  27.828 +            __del_from_queue(curinf->exec_domain);
  27.829 +   
  27.830 +            /*move them to their next period*/
  27.831 +            curinf->deadl_abs += curinf->period;
  27.832 +            /*and put them back into the queue*/
  27.833 +            __add_to_waitqueue_sort(curinf->exec_domain);
  27.834 +            continue;
  27.835 +        }
  27.836 +        if (unlikely((curinf->deadl_abs < now) ||
  27.837 +                     (curinf->cputime > curinf->slice))) {
  27.838 +            /*we missed the deadline or the slice was
  27.839 +              already finished... might hapen because
  27.840 +              of dom_adj.*/
  27.841 +            PRINT(4,"\tDomain %i.%i exceeded it's deadline/"
  27.842 +                  "slice (%"PRIu64" / %"PRIu64") now: %"PRIu64
  27.843 +                  " cputime: %"PRIu64"\n",
  27.844 +                  curinf->exec_domain->domain->domain_id,
  27.845 +                  curinf->exec_domain->vcpu_id,
  27.846 +                  curinf->deadl_abs, curinf->slice, now,
  27.847 +                  curinf->cputime);
  27.848 +            __del_from_queue(curinf->exec_domain);
  27.849 +            /*common case: we miss one period!*/
  27.850 +            curinf->deadl_abs += curinf->period;
  27.851 +   
  27.852 +            /*if we are still behind: modulo arithmetic,
  27.853 +              force deadline to be in future and
  27.854 +              aligned to period borders!*/
  27.855 +            if (unlikely(curinf->deadl_abs < now))
  27.856 +                curinf->deadl_abs += 
  27.857 +                    DIV_UP(now - curinf->deadl_abs,
  27.858 +                           curinf->period) * curinf->period;
  27.859 +            ASSERT(curinf->deadl_abs > now);
  27.860 +            /*give a fresh slice*/
  27.861 +            curinf->cputime = 0;
  27.862 +            if (PERIOD_BEGIN(curinf) > now)
  27.863 +                __add_to_waitqueue_sort(curinf->exec_domain);
  27.864 +            else
  27.865 +                __add_to_runqueue_sort(curinf->exec_domain);
  27.866 +        }
  27.867 +        else
  27.868 +            break;
  27.869 +    }
  27.870 +    PRINT(3,"done updating the queues\n");
  27.871  }
  27.872  
  27.873  #if (EXTRA > EXTRA_OFF)
  27.874 @@ -571,140 +550,140 @@ struct list_head* waitq) {
  27.875     if the domain is blocked / has regained its short-block-loss
  27.876     time it is not put on any queue */
  27.877  static inline void desched_extra_dom(s_time_t now, struct exec_domain* d) {
  27.878 -	struct sedf_edom_info	*inf = EDOM_INFO(d);
  27.879 -	int 			i    = extra_get_cur_q(inf);
  27.880 -	
  27.881 +    struct sedf_edom_info *inf = EDOM_INFO(d);
  27.882 +    int    i    = extra_get_cur_q(inf);
  27.883 + 
  27.884  #if (EXTRA == EXTRA_SLICE_WEIGHT || EXTRA == EXTRA_BLOCK_WEIGHT)
  27.885 -	unsigned long         oldscore;
  27.886 +    unsigned long         oldscore;
  27.887  #endif
  27.888 -	ASSERT(extraq_on(d, i));
  27.889 -	/*unset all running flags*/
  27.890 -	inf->status  &= ~(EXTRA_RUN_PEN | EXTRA_RUN_UTIL);
  27.891 -	/*fresh slice for the next run*/
  27.892 -	inf->cputime = 0;
  27.893 -	/*accumulate total extratime*/
  27.894 -	inf->extra_time_tot += now - inf->sched_start_abs;
  27.895 -	/*remove extradomain from head of the queue*/
  27.896 -	extraq_del(d, i);
  27.897 +    ASSERT(extraq_on(d, i));
  27.898 +    /*unset all running flags*/
  27.899 +    inf->status  &= ~(EXTRA_RUN_PEN | EXTRA_RUN_UTIL);
  27.900 +    /*fresh slice for the next run*/
  27.901 +    inf->cputime = 0;
  27.902 +    /*accumulate total extratime*/
  27.903 +    inf->extra_time_tot += now - inf->sched_start_abs;
  27.904 +    /*remove extradomain from head of the queue*/
  27.905 +    extraq_del(d, i);
  27.906  
  27.907  #if (EXTRA == EXTRA_ROUNDR)
  27.908 -	if (sedf_runnable(d) && (inf->status & EXTRA_AWARE))
  27.909 -		/*add to the tail if it is runnable => round-robin*/
  27.910 -		extraq_add_tail(d, EXTRA_UTIL_Q);
  27.911 +    if (sedf_runnable(d) && (inf->status & EXTRA_AWARE))
  27.912 +        /*add to the tail if it is runnable => round-robin*/
  27.913 +        extraq_add_tail(d, EXTRA_UTIL_Q);
  27.914  #elif (EXTRA == EXTRA_SLICE_WEIGHT || EXTRA == EXTRA_BLOCK_WEIGHT)
  27.915 -	/*update the score*/
  27.916 -	oldscore      = inf->score[i];
  27.917 +    /*update the score*/
  27.918 +    oldscore      = inf->score[i];
  27.919  #if (EXTRA == EXTRA_BLOCK_WEIGHT)
  27.920 -	if (i == EXTRA_PEN_Q) {
  27.921 -		/*domain was running in L0 extraq*/
  27.922 -		/*reduce block lost, probably more sophistication here!*/
  27.923 -		/*inf->short_block_lost_tot -= EXTRA_QUANTUM;*/
  27.924 -		inf->short_block_lost_tot -= now - inf->sched_start_abs;
  27.925 -		PRINT(3,"Domain %i.%i: Short_block_loss: %"PRIi64"\n", 
  27.926 -		      inf->exec_domain->domain->domain_id, inf->exec_domain->vcpu_id,
  27.927 -		      inf->short_block_lost_tot);
  27.928 -		if (inf->short_block_lost_tot <= 0) {
  27.929 -			PRINT(4,"Domain %i.%i compensated short block loss!\n",
  27.930 -			  inf->exec_domain->domain->domain_id, inf->exec_domain->vcpu_id);
  27.931 -			/*we have (over-)compensated our block penalty*/
  27.932 -			inf->short_block_lost_tot = 0;
  27.933 -			/*we don't want a place on the penalty queue anymore!*/
  27.934 -			inf->status &= ~EXTRA_WANT_PEN_Q;
  27.935 -			goto check_extra_queues;
  27.936 -		}
  27.937 -		/*we have to go again for another try in the block-extraq,
  27.938 -		  the score is not used incremantally here, as this is
  27.939 -		  already done by recalculating the block_lost*/
  27.940 -		inf->score[EXTRA_PEN_Q] = (inf->period << 10) /
  27.941 -		                          inf->short_block_lost_tot;
  27.942 -		oldscore = 0;
  27.943 -	} else
  27.944 +    if (i == EXTRA_PEN_Q) {
  27.945 +        /*domain was running in L0 extraq*/
  27.946 +        /*reduce block lost, probably more sophistication here!*/
  27.947 +        /*inf->short_block_lost_tot -= EXTRA_QUANTUM;*/
  27.948 +        inf->short_block_lost_tot -= now - inf->sched_start_abs;
  27.949 +        PRINT(3,"Domain %i.%i: Short_block_loss: %"PRIi64"\n", 
  27.950 +              inf->exec_domain->domain->domain_id, inf->exec_domain->vcpu_id,
  27.951 +              inf->short_block_lost_tot);
  27.952 +        if (inf->short_block_lost_tot <= 0) {
  27.953 +            PRINT(4,"Domain %i.%i compensated short block loss!\n",
  27.954 +                  inf->exec_domain->domain->domain_id, inf->exec_domain->vcpu_id);
  27.955 +            /*we have (over-)compensated our block penalty*/
  27.956 +            inf->short_block_lost_tot = 0;
  27.957 +            /*we don't want a place on the penalty queue anymore!*/
  27.958 +            inf->status &= ~EXTRA_WANT_PEN_Q;
  27.959 +            goto check_extra_queues;
  27.960 +        }
  27.961 +        /*we have to go again for another try in the block-extraq,
  27.962 +          the score is not used incremantally here, as this is
  27.963 +          already done by recalculating the block_lost*/
  27.964 +        inf->score[EXTRA_PEN_Q] = (inf->period << 10) /
  27.965 +            inf->short_block_lost_tot;
  27.966 +        oldscore = 0;
  27.967 +    } else
  27.968  #endif
  27.969 -	{
  27.970 -		/*domain was running in L1 extraq => score is inverse of
  27.971 -		  utilization and is used somewhat incremental!*/
  27.972 -		if (!inf->extraweight)
  27.973 -			/*NB: use fixed point arithmetic with 10 bits*/
  27.974 -			inf->score[EXTRA_UTIL_Q] = (inf->period << 10) /
  27.975 -			                            inf->slice;
  27.976 -		else
  27.977 -			/*give a domain w/ exweight = 1 as much as a domain with
  27.978 -			  util = 1/128*/
  27.979 -			inf->score[EXTRA_UTIL_Q] = (1<<17) / inf->extraweight;
  27.980 -	}
  27.981 -check_extra_queues:
  27.982 -	/* Adding a runnable domain to the right queue and removing blocked ones*/
  27.983 -	if (sedf_runnable(d)) {
  27.984 -		/*add according to score: weighted round robin*/
  27.985 -		if (inf->status & (EXTRA_AWARE | EXTRA_WANT_PEN_Q))
  27.986 -			extraq_add_sort_update(d, i, oldscore);
  27.987 -	}
  27.988 -	else {
  27.989 -		/*remove this blocked domain from the waitq!*/
  27.990 -		__del_from_queue(d);
  27.991 +    {
  27.992 +        /*domain was running in L1 extraq => score is inverse of
  27.993 +          utilization and is used somewhat incremental!*/
  27.994 +        if (!inf->extraweight)
  27.995 +            /*NB: use fixed point arithmetic with 10 bits*/
  27.996 +            inf->score[EXTRA_UTIL_Q] = (inf->period << 10) /
  27.997 +                inf->slice;
  27.998 +        else
  27.999 +            /*give a domain w/ exweight = 1 as much as a domain with
 27.1000 +              util = 1/128*/
 27.1001 +            inf->score[EXTRA_UTIL_Q] = (1<<17) / inf->extraweight;
 27.1002 +    }
 27.1003 + check_extra_queues:
 27.1004 +    /* Adding a runnable domain to the right queue and removing blocked ones*/
 27.1005 +    if (sedf_runnable(d)) {
 27.1006 +        /*add according to score: weighted round robin*/
 27.1007 +        if (inf->status & (EXTRA_AWARE | EXTRA_WANT_PEN_Q))
 27.1008 +            extraq_add_sort_update(d, i, oldscore);
 27.1009 +    }
 27.1010 +    else {
 27.1011 +        /*remove this blocked domain from the waitq!*/
 27.1012 +        __del_from_queue(d);
 27.1013  #if (EXTRA == EXTRA_BLOCK_WEIGHT)
 27.1014 -		/*make sure that we remove a blocked domain from the other
 27.1015 -		  extraq too*/
 27.1016 -		if (i == EXTRA_PEN_Q) {
 27.1017 -			if (extraq_on(d, EXTRA_UTIL_Q))
 27.1018 -				extraq_del(d, EXTRA_UTIL_Q);
 27.1019 -		}
 27.1020 -		else {
 27.1021 -			if (extraq_on(d, EXTRA_PEN_Q))
 27.1022 -				extraq_del(d, EXTRA_PEN_Q);
 27.1023 -		}
 27.1024 +        /*make sure that we remove a blocked domain from the other
 27.1025 +          extraq too*/
 27.1026 +        if (i == EXTRA_PEN_Q) {
 27.1027 +            if (extraq_on(d, EXTRA_UTIL_Q))
 27.1028 +                extraq_del(d, EXTRA_UTIL_Q);
 27.1029 +        }
 27.1030 +        else {
 27.1031 +            if (extraq_on(d, EXTRA_PEN_Q))
 27.1032 +                extraq_del(d, EXTRA_PEN_Q);
 27.1033 +        }
 27.1034  #endif
 27.1035 -	}
 27.1036 +    }
 27.1037  #endif
 27.1038 -	ASSERT(EQ(sedf_runnable(d), __task_on_queue(d)));
 27.1039 -	ASSERT(IMPLY(extraq_on(d, EXTRA_UTIL_Q) || extraq_on(d, EXTRA_PEN_Q), 
 27.1040 -	  sedf_runnable(d)));
 27.1041 +    ASSERT(EQ(sedf_runnable(d), __task_on_queue(d)));
 27.1042 +    ASSERT(IMPLY(extraq_on(d, EXTRA_UTIL_Q) || extraq_on(d, EXTRA_PEN_Q), 
 27.1043 +                 sedf_runnable(d)));
 27.1044  }
 27.1045  #endif
 27.1046  
 27.1047  static inline struct task_slice sedf_do_extra_schedule (s_time_t now,
 27.1048 -    s_time_t end_xt, struct list_head *extraq[], int cpu) {
 27.1049 -	struct task_slice 		ret;
 27.1050 -	struct sedf_edom_info	*runinf;
 27.1051 -	
 27.1052 -	/* Enough time left to use for extratime? */
 27.1053 -	if (end_xt - now < EXTRA_QUANTUM)
 27.1054 -		goto return_idle;
 27.1055 +                                                        s_time_t end_xt, struct list_head *extraq[], int cpu) {
 27.1056 +    struct task_slice   ret;
 27.1057 +    struct sedf_edom_info *runinf;
 27.1058 + 
 27.1059 +    /* Enough time left to use for extratime? */
 27.1060 +    if (end_xt - now < EXTRA_QUANTUM)
 27.1061 +        goto return_idle;
 27.1062  #if (EXTRA == EXTRA_BLOCK_WEIGHT)
 27.1063 -	if (!list_empty(extraq[EXTRA_PEN_Q])) {
 27.1064 -		/*we still have elements on the level 0 extraq 
 27.1065 -		  => let those run first!*/
 27.1066 -		runinf   = list_entry(extraq[EXTRA_PEN_Q]->next, 
 27.1067 -		              struct sedf_edom_info, extralist[EXTRA_PEN_Q]);
 27.1068 -		runinf->status |= EXTRA_RUN_PEN;
 27.1069 -		ret.task = runinf->exec_domain;
 27.1070 -		ret.time = EXTRA_QUANTUM;
 27.1071 +    if (!list_empty(extraq[EXTRA_PEN_Q])) {
 27.1072 +        /*we still have elements on the level 0 extraq 
 27.1073 +          => let those run first!*/
 27.1074 +        runinf   = list_entry(extraq[EXTRA_PEN_Q]->next, 
 27.1075 +                              struct sedf_edom_info, extralist[EXTRA_PEN_Q]);
 27.1076 +        runinf->status |= EXTRA_RUN_PEN;
 27.1077 +        ret.task = runinf->exec_domain;
 27.1078 +        ret.time = EXTRA_QUANTUM;
 27.1079  #ifdef SEDF_STATS
 27.1080 -		runinf->pen_extra_slices++;
 27.1081 -#endif
 27.1082 -	} else
 27.1083 +        runinf->pen_extra_slices++;
 27.1084  #endif
 27.1085 -	if (!list_empty(extraq[EXTRA_UTIL_Q])) {
 27.1086 -		/*use elements from the normal extraqueue*/
 27.1087 -		runinf   = list_entry(extraq[EXTRA_UTIL_Q]->next,
 27.1088 -		              struct sedf_edom_info, extralist[EXTRA_UTIL_Q]);
 27.1089 -		runinf->status |= EXTRA_RUN_UTIL;
 27.1090 -		ret.task = runinf->exec_domain;
 27.1091 -		ret.time = EXTRA_QUANTUM;
 27.1092 -	}
 27.1093 -	else
 27.1094 -		goto return_idle;
 27.1095 +    } else
 27.1096 +#endif
 27.1097 +        if (!list_empty(extraq[EXTRA_UTIL_Q])) {
 27.1098 +            /*use elements from the normal extraqueue*/
 27.1099 +            runinf   = list_entry(extraq[EXTRA_UTIL_Q]->next,
 27.1100 +                                  struct sedf_edom_info, extralist[EXTRA_UTIL_Q]);
 27.1101 +            runinf->status |= EXTRA_RUN_UTIL;
 27.1102 +            ret.task = runinf->exec_domain;
 27.1103 +            ret.time = EXTRA_QUANTUM;
 27.1104 +        }
 27.1105 +        else
 27.1106 +            goto return_idle;
 27.1107  
 27.1108 -	ASSERT(ret.time > 0);
 27.1109 -	ASSERT(sedf_runnable(ret.task));
 27.1110 -	return ret;
 27.1111 -	
 27.1112 -return_idle:
 27.1113 -	ret.task = IDLETASK(cpu);
 27.1114 -	ret.time = end_xt - now;
 27.1115 -	ASSERT(ret.time > 0);
 27.1116 -	ASSERT(sedf_runnable(ret.task));
 27.1117 -	return ret;
 27.1118 +    ASSERT(ret.time > 0);
 27.1119 +    ASSERT(sedf_runnable(ret.task));
 27.1120 +    return ret;
 27.1121 + 
 27.1122 + return_idle:
 27.1123 +    ret.task = IDLETASK(cpu);
 27.1124 +    ret.time = end_xt - now;
 27.1125 +    ASSERT(ret.time > 0);
 27.1126 +    ASSERT(sedf_runnable(ret.task));
 27.1127 +    return ret;
 27.1128  }
 27.1129  /* Main scheduling function
 27.1130     Reasons for calling this function are:
 27.1131 @@ -713,126 +692,123 @@ return_idle:
 27.1132     -and various others ;) in general: determine which domain to run next*/
 27.1133  static struct task_slice sedf_do_schedule(s_time_t now)
 27.1134  {
 27.1135 -	int                   cpu      = current->processor;
 27.1136 -	struct list_head     *runq     = RUNQ(cpu);
 27.1137 -	struct list_head     *waitq    = WAITQ(cpu);
 27.1138 -	#if (EXTRA > EXTRA_OFF)
 27.1139 -	struct sedf_edom_info *inf     = EDOM_INFO(current);
 27.1140 -	struct list_head     *extraq[] = {EXTRAQ(cpu, EXTRA_PEN_Q),
 27.1141 -	                                  EXTRAQ(cpu, EXTRA_UTIL_Q)};
 27.1142 -	#endif
 27.1143 -	struct task_slice          ret;
 27.1144 -	/*int i = 0;*/
 27.1145 -	/*idle tasks don't need any of the following stuf*/
 27.1146 -	if (is_idle_task(current->domain))
 27.1147 -		goto check_waitq;
 27.1148 -	
 27.1149 -	/* create local state of the status of the domain, in order to avoid
 27.1150 -	   inconsistent state during scheduling decisions, because data for
 27.1151 -	   domain_runnable is not protected by the scheduling lock!*/
 27.1152 -	if(!domain_runnable(current))
 27.1153 -		inf->status |= SEDF_ASLEEP;
 27.1154 -	
 27.1155 -	if (inf->status & SEDF_ASLEEP)
 27.1156 -		inf->block_abs = now;
 27.1157 +    int                   cpu      = current->processor;
 27.1158 +    struct list_head     *runq     = RUNQ(cpu);
 27.1159 +    struct list_head     *waitq    = WAITQ(cpu);
 27.1160 +#if (EXTRA > EXTRA_OFF)
 27.1161 +    struct sedf_edom_info *inf     = EDOM_INFO(current);
 27.1162 +    struct list_head     *extraq[] = {EXTRAQ(cpu, EXTRA_PEN_Q),
 27.1163 +                                      EXTRAQ(cpu, EXTRA_UTIL_Q)};
 27.1164 +#endif
 27.1165 +    struct task_slice          ret;
 27.1166 +    /*int i = 0;*/
 27.1167 +    /*idle tasks don't need any of the following stuf*/
 27.1168 +    if (is_idle_task(current->domain))
 27.1169 +        goto check_waitq;
 27.1170 + 
 27.1171 +    /* create local state of the status of the domain, in order to avoid
 27.1172 +       inconsistent state during scheduling decisions, because data for
 27.1173 +       domain_runnable is not protected by the scheduling lock!*/
 27.1174 +    if(!domain_runnable(current))
 27.1175 +        inf->status |= SEDF_ASLEEP;
 27.1176 + 
 27.1177 +    if (inf->status & SEDF_ASLEEP)
 27.1178 +        inf->block_abs = now;
 27.1179  
 27.1180 -	#if (EXTRA > EXTRA_OFF)
 27.1181 -	if (unlikely(extra_runs(inf))) {
 27.1182 -		/*special treatment of domains running in extra time*/
 27.1183 -		desched_extra_dom(now, current);
 27.1184 -	}
 27.1185 -	else 
 27.1186 -	#endif
 27.1187 -	{
 27.1188 -		desched_edf_dom(now, current);
 27.1189 -	}
 27.1190 -check_waitq:
 27.1191 -	update_queues(now, runq, waitq);
 27.1192 -	
 27.1193 -	/*now simply pick the first domain from the runqueue, which has the
 27.1194 -	  earliest deadline, because the list is sorted*/
 27.1195 -	struct sedf_edom_info *runinf, *waitinf;
 27.1196 -	
 27.1197 -	if (!list_empty(runq)) {
 27.1198 -		runinf   = list_entry(runq->next,struct sedf_edom_info,list);
 27.1199 -		ret.task = runinf->exec_domain;
 27.1200 -		if (!list_empty(waitq)) {
 27.1201 -			waitinf  = list_entry(waitq->next,
 27.1202 -			               struct sedf_edom_info,list);
 27.1203 -			/*rerun scheduler, when scheduled domain reaches it's
 27.1204 -			  end of slice or the first domain from the waitqueue
 27.1205 -			  gets ready*/
 27.1206 -			ret.time = MIN(now + runinf->slice - runinf->cputime,
 27.1207 -			               PERIOD_BEGIN(waitinf)) - now;
 27.1208 -		}
 27.1209 -		else {
 27.1210 -			ret.time = runinf->slice - runinf->cputime;
 27.1211 -		}
 27.1212 -		CHECK(ret.time > 0);
 27.1213 -		goto sched_done;
 27.1214 -	}
 27.1215 -	
 27.1216 -	if (!list_empty(waitq)) {
 27.1217 -		waitinf  = list_entry(waitq->next,struct sedf_edom_info, list);
 27.1218 -		/*we could not find any suitable domain 
 27.1219 -		  => look for domains that are aware of extratime*/
 27.1220 -		#if (EXTRA > EXTRA_OFF)
 27.1221 -		ret = sedf_do_extra_schedule(now, PERIOD_BEGIN(waitinf),
 27.1222 -		                             extraq, cpu);
 27.1223 -		#else
 27.1224 -		ret.task = IDLETASK(cpu);
 27.1225 -		ret.time = PERIOD_BEGIN(waitinf) - now;
 27.1226 -		#endif
 27.1227 -		CHECK(ret.time > 0);
 27.1228 -	}
 27.1229 -	else {
 27.1230 -		/*this could probably never happen, but one never knows...*/
 27.1231 -		/*it can... imagine a second CPU, which is pure scifi ATM,
 27.1232 -		  but one never knows ;)*/
 27.1233 -		ret.task = IDLETASK(cpu);
 27.1234 -		ret.time = SECONDS(1);
 27.1235 -	}
 27.1236 +#if (EXTRA > EXTRA_OFF)
 27.1237 +    if (unlikely(extra_runs(inf))) {
 27.1238 +        /*special treatment of domains running in extra time*/
 27.1239 +        desched_extra_dom(now, current);
 27.1240 +    }
 27.1241 +    else 
 27.1242 +#endif
 27.1243 +    {
 27.1244 +        desched_edf_dom(now, current);
 27.1245 +    }
 27.1246 + check_waitq:
 27.1247 +    update_queues(now, runq, waitq);
 27.1248 + 
 27.1249 +    /*now simply pick the first domain from the runqueue, which has the
 27.1250 +      earliest deadline, because the list is sorted*/
 27.1251 +    struct sedf_edom_info *runinf, *waitinf;
 27.1252 + 
 27.1253 +    if (!list_empty(runq)) {
 27.1254 +        runinf   = list_entry(runq->next,struct sedf_edom_info,list);
 27.1255 +        ret.task = runinf->exec_domain;
 27.1256 +        if (!list_empty(waitq)) {
 27.1257 +            waitinf  = list_entry(waitq->next,
 27.1258 +                                  struct sedf_edom_info,list);
 27.1259 +            /*rerun scheduler, when scheduled domain reaches it's
 27.1260 +              end of slice or the first domain from the waitqueue
 27.1261 +              gets ready*/
 27.1262 +            ret.time = MIN(now + runinf->slice - runinf->cputime,
 27.1263 +                           PERIOD_BEGIN(waitinf)) - now;
 27.1264 +        }
 27.1265 +        else {
 27.1266 +            ret.time = runinf->slice - runinf->cputime;
 27.1267 +        }
 27.1268 +        CHECK(ret.time > 0);
 27.1269 +        goto sched_done;
 27.1270 +    }
 27.1271 + 
 27.1272 +    if (!list_empty(waitq)) {
 27.1273 +        waitinf  = list_entry(waitq->next,struct sedf_edom_info, list);
 27.1274 +        /*we could not find any suitable domain 
 27.1275 +          => look for domains that are aware of extratime*/
 27.1276 +#if (EXTRA > EXTRA_OFF)
 27.1277 +        ret = sedf_do_extra_schedule(now, PERIOD_BEGIN(waitinf),
 27.1278 +                                     extraq, cpu);
 27.1279 +#else
 27.1280 +        ret.task = IDLETASK(cpu);
 27.1281 +        ret.time = PERIOD_BEGIN(waitinf) - now;
 27.1282 +#endif
 27.1283 +        CHECK(ret.time > 0);
 27.1284 +    }
 27.1285 +    else {
 27.1286 +        /*this could probably never happen, but one never knows...*/
 27.1287 +        /*it can... imagine a second CPU, which is pure scifi ATM,
 27.1288 +          but one never knows ;)*/
 27.1289 +        ret.task = IDLETASK(cpu);
 27.1290 +        ret.time = SECONDS(1);
 27.1291 +    }
 27.1292  
 27.1293 -sched_done:	
 27.1294 -	/*TODO: Do something USEFUL when this happens and find out, why it
 27.1295 -	still can happen!!!*/
 27.1296 -	if (ret.time<0) {
 27.1297 -		printk("Ouch! We are seriously BEHIND schedule! %"PRIi64"\n",
 27.1298 -		       ret.time);
 27.1299 -		ret.time = EXTRA_QUANTUM;
 27.1300 -	}
 27.1301 -	EDOM_INFO(ret.task)->sched_start_abs = now;
 27.1302 -	CHECK(ret.time > 0);
 27.1303 -	ASSERT(sedf_runnable(ret.task));
 27.1304 -	return ret;
 27.1305 + sched_done: 
 27.1306 +    /*TODO: Do something USEFUL when this happens and find out, why it
 27.1307 +      still can happen!!!*/
 27.1308 +    if (ret.time<0) {
 27.1309 +        printk("Ouch! We are seriously BEHIND schedule! %"PRIi64"\n",
 27.1310 +               ret.time);
 27.1311 +        ret.time = EXTRA_QUANTUM;
 27.1312 +    }
 27.1313 +    EDOM_INFO(ret.task)->sched_start_abs = now;
 27.1314 +    CHECK(ret.time > 0);
 27.1315 +    ASSERT(sedf_runnable(ret.task));
 27.1316 +    return ret;
 27.1317  }
 27.1318  
 27.1319  static void sedf_sleep(struct exec_domain *d) {
 27.1320 -	PRINT(2,"sedf_sleep was called, domain-id %i.%i\n",d->domain->domain_id, d->vcpu_id);
 27.1321 -	
 27.1322 -	if (is_idle_task(d->domain))
 27.1323 -		return;
 27.1324 +    PRINT(2,"sedf_sleep was called, domain-id %i.%i\n",d->domain->domain_id, d->vcpu_id);
 27.1325 + 
 27.1326 +    if (is_idle_task(d->domain))
 27.1327 +        return;
 27.1328  
 27.1329 -	EDOM_INFO(d)->status |= SEDF_ASLEEP;
 27.1330 -	
 27.1331 -	if ( test_bit(_VCPUF_running, &d->vcpu_flags) ) {
 27.1332 -#ifdef ADV_SCHED_HISTO
 27.1333 -		adv_sched_hist_start(d->processor);
 27.1334 +    EDOM_INFO(d)->status |= SEDF_ASLEEP;
 27.1335 + 
 27.1336 +    if ( test_bit(_VCPUF_running, &d->vcpu_flags) ) {
 27.1337 +        cpu_raise_softirq(d->processor, SCHEDULE_SOFTIRQ);
 27.1338 +    }
 27.1339 +    else  {
 27.1340 +        if ( __task_on_queue(d) )
 27.1341 +            __del_from_queue(d);
 27.1342 +#if (EXTRA > EXTRA_OFF)
 27.1343 +        if (extraq_on(d, EXTRA_UTIL_Q)) 
 27.1344 +            extraq_del(d, EXTRA_UTIL_Q);
 27.1345  #endif
 27.1346 -		cpu_raise_softirq(d->processor, SCHEDULE_SOFTIRQ);
 27.1347 -	}
 27.1348 -	else  {
 27.1349 -		if ( __task_on_queue(d) )
 27.1350 -			__del_from_queue(d);
 27.1351 -		#if (EXTRA > EXTRA_OFF)
 27.1352 -		if (extraq_on(d, EXTRA_UTIL_Q)) 
 27.1353 -			extraq_del(d, EXTRA_UTIL_Q);
 27.1354 -		#endif
 27.1355 -		#if (EXTRA == EXTRA_BLOCK_WEIGHT)
 27.1356 -		if (extraq_on(d, EXTRA_PEN_Q))
 27.1357 -			extraq_del(d, EXTRA_PEN_Q);
 27.1358 -		#endif
 27.1359 -	}
 27.1360 +#if (EXTRA == EXTRA_BLOCK_WEIGHT)
 27.1361 +        if (extraq_on(d, EXTRA_PEN_Q))
 27.1362 +            extraq_del(d, EXTRA_PEN_Q);
 27.1363 +#endif
 27.1364 +    }
 27.1365  }
 27.1366  
 27.1367  /* This function wakes up a domain, i.e. moves it into the waitqueue
 27.1368 @@ -908,555 +884,554 @@ static void sedf_sleep(struct exec_domai
 27.1369   */
 27.1370  static inline void unblock_short_vcons
 27.1371  (struct sedf_edom_info* inf, s_time_t now) {
 27.1372 -	inf->deadl_abs += inf->period;
 27.1373 -	inf->cputime = 0;
 27.1374 +    inf->deadl_abs += inf->period;
 27.1375 +    inf->cputime = 0;
 27.1376  }
 27.1377  
 27.1378  static inline void unblock_short_cons(struct sedf_edom_info* inf, s_time_t now)
 27.1379  {
 27.1380 -	/*treat blocked time as consumed by the domain*/
 27.1381 -	inf->cputime += now - inf->block_abs;	
 27.1382 -	if (inf->cputime + EXTRA_QUANTUM > inf->slice) {
 27.1383 -		/*we don't have a reasonable amount of time in 
 27.1384 -		  our slice left :( => start in next period!*/
 27.1385 -		unblock_short_vcons(inf, now);
 27.1386 -	}
 27.1387 +    /*treat blocked time as consumed by the domain*/
 27.1388 +    inf->cputime += now - inf->block_abs; 
 27.1389 +    if (inf->cputime + EXTRA_QUANTUM > inf->slice) {
 27.1390 +        /*we don't have a reasonable amount of time in 
 27.1391 +          our slice left :( => start in next period!*/
 27.1392 +        unblock_short_vcons(inf, now);
 27.1393 +    }
 27.1394  #ifdef SEDF_STATS
 27.1395 -	else
 27.1396 -		inf->short_cont++;
 27.1397 +    else
 27.1398 +        inf->short_cont++;
 27.1399  #endif
 27.1400  }
 27.1401  static inline void unblock_short_extra_support (struct sedf_edom_info* inf,
 27.1402 -   s_time_t now) {
 27.1403 -	/*this unblocking scheme tries to support the domain, by assigning it
 27.1404 -	   a priority in extratime distribution according to the loss of time
 27.1405 -	   in this slice due to blocking*/
 27.1406 -	s_time_t pen;
 27.1407 -	
 27.1408 -	/*no more realtime execution in this period!*/
 27.1409 -	inf->deadl_abs += inf->period;
 27.1410 -	if (likely(inf->block_abs)) {
 27.1411 -		//treat blocked time as consumed by the domain*/
 27.1412 -		/*inf->cputime += now - inf->block_abs;*/
 27.1413 -		/*penalty is time the domain would have
 27.1414 -		  had if it continued to run */
 27.1415 -		pen = (inf->slice - inf->cputime);
 27.1416 -		if (pen < 0) pen = 0;
 27.1417 -		/*accumulate all penalties over the periods*/
 27.1418 -		/*inf->short_block_lost_tot += pen;*/
 27.1419 -		/*set penalty to the current value*/
 27.1420 -		inf->short_block_lost_tot = pen;
 27.1421 -		/*not sure which one is better.. but seems to work well...*/
 27.1422 -		
 27.1423 -		if (inf->short_block_lost_tot) {
 27.1424 -			inf->score[0] = (inf->period << 10) /
 27.1425 -			                 inf->short_block_lost_tot;
 27.1426 +                                                s_time_t now) {
 27.1427 +    /*this unblocking scheme tries to support the domain by assigning it
 27.1428 +      a priority in extratime distribution according to the loss of time
 27.1429 +      in this slice due to blocking*/
 27.1430 +    s_time_t pen;
 27.1431 + 
 27.1432 +    /*no more realtime execution in this period!*/
 27.1433 +    inf->deadl_abs += inf->period;
 27.1434 +    if (likely(inf->block_abs)) {
 27.1435 +        /*treat blocked time as consumed by the domain*/
 27.1436 +        /*inf->cputime += now - inf->block_abs;*/
 27.1437 +        /*penalty is time the domain would have
 27.1438 +          had if it continued to run */
 27.1439 +        pen = (inf->slice - inf->cputime);
 27.1440 +        if (pen < 0) pen = 0;
 27.1441 +        /*accumulate all penalties over the periods*/
 27.1442 +        /*inf->short_block_lost_tot += pen;*/
 27.1443 +        /*set penalty to the current value*/
 27.1444 +        inf->short_block_lost_tot = pen;
 27.1445 +        /*not sure which one is better.. but seems to work well...*/
 27.1446 +  
 27.1447 +        if (inf->short_block_lost_tot) {
 27.1448 +            inf->score[0] = (inf->period << 10) /
 27.1449 +                inf->short_block_lost_tot;
 27.1450  #ifdef SEDF_STATS
 27.1451 -			inf->pen_extra_blocks++;
 27.1452 +            inf->pen_extra_blocks++;
 27.1453  #endif
 27.1454 -			if (extraq_on(inf->exec_domain, EXTRA_PEN_Q))
 27.1455 -				/*remove domain for possible resorting!*/
 27.1456 -				extraq_del(inf->exec_domain, EXTRA_PEN_Q);
 27.1457 -			else
 27.1458 -				/*remember that we want to be on the penalty q
 27.1459 -				  so that we can continue when we (un-)block
 27.1460 -				  in penalty-extratime*/
 27.1461 -				inf->status |= EXTRA_WANT_PEN_Q;
 27.1462 -			
 27.1463 -			/*(re-)add domain to the penalty extraq*/
 27.1464 -			extraq_add_sort_update(inf->exec_domain,
 27.1465 -					 EXTRA_PEN_Q, 0);
 27.1466 -		}
 27.1467 -	}
 27.1468 -	/*give it a fresh slice in the next period!*/
 27.1469 -	inf->cputime = 0;
 27.1470 +            if (extraq_on(inf->exec_domain, EXTRA_PEN_Q))
 27.1471 +                /*remove domain for possible resorting!*/
 27.1472 +                extraq_del(inf->exec_domain, EXTRA_PEN_Q);
 27.1473 +            else
 27.1474 +                /*remember that we want to be on the penalty q
 27.1475 +                  so that we can continue when we (un-)block
 27.1476 +                  in penalty-extratime*/
 27.1477 +                inf->status |= EXTRA_WANT_PEN_Q;
 27.1478 +   
 27.1479 +            /*(re-)add domain to the penalty extraq*/
 27.1480 +            extraq_add_sort_update(inf->exec_domain,
 27.1481 +                                   EXTRA_PEN_Q, 0);
 27.1482 +        }
 27.1483 +    }
 27.1484 +    /*give it a fresh slice in the next period!*/
 27.1485 +    inf->cputime = 0;
 27.1486  }
 27.1487  static inline void unblock_long_vcons(struct sedf_edom_info* inf, s_time_t now)
 27.1488  {
 27.1489 -	/* align to next future period */
 27.1490 -	inf->deadl_abs += (DIV_UP(now - inf->deadl_abs, inf->period) +1)
 27.1491 -	                 * inf->period;
 27.1492 -	inf->cputime = 0;
 27.1493 +    /* align to next future period */
 27.1494 +    inf->deadl_abs += (DIV_UP(now - inf->deadl_abs, inf->period) +1)
 27.1495 +        * inf->period;
 27.1496 +    inf->cputime = 0;
 27.1497  }
 27.1498  
 27.1499  static inline void unblock_long_cons_a (struct sedf_edom_info* inf,
 27.1500 -   s_time_t now) {
 27.1501 -	/*treat the time the domain was blocked in the
 27.1502 -	  CURRENT period as consumed by the domain*/
 27.1503 -	inf->cputime = (now - inf->deadl_abs) % inf->period;	
 27.1504 -	if (inf->cputime + EXTRA_QUANTUM > inf->slice) {
 27.1505 -		/*we don't have a reasonable amount of time in our slice
 27.1506 -		  left :( => start in next period!*/
 27.1507 -		unblock_long_vcons(inf, now);
 27.1508 -	}
 27.1509 +                                        s_time_t now) {
 27.1510 +    /*treat the time the domain was blocked in the
 27.1511 +      CURRENT period as consumed by the domain*/
 27.1512 +    inf->cputime = (now - inf->deadl_abs) % inf->period; 
 27.1513 +    if (inf->cputime + EXTRA_QUANTUM > inf->slice) {
 27.1514 +        /*we don't have a reasonable amount of time in our slice
 27.1515 +          left :( => start in next period!*/
 27.1516 +        unblock_long_vcons(inf, now);
 27.1517 +    }
 27.1518  }
 27.1519  static inline void unblock_long_cons_b(struct sedf_edom_info* inf,s_time_t now) {
 27.1520 -	/*Conservative 2b*/
 27.1521 -	/*Treat the unblocking time as a start of a new period */
 27.1522 -	inf->deadl_abs = now + inf->period;
 27.1523 -	inf->cputime = 0;
 27.1524 +    /*Conservative 2b*/
 27.1525 +    /*Treat the unblocking time as a start of a new period */
 27.1526 +    inf->deadl_abs = now + inf->period;
 27.1527 +    inf->cputime = 0;
 27.1528  }
 27.1529  static inline void unblock_long_cons_c(struct sedf_edom_info* inf,s_time_t now) {
 27.1530 -	if (likely(inf->latency)) {
 27.1531 -		/*scale the slice and period accordingly to the latency hint*/
 27.1532 -		/*reduce period temporarily to the latency hint*/
 27.1533 -		inf->period = inf->latency;
 27.1534 -		/*this results in max. 4s slice/period length*/
 27.1535 -		ASSERT((inf->period < ULONG_MAX)
 27.1536 -		    && (inf->slice_orig < ULONG_MAX));
 27.1537 -		/*scale slice accordingly, so that utilisation stays the same*/
 27.1538 -		inf->slice = (inf->period * inf->slice_orig)
 27.1539 -		            / inf->period_orig;
 27.1540 -		inf->deadl_abs = now + inf->period;
 27.1541 -		inf->cputime = 0;
 27.1542 -	}	
 27.1543 -	else {
 27.1544 -		/*we don't have a latency hint.. use some other technique*/
 27.1545 -		unblock_long_cons_b(inf, now);
 27.1546 -	}
 27.1547 +    if (likely(inf->latency)) {
 27.1548 +        /*scale the slice and period accordingly to the latency hint*/
 27.1549 +        /*reduce period temporarily to the latency hint*/
 27.1550 +        inf->period = inf->latency;
 27.1551 +        /*this results in max. 4s slice/period length*/
 27.1552 +        ASSERT((inf->period < ULONG_MAX)
 27.1553 +               && (inf->slice_orig < ULONG_MAX));
 27.1554 +        /*scale slice accordingly, so that utilisation stays the same*/
 27.1555 +        inf->slice = (inf->period * inf->slice_orig)
 27.1556 +            / inf->period_orig;
 27.1557 +        inf->deadl_abs = now + inf->period;
 27.1558 +        inf->cputime = 0;
 27.1559 +    } 
 27.1560 +    else {
 27.1561 +        /*we don't have a latency hint.. use some other technique*/
 27.1562 +        unblock_long_cons_b(inf, now);
 27.1563 +    }
 27.1564  }
 27.1565  /*a new idea of dealing with short blocks: burst period scaling*/
 27.1566  static inline void unblock_short_burst(struct sedf_edom_info* inf, s_time_t now)
 27.1567  {
 27.1568 -	/*treat blocked time as consumed by the domain*/
 27.1569 -	inf->cputime += now - inf->block_abs;
 27.1570 -	
 27.1571 -	if (inf->cputime + EXTRA_QUANTUM <= inf->slice) {
 27.1572 -		/*if we can still use some time in the current slice
 27.1573 -		  then use it!*/
 27.1574 +    /*treat blocked time as consumed by the domain*/
 27.1575 +    inf->cputime += now - inf->block_abs;
 27.1576 + 
 27.1577 +    if (inf->cputime + EXTRA_QUANTUM <= inf->slice) {
 27.1578 +        /*if we can still use some time in the current slice
 27.1579 +          then use it!*/
 27.1580  #ifdef SEDF_STATS
 27.1581 -		/*we let the domain run in the current period*/
 27.1582 -		inf->short_cont++;
 27.1583 +        /*we let the domain run in the current period*/
 27.1584 +        inf->short_cont++;
 27.1585  #endif
 27.1586 -	}
 27.1587 -	else {
 27.1588 -		/*we don't have a reasonable amount of time in
 27.1589 -		  our slice left => switch to burst mode*/
 27.1590 -		if (likely(inf->unblock_abs)) {
 27.1591 -			/*set the period-length to the current blocking
 27.1592 -			  interval, possible enhancements: average over last
 27.1593 -			  blocking intervals, user-specified minimum,...*/
 27.1594 -			inf->period = now - inf->unblock_abs;
 27.1595 -			/*check for overflow on multiplication*/
 27.1596 -			ASSERT((inf->period < ULONG_MAX) 
 27.1597 -			    && (inf->slice_orig < ULONG_MAX));
 27.1598 -			/*scale slice accordingly, so that utilisation
 27.1599 -			  stays the same*/
 27.1600 -			inf->slice = (inf->period * inf->slice_orig)
 27.1601 -			            / inf->period_orig;
 27.1602 -			/*set new (shorter) deadline*/
 27.1603 -			inf->deadl_abs += inf->period;
 27.1604 -		}
 27.1605 -		else {
 27.1606 -			/*in case we haven't unblocked before
 27.1607 -			  start in next period!*/
 27.1608 -			inf->cputime=0;
 27.1609 -			inf->deadl_abs += inf->period;
 27.1610 -		}
 27.1611 -	}
 27.1612 -	inf->unblock_abs = now;
 27.1613 +    }
 27.1614 +    else {
 27.1615 +        /*we don't have a reasonable amount of time in
 27.1616 +          our slice left => switch to burst mode*/
 27.1617 +        if (likely(inf->unblock_abs)) {
 27.1618 +            /*set the period-length to the current blocking
 27.1619 +              interval, possible enhancements: average over last
 27.1620 +              blocking intervals, user-specified minimum,...*/
 27.1621 +            inf->period = now - inf->unblock_abs;
 27.1622 +            /*check for overflow on multiplication*/
 27.1623 +            ASSERT((inf->period < ULONG_MAX) 
 27.1624 +                   && (inf->slice_orig < ULONG_MAX));
 27.1625 +            /*scale slice accordingly, so that utilisation
 27.1626 +              stays the same*/
 27.1627 +            inf->slice = (inf->period * inf->slice_orig)
 27.1628 +                / inf->period_orig;
 27.1629 +            /*set new (shorter) deadline*/
 27.1630 +            inf->deadl_abs += inf->period;
 27.1631 +        }
 27.1632 +        else {
 27.1633 +            /*in case we haven't unblocked before,
 27.1634 +              start in next period!*/
 27.1635 +            inf->cputime=0;
 27.1636 +            inf->deadl_abs += inf->period;
 27.1637 +        }
 27.1638 +    }
 27.1639 +    inf->unblock_abs = now;
 27.1640  }
 27.1641  static inline void unblock_long_burst(struct sedf_edom_info* inf, s_time_t now) {
 27.1642 -	if (unlikely(inf->latency && (inf->period > inf->latency))) {
 27.1643 -		/*scale the slice and period accordingly to the latency hint*/
 27.1644 -		inf->period = inf->latency;
 27.1645 -		/*check for overflows on multiplication*/
 27.1646 -		ASSERT((inf->period < ULONG_MAX)
 27.1647 -		    && (inf->slice_orig < ULONG_MAX));
 27.1648 -		/*scale slice accordingly, so that utilisation stays the same*/
 27.1649 -		inf->slice = (inf->period * inf->slice_orig)
 27.1650 -		            / inf->period_orig;
 27.1651 -		inf->deadl_abs = now + inf->period;
 27.1652 -		inf->cputime = 0;
 27.1653 -	}
 27.1654 -	else {
 27.1655 -		/*we don't have a latency hint.. or we are currently in 
 27.1656 -		 "burst mode": use some other technique
 27.1657 -		  NB: this should be in fact the normal way of operation,
 27.1658 -		  when we are in sync with the device!*/
 27.1659 -		unblock_long_cons_b(inf, now);
 27.1660 -	}
 27.1661 -	inf->unblock_abs = now;
 27.1662 +    if (unlikely(inf->latency && (inf->period > inf->latency))) {
 27.1663 +        /*scale the slice and period accordingly to the latency hint*/
 27.1664 +        inf->period = inf->latency;
 27.1665 +        /*check for overflows on multiplication*/
 27.1666 +        ASSERT((inf->period < ULONG_MAX)
 27.1667 +               && (inf->slice_orig < ULONG_MAX));
 27.1668 +        /*scale slice accordingly, so that utilisation stays the same*/
 27.1669 +        inf->slice = (inf->period * inf->slice_orig)
 27.1670 +            / inf->period_orig;
 27.1671 +        inf->deadl_abs = now + inf->period;
 27.1672 +        inf->cputime = 0;
 27.1673 +    }
 27.1674 +    else {
 27.1675 +        /*we don't have a latency hint.. or we are currently in 
 27.1676 +          "burst mode": use some other technique
 27.1677 +          NB: this should in fact be the normal way of operation
 27.1678 +          when we are in sync with the device!*/
 27.1679 +        unblock_long_cons_b(inf, now);
 27.1680 +    }
 27.1681 +    inf->unblock_abs = now;
 27.1682  }
 27.1683  
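
Both latency-driven rescalings above (unblock_long_cons_c and
unblock_long_burst) preserve utilisation by construction, since the slice is
recomputed as slice = period * slice_orig / period_orig. With hypothetical
numbers: a domain configured with period_orig = 100ms and slice_orig = 20ms
(20% of a CPU) that supplies a latency hint of 10ms gets period = 10ms and
slice = (10ms * 20ms) / 100ms = 2ms, i.e. still 20% utilisation but with
deadlines ten times closer together.
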
 27.1684 -#define DOMAIN_EDF 		1
 27.1685 -#define DOMAIN_EXTRA_PEN 	2
 27.1686 -#define DOMAIN_EXTRA_UTIL 	3
 27.1687 -#define DOMAIN_IDLE 		4
 27.1688 +#define DOMAIN_EDF          1
 27.1689 +#define DOMAIN_EXTRA_PEN    2
 27.1690 +#define DOMAIN_EXTRA_UTIL   3
 27.1691 +#define DOMAIN_IDLE         4
 27.1692  static inline int get_run_type(struct exec_domain* d) {
 27.1693 -	struct sedf_edom_info* inf = EDOM_INFO(d);
 27.1694 -	if (is_idle_task(d->domain))
 27.1695 -		return DOMAIN_IDLE;
 27.1696 -	if (inf->status & EXTRA_RUN_PEN)
 27.1697 -		return DOMAIN_EXTRA_PEN;
 27.1698 -	if (inf->status & EXTRA_RUN_UTIL)
 27.1699 -		return DOMAIN_EXTRA_UTIL;
 27.1700 -	return DOMAIN_EDF;
 27.1701 +    struct sedf_edom_info* inf = EDOM_INFO(d);
 27.1702 +    if (is_idle_task(d->domain))
 27.1703 +        return DOMAIN_IDLE;
 27.1704 +    if (inf->status & EXTRA_RUN_PEN)
 27.1705 +        return DOMAIN_EXTRA_PEN;
 27.1706 +    if (inf->status & EXTRA_RUN_UTIL)
 27.1707 +        return DOMAIN_EXTRA_UTIL;
 27.1708 +    return DOMAIN_EDF;
 27.1709  }
 27.1710  /*Compares two domains with respect to whether one is allowed to
 27.1711    interrupt the other's execution.
 27.1712    It returns true (!=0) if a switch to the other domain is good.
 27.1713    Current Priority scheme is as follows:
 27.1714 -  	EDF > L0 (penalty based) extra-time > 
 27.1715 -  	L1 (utilization) extra-time > idle-domain
 27.1716 +   EDF > L0 (penalty based) extra-time > 
 27.1717 +   L1 (utilization) extra-time > idle-domain
 27.1718    In the same class priorities are assigned as follows:
 27.1719 -  	EDF: early deadline > late deadline
 27.1720 -  	L0 extra-time: lower score > higher score*/
 27.1721 +   EDF: early deadline > late deadline
 27.1722 +   L0 extra-time: lower score > higher score*/
 27.1723  static inline int should_switch(struct exec_domain* cur,
 27.1724 -   struct exec_domain* other, s_time_t now) {
 27.1725 -	struct sedf_edom_info *cur_inf, *other_inf;
 27.1726 -	cur_inf   = EDOM_INFO(cur);
 27.1727 -	other_inf = EDOM_INFO(other);
 27.1728 -	
 27.1729 -	/*check whether we need to make an earlier sched-decision*/
 27.1730 -	if ((PERIOD_BEGIN(other_inf) < 
 27.1731 -	     schedule_data[other->processor].s_timer.expires))
 27.1732 -		return 1;
 27.1733 -	/*no timing-based switches need to be taken into account here*/
 27.1734 -	switch (get_run_type(cur)) {
 27.1735 -		case DOMAIN_EDF:
 27.1736 -			/* do not interrupt a running EDF domain */ 
 27.1737 -			return 0;
 27.1738 -		case DOMAIN_EXTRA_PEN:
 27.1739 -			/*check whether we also want 
 27.1740 -			  the L0 ex-q with lower score*/
 27.1741 -			if ((other_inf->status & EXTRA_WANT_PEN_Q)
 27.1742 -			&&  (other_inf->score[EXTRA_PEN_Q] < 
 27.1743 -			     cur_inf->score[EXTRA_PEN_Q]))
 27.1744 -				return 1;
 27.1745 -			else	return 0;
 27.1746 -		case DOMAIN_EXTRA_UTIL:
 27.1747 -			/*check whether we want the L0 extraq, don't
 27.1748 -			  switch if both domains want L1 extraq */
 27.1749 -			if (other_inf->status & EXTRA_WANT_PEN_Q)
 27.1750 -				return 1;
 27.1751 -			else	return 0;
 27.1752 -		case DOMAIN_IDLE:
 27.1753 -			return 1;
 27.1754 -	}
 27.1755 -	return 1;
 27.1756 +                                struct exec_domain* other, s_time_t now) {
 27.1757 +    struct sedf_edom_info *cur_inf, *other_inf;
 27.1758 +    cur_inf   = EDOM_INFO(cur);
 27.1759 +    other_inf = EDOM_INFO(other);
 27.1760 + 
 27.1761 +    /*check whether we need to make an earlier sched-decision*/
 27.1762 +    if ((PERIOD_BEGIN(other_inf) < 
 27.1763 +         schedule_data[other->processor].s_timer.expires))
 27.1764 +        return 1;
 27.1765 +    /*no timing-based switches need to be taken into account here*/
 27.1766 +    switch (get_run_type(cur)) {
 27.1767 +    case DOMAIN_EDF:
 27.1768 +        /* do not interrupt a running EDF domain */ 
 27.1769 +        return 0;
 27.1770 +    case DOMAIN_EXTRA_PEN:
 27.1771 +        /*check whether we also want 
 27.1772 +          the L0 ex-q with lower score*/
 27.1773 +        if ((other_inf->status & EXTRA_WANT_PEN_Q)
 27.1774 +            &&  (other_inf->score[EXTRA_PEN_Q] < 
 27.1775 +                 cur_inf->score[EXTRA_PEN_Q]))
 27.1776 +            return 1;
 27.1777 +        else return 0;
 27.1778 +    case DOMAIN_EXTRA_UTIL:
 27.1779 +        /*check whether we want the L0 extraq, don't
 27.1780 +          switch if both domains want L1 extraq */
 27.1781 +        if (other_inf->status & EXTRA_WANT_PEN_Q)
 27.1782 +            return 1;
 27.1783 +        else return 0;
 27.1784 +    case DOMAIN_IDLE:
 27.1785 +        return 1;
 27.1786 +    }
 27.1787 +    return 1;
 27.1788  }
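
Ignoring the timer-expiry check at the top, the switch statement above encodes
the priority scheme from the comment. A minimal sketch of just that part,
assuming the get_run_type(), EDOM_INFO(), status and score[] definitions used
above (sedf_priority_preempt is an invented name):

    static int sedf_priority_preempt(struct exec_domain *cur,
                                     struct exec_domain *other)
    {
        struct sedf_edom_info *ci = EDOM_INFO(cur), *oi = EDOM_INFO(other);

        switch ( get_run_type(cur) )
        {
        case DOMAIN_EDF:        /* a running EDF domain is never preempted */
            return 0;
        case DOMAIN_EXTRA_PEN:  /* only a lower (better) L0 score wins */
            return (oi->status & EXTRA_WANT_PEN_Q) &&
                   (oi->score[EXTRA_PEN_Q] < ci->score[EXTRA_PEN_Q]);
        case DOMAIN_EXTRA_UTIL: /* any domain wanting the L0 queue wins */
            return !!(oi->status & EXTRA_WANT_PEN_Q);
        default:                /* the idle domain yields to everything */
            return 1;
        }
    }
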
 27.1789  void sedf_wake(struct exec_domain *d) {
 27.1790 -	s_time_t              now = NOW();
 27.1791 -	struct sedf_edom_info* inf = EDOM_INFO(d);
 27.1792 -	
 27.1793 -	PRINT(3, "sedf_wake was called, domain-id %i.%i\n",d->domain->domain_id, d->vcpu_id);
 27.1794 -	
 27.1795 -	if (unlikely(is_idle_task(d->domain)))
 27.1796 -		return;
 27.1797 -			
 27.1798 -	if ( unlikely(__task_on_queue(d)) ) {
 27.1799 -		PRINT(3,"\tdomain %i.%i is already in some queue\n",
 27.1800 -		      d->domain->domain_id, d->vcpu_id);
 27.1801 -		return;
 27.1802 -	}
 27.1803 -	ASSERT(!sedf_runnable(d));
 27.1804 -	inf->status &= ~SEDF_ASLEEP;
 27.1805 -	ASSERT(!extraq_on(d, EXTRA_UTIL_Q));
 27.1806 -	ASSERT(!extraq_on(d, EXTRA_PEN_Q));
 27.1807 -	
 27.1808 -	if (unlikely(inf->deadl_abs == 0))
 27.1809 -		/*initial setup of the deadline*/
 27.1810 -		inf->deadl_abs = now + inf->slice;
 27.1811 -		
 27.1812 -	PRINT(3,"waking up domain %i.%i (deadl= %"PRIu64" period= %"PRIu64" "\
 27.1813 -	        "now= %"PRIu64")\n", d->domain->domain_id, d->vcpu_id, inf->deadl_abs,
 27.1814 -		 inf->period, now);
 27.1815 -#ifdef SEDF_STATS	
 27.1816 -	inf->block_tot++;
 27.1817 -#endif
 27.1818 -	if (unlikely(now < PERIOD_BEGIN(inf))) {
 27.1819 -		PRINT(4,"extratime unblock\n");
 27.1820 -		/* unblocking in extra-time! */
 27.1821 -		#if (EXTRA == EXTRA_BLOCK_WEIGHT)
 27.1822 -		if (inf->status & EXTRA_WANT_PEN_Q) {
 27.1823 -			/*we have a domain that wants compensation
 27.1824 -			  for block penalty and did just block in
 27.1825 -			  its compensation time. Give it another
 27.1826 -			  chance!*/
 27.1827 -			extraq_add_sort_update(d, EXTRA_PEN_Q, 0);
 27.1828 -		}
 27.1829 -		#endif
 27.1830 -		extraq_check_add_unblocked(d, 0);
 27.1831 -	}		
 27.1832 -	else {		
 27.1833 -		if (now < inf->deadl_abs) {
 27.1834 -			PRINT(4,"short unblocking\n");
 27.1835 -			/*short blocking*/
 27.1836 -#ifdef SEDF_STATS
 27.1837 -			inf->short_block_tot++;
 27.1838 +    s_time_t              now = NOW();
 27.1839 +    struct sedf_edom_info* inf = EDOM_INFO(d);
 27.1840 + 
 27.1841 +    PRINT(3, "sedf_wake was called, domain-id %i.%i\n",d->domain->domain_id, d->vcpu_id);
 27.1842 + 
 27.1843 +    if (unlikely(is_idle_task(d->domain)))
 27.1844 +        return;
 27.1845 +   
 27.1846 +    if ( unlikely(__task_on_queue(d)) ) {
 27.1847 +        PRINT(3,"\tdomain %i.%i is already in some queue\n",
 27.1848 +              d->domain->domain_id, d->vcpu_id);
 27.1849 +        return;
 27.1850 +    }
 27.1851 +    ASSERT(!sedf_runnable(d));
 27.1852 +    inf->status &= ~SEDF_ASLEEP;
 27.1853 +    ASSERT(!extraq_on(d, EXTRA_UTIL_Q));
 27.1854 +    ASSERT(!extraq_on(d, EXTRA_PEN_Q));
 27.1855 + 
 27.1856 +    if (unlikely(inf->deadl_abs == 0))
 27.1857 +        /*initial setup of the deadline*/
 27.1858 +        inf->deadl_abs = now + inf->slice;
 27.1859 +  
 27.1860 +    PRINT(3,"waking up domain %i.%i (deadl= %"PRIu64" period= %"PRIu64" "\
 27.1861 +          "now= %"PRIu64")\n", d->domain->domain_id, d->vcpu_id, inf->deadl_abs,
 27.1862 +          inf->period, now);
 27.1863 +#ifdef SEDF_STATS 
 27.1864 +    inf->block_tot++;
 27.1865  #endif
 27.1866 -			#if (UNBLOCK <= UNBLOCK_ATROPOS)
 27.1867 -			unblock_short_vcons(inf, now);
 27.1868 -			#elif (UNBLOCK == UNBLOCK_SHORT_RESUME)
 27.1869 -			unblock_short_cons(inf, now);
 27.1870 -			#elif (UNBLOCK == UNBLOCK_BURST)
 27.1871 -			unblock_short_burst(inf, now);
 27.1872 -			#elif (UNBLOCK == UNBLOCK_EXTRA_SUPPORT)
 27.1873 -			unblock_short_extra_support(inf, now);
 27.1874 -			#endif
 27.1875 -
 27.1876 -			extraq_check_add_unblocked(d, 1);
 27.1877 -		}
 27.1878 -		else {
 27.1879 -			PRINT(4,"long unblocking\n");
 27.1880 -			/*long unblocking*/
 27.1881 +    if (unlikely(now < PERIOD_BEGIN(inf))) {
 27.1882 +        PRINT(4,"extratime unblock\n");
 27.1883 +        /* unblocking in extra-time! */
 27.1884 +#if (EXTRA == EXTRA_BLOCK_WEIGHT)
 27.1885 +        if (inf->status & EXTRA_WANT_PEN_Q) {
 27.1886 +            /*we have a domain that wants compensation
 27.1887 +              for block penalty and did just block in
 27.1888 +              its compensation time. Give it another
 27.1889 +              chance!*/
 27.1890 +            extraq_add_sort_update(d, EXTRA_PEN_Q, 0);
 27.1891 +        }
 27.1892 +#endif
 27.1893 +        extraq_check_add_unblocked(d, 0);
 27.1894 +    }  
 27.1895 +    else {  
 27.1896 +        if (now < inf->deadl_abs) {
 27.1897 +            PRINT(4,"short unblocking\n");
 27.1898 +            /*short blocking*/
 27.1899  #ifdef SEDF_STATS
 27.1900 -			inf->long_block_tot++;
 27.1901 +            inf->short_block_tot++;
 27.1902  #endif
 27.1903 -			#if (UNBLOCK == UNBLOCK_ISOCHRONOUS_EDF)
 27.1904 -			unblock_long_vcons(inf, now);
 27.1905 -			#elif (UNBLOCK == UNBLOCK_EDF \
 27.1906 -			    || UNBLOCK == UNBLOCK_EXTRA_SUPPORT)
 27.1907 -			unblock_long_cons_b(inf, now);
 27.1908 -			#elif (UNBLOCK == UNBLOCK_ATROPOS)
 27.1909 -			unblock_long_cons_c(inf, now);
 27.1910 -			#elif (UNBLOCK == UNBLOCK_SHORT_RESUME)
 27.1911 -			unblock_long_cons_b(inf, now);
 27.1912 -			/*unblock_short_cons_c(inf, now);*/
 27.1913 -			#elif (UNBLOCK == UNBLOCK_BURST)
 27.1914 -			unblock_long_burst(inf, now);
 27.1915 -			#endif
 27.1916 +#if (UNBLOCK <= UNBLOCK_ATROPOS)
 27.1917 +            unblock_short_vcons(inf, now);
 27.1918 +#elif (UNBLOCK == UNBLOCK_SHORT_RESUME)
 27.1919 +            unblock_short_cons(inf, now);
 27.1920 +#elif (UNBLOCK == UNBLOCK_BURST)
 27.1921 +            unblock_short_burst(inf, now);
 27.1922 +#elif (UNBLOCK == UNBLOCK_EXTRA_SUPPORT)
 27.1923 +            unblock_short_extra_support(inf, now);
 27.1924 +#endif
 27.1925  
 27.1926 -			extraq_check_add_unblocked(d, 1);
 27.1927 -		}
 27.1928 -	}
 27.1929 -	PRINT(3,"woke up domain %i.%i (deadl= %"PRIu64" period= %"PRIu64" "\
 27.1930 -	        "now= %"PRIu64")\n", d->domain->domain_id, d->vcpu_id, inf->deadl_abs,
 27.1931 -		inf->period, now);
 27.1932 -	if (PERIOD_BEGIN(inf) > now) {
 27.1933 -		__add_to_waitqueue_sort(d);
 27.1934 -		PRINT(3,"added to waitq\n");
 27.1935 -	}
 27.1936 -	else {
 27.1937 -		__add_to_runqueue_sort(d);
 27.1938 -		PRINT(3,"added to runq\n");
 27.1939 -	}
 27.1940 -	
 27.1941 +            extraq_check_add_unblocked(d, 1);
 27.1942 +        }
 27.1943 +        else {
 27.1944 +            PRINT(4,"long unblocking\n");
 27.1945 +            /*long unblocking*/
 27.1946  #ifdef SEDF_STATS
 27.1947 -	/*do some statistics here...*/
 27.1948 -	if (inf->block_abs != 0) {
 27.1949 -		inf->block_time_tot += now - inf->block_abs;
 27.1950 -		inf->penalty_time_tot +=
 27.1951 -		   PERIOD_BEGIN(inf) + inf->cputime - inf->block_abs;
 27.1952 -	}
 27.1953 +            inf->long_block_tot++;
 27.1954  #endif
 27.1955 -	/*sanity check: make sure each extra-aware domain IS on the util-q!*/
 27.1956 -	ASSERT(IMPLY(inf->status & EXTRA_AWARE, extraq_on(d, EXTRA_UTIL_Q)));
 27.1957 -	ASSERT(__task_on_queue(d));
 27.1958 -	/*check whether the awakened task needs to invoke the do_schedule
 27.1959 -	  routine. Try to avoid unnecessary runs but:
 27.1960 -	  Save approximation: Always switch to scheduler!*/
 27.1961 -	if (should_switch(schedule_data[d->processor].curr, d, now)){
 27.1962 -#ifdef ADV_SCHED_HISTO
 27.1963 -		adv_sched_hist_start(d->processor);
 27.1964 +#if (UNBLOCK == UNBLOCK_ISOCHRONOUS_EDF)
 27.1965 +            unblock_long_vcons(inf, now);
 27.1966 +#elif (UNBLOCK == UNBLOCK_EDF \
 27.1967 +       || UNBLOCK == UNBLOCK_EXTRA_SUPPORT)
 27.1968 +            unblock_long_cons_b(inf, now);
 27.1969 +#elif (UNBLOCK == UNBLOCK_ATROPOS)
 27.1970 +            unblock_long_cons_c(inf, now);
 27.1971 +#elif (UNBLOCK == UNBLOCK_SHORT_RESUME)
 27.1972 +            unblock_long_cons_b(inf, now);
 27.1973 +            /*unblock_short_cons_c(inf, now);*/
 27.1974 +#elif (UNBLOCK == UNBLOCK_BURST)
 27.1975 +            unblock_long_burst(inf, now);
 27.1976  #endif
 27.1977 -		cpu_raise_softirq(d->processor, SCHEDULE_SOFTIRQ);
 27.1978 -	}
 27.1979 +
 27.1980 +            extraq_check_add_unblocked(d, 1);
 27.1981 +        }
 27.1982 +    }
 27.1983 +    PRINT(3,"woke up domain %i.%i (deadl= %"PRIu64" period= %"PRIu64" "\
 27.1984 +          "now= %"PRIu64")\n", d->domain->domain_id, d->vcpu_id, inf->deadl_abs,
 27.1985 +          inf->period, now);
 27.1986 +    if (PERIOD_BEGIN(inf) > now) {
 27.1987 +        __add_to_waitqueue_sort(d);
 27.1988 +        PRINT(3,"added to waitq\n");
 27.1989 +    }
 27.1990 +    else {
 27.1991 +        __add_to_runqueue_sort(d);
 27.1992 +        PRINT(3,"added to runq\n");
 27.1993 +    }
 27.1994 + 
 27.1995 +#ifdef SEDF_STATS
 27.1996 +    /*do some statistics here...*/
 27.1997 +    if (inf->block_abs != 0) {
 27.1998 +        inf->block_time_tot += now - inf->block_abs;
 27.1999 +        inf->penalty_time_tot +=
 27.2000 +            PERIOD_BEGIN(inf) + inf->cputime - inf->block_abs;
 27.2001 +    }
 27.2002 +#endif
 27.2003 +    /*sanity check: make sure each extra-aware domain IS on the util-q!*/
 27.2004 +    ASSERT(IMPLY(inf->status & EXTRA_AWARE, extraq_on(d, EXTRA_UTIL_Q)));
 27.2005 +    ASSERT(__task_on_queue(d));
 27.2006 +    /*check whether the awakened task needs to invoke the do_schedule
 27.2007 +      routine. Try to avoid unnecessary runs but:
 27.2008 +      Safe approximation: Always switch to scheduler!*/
 27.2009 +    if (should_switch(schedule_data[d->processor].curr, d, now))
 27.2010 +        cpu_raise_softirq(d->processor, SCHEDULE_SOFTIRQ);
 27.2011  }
 27.2012  
 27.2013  /*Print a lot of use-{full, less} information about a domain in the system*/
 27.2014  static void sedf_dump_domain(struct exec_domain *d) {
 27.2015 -	printk("%i.%i has=%c ", d->domain->domain_id, d->vcpu_id,
 27.2016 -		test_bit(_VCPUF_running, &d->vcpu_flags) ? 'T':'F');
 27.2017 -	printk("p=%"PRIu64" sl=%"PRIu64" ddl=%"PRIu64" w=%hu c=%"PRIu64" sc=%i xtr(%s)=%"PRIu64" ew=%hu",
 27.2018 -	  EDOM_INFO(d)->period, EDOM_INFO(d)->slice, EDOM_INFO(d)->deadl_abs,
 27.2019 -	  EDOM_INFO(d)->weight, d->cpu_time, EDOM_INFO(d)->score[EXTRA_UTIL_Q],
 27.2020 -	 (EDOM_INFO(d)->status & EXTRA_AWARE) ? "yes" : "no",
 27.2021 -	  EDOM_INFO(d)->extra_time_tot, EDOM_INFO(d)->extraweight);
 27.2022 -	if (d->cpu_time !=0)
 27.2023 -		printf(" (%"PRIu64"%%)", (EDOM_INFO(d)->extra_time_tot * 100)
 27.2024 -		                 / d->cpu_time);
 27.2025 +    printk("%i.%i has=%c ", d->domain->domain_id, d->vcpu_id,
 27.2026 +           test_bit(_VCPUF_running, &d->vcpu_flags) ? 'T':'F');
 27.2027 +    printk("p=%"PRIu64" sl=%"PRIu64" ddl=%"PRIu64" w=%hu c=%"PRIu64" sc=%i xtr(%s)=%"PRIu64" ew=%hu",
 27.2028 +           EDOM_INFO(d)->period, EDOM_INFO(d)->slice, EDOM_INFO(d)->deadl_abs,
 27.2029 +           EDOM_INFO(d)->weight, d->cpu_time, EDOM_INFO(d)->score[EXTRA_UTIL_Q],
 27.2030 +           (EDOM_INFO(d)->status & EXTRA_AWARE) ? "yes" : "no",
 27.2031 +           EDOM_INFO(d)->extra_time_tot, EDOM_INFO(d)->extraweight);
 27.2032 +    if (d->cpu_time !=0)
 27.2033 +        printf(" (%"PRIu64"%%)", (EDOM_INFO(d)->extra_time_tot * 100)
 27.2034 +               / d->cpu_time);
 27.2035  #ifdef SEDF_STATS
 27.2036 -	if (EDOM_INFO(d)->block_time_tot!=0)
 27.2037 -		printf(" pen=%"PRIu64"%%", (EDOM_INFO(d)->penalty_time_tot * 100) /
 27.2038 -		                     EDOM_INFO(d)->block_time_tot);
 27.2039 -	if (EDOM_INFO(d)->block_tot!=0)
 27.2040 -		printf("\n   blks=%u sh=%u (%u%%) (shc=%u (%u%%) shex=%i "\
 27.2041 -		       "shexsl=%i) l=%u (%u%%) avg: b=%"PRIu64" p=%"PRIu64"",
 27.2042 -		    EDOM_INFO(d)->block_tot, EDOM_INFO(d)->short_block_tot,
 27.2043 -		   (EDOM_INFO(d)->short_block_tot * 100) 
 27.2044 -		  / EDOM_INFO(d)->block_tot, EDOM_INFO(d)->short_cont,
 27.2045 -		   (EDOM_INFO(d)->short_cont * 100) / EDOM_INFO(d)->block_tot,
 27.2046 -		    EDOM_INFO(d)->pen_extra_blocks,
 27.2047 -		    EDOM_INFO(d)->pen_extra_slices,
 27.2048 -		    EDOM_INFO(d)->long_block_tot,
 27.2049 -		   (EDOM_INFO(d)->long_block_tot * 100) / EDOM_INFO(d)->block_tot,
 27.2050 -		   (EDOM_INFO(d)->block_time_tot) / EDOM_INFO(d)->block_tot,
 27.2051 -		   (EDOM_INFO(d)->penalty_time_tot) / EDOM_INFO(d)->block_tot);
 27.2052 +    if (EDOM_INFO(d)->block_time_tot!=0)
 27.2053 +        printf(" pen=%"PRIu64"%%", (EDOM_INFO(d)->penalty_time_tot * 100) /
 27.2054 +               EDOM_INFO(d)->block_time_tot);
 27.2055 +    if (EDOM_INFO(d)->block_tot!=0)
 27.2056 +        printf("\n   blks=%u sh=%u (%u%%) (shc=%u (%u%%) shex=%i "\
 27.2057 +               "shexsl=%i) l=%u (%u%%) avg: b=%"PRIu64" p=%"PRIu64"",
 27.2058 +               EDOM_INFO(d)->block_tot, EDOM_INFO(d)->short_block_tot,
 27.2059 +               (EDOM_INFO(d)->short_block_tot * 100) 
 27.2060 +               / EDOM_INFO(d)->block_tot, EDOM_INFO(d)->short_cont,
 27.2061 +               (EDOM_INFO(d)->short_cont * 100) / EDOM_INFO(d)->block_tot,
 27.2062 +               EDOM_INFO(d)->pen_extra_blocks,
 27.2063 +               EDOM_INFO(d)->pen_extra_slices,
 27.2064 +               EDOM_INFO(d)->long_block_tot,
 27.2065 +               (EDOM_INFO(d)->long_block_tot * 100) / EDOM_INFO(d)->block_tot,
 27.2066 +               (EDOM_INFO(d)->block_time_tot) / EDOM_INFO(d)->block_tot,
 27.2067 +               (EDOM_INFO(d)->penalty_time_tot) / EDOM_INFO(d)->block_tot);
 27.2068  #endif
 27.2069 -	printf("\n");
 27.2070 +    printf("\n");
 27.2071  }
 27.2072  
 27.2073  /*dumps all domains on the specified cpu*/
 27.2074  static void sedf_dump_cpu_state(int i)
 27.2075  {
 27.2076 -	struct list_head      *list, *queue, *tmp;
 27.2077 -	struct sedf_edom_info *d_inf;
 27.2078 -	struct domain         *d;
 27.2079 -	struct exec_domain    *ed;
 27.2080 -	int loop = 0;
 27.2081 -	
 27.2082 -	printk("now=%"PRIu64"\n",NOW());
 27.2083 -	queue = RUNQ(i);
 27.2084 -	printk("RUNQ rq %lx   n: %lx, p: %lx\n",  (unsigned long)queue,
 27.2085 -		(unsigned long) queue->next, (unsigned long) queue->prev);
 27.2086 -	list_for_each_safe ( list, tmp, queue ) {
 27.2087 -		printk("%3d: ",loop++);
 27.2088 -		d_inf = list_entry(list, struct sedf_edom_info, list);
 27.2089 -		sedf_dump_domain(d_inf->exec_domain);
 27.2090 -	}
 27.2091 -	
 27.2092 -	queue = WAITQ(i); loop = 0;
 27.2093 -	printk("\nWAITQ rq %lx   n: %lx, p: %lx\n",  (unsigned long)queue,
 27.2094 -		(unsigned long) queue->next, (unsigned long) queue->prev);
 27.2095 -	list_for_each_safe ( list, tmp, queue ) {
 27.2096 -		printk("%3d: ",loop++);
 27.2097 -		d_inf = list_entry(list, struct sedf_edom_info, list);
 27.2098 -		sedf_dump_domain(d_inf->exec_domain);
 27.2099 -	}
 27.2100 -	
 27.2101 -	queue = EXTRAQ(i,EXTRA_PEN_Q); loop = 0;
 27.2102 -	printk("\nEXTRAQ (penalty) rq %lx   n: %lx, p: %lx\n",
 27.2103 -	       (unsigned long)queue, (unsigned long) queue->next,
 27.2104 -	       (unsigned long) queue->prev);
 27.2105 -	list_for_each_safe ( list, tmp, queue ) {
 27.2106 -		d_inf = list_entry(list, struct sedf_edom_info,
 27.2107 -		                   extralist[EXTRA_PEN_Q]);
 27.2108 -		printk("%3d: ",loop++);
 27.2109 -		sedf_dump_domain(d_inf->exec_domain);
 27.2110 -	}
 27.2111 -	
 27.2112 -	queue = EXTRAQ(i,EXTRA_UTIL_Q); loop = 0;
 27.2113 -	printk("\nEXTRAQ (utilization) rq %lx   n: %lx, p: %lx\n",
 27.2114 -	       (unsigned long)queue, (unsigned long) queue->next,
 27.2115 -	       (unsigned long) queue->prev);
 27.2116 -	list_for_each_safe ( list, tmp, queue )	{
 27.2117 -		d_inf = list_entry(list, struct sedf_edom_info,
 27.2118 -		                   extralist[EXTRA_UTIL_Q]);
 27.2119 -		printk("%3d: ",loop++);
 27.2120 -		sedf_dump_domain(d_inf->exec_domain);
 27.2121 -	}
 27.2122 -	
 27.2123 -	loop = 0;
 27.2124 -	printk("\nnot on Q\n");
 27.2125 -	for_each_domain(d)
 27.2126 -		for_each_exec_domain(d, ed)
 27.2127 -		{
 27.2128 -			if (!__task_on_queue(ed) && (ed->processor == i)) {
 27.2129 -				printk("%3d: ",loop++);
 27.2130 -				sedf_dump_domain(ed);
 27.2131 -			}
 27.2132 -		}
 27.2133 +    struct list_head      *list, *queue, *tmp;
 27.2134 +    struct sedf_edom_info *d_inf;
 27.2135 +    struct domain         *d;
 27.2136 +    struct exec_domain    *ed;
 27.2137 +    int loop = 0;
 27.2138 + 
 27.2139 +    printk("now=%"PRIu64"\n",NOW());
 27.2140 +    queue = RUNQ(i);
 27.2141 +    printk("RUNQ rq %lx   n: %lx, p: %lx\n",  (unsigned long)queue,
 27.2142 +           (unsigned long) queue->next, (unsigned long) queue->prev);
 27.2143 +    list_for_each_safe ( list, tmp, queue ) {
 27.2144 +        printk("%3d: ",loop++);
 27.2145 +        d_inf = list_entry(list, struct sedf_edom_info, list);
 27.2146 +        sedf_dump_domain(d_inf->exec_domain);
 27.2147 +    }
 27.2148 + 
 27.2149 +    queue = WAITQ(i); loop = 0;
 27.2150 +    printk("\nWAITQ rq %lx   n: %lx, p: %lx\n",  (unsigned long)queue,
 27.2151 +           (unsigned long) queue->next, (unsigned long) queue->prev);
 27.2152 +    list_for_each_safe ( list, tmp, queue ) {
 27.2153 +        printk("%3d: ",loop++);
 27.2154 +        d_inf = list_entry(list, struct sedf_edom_info, list);
 27.2155 +        sedf_dump_domain(d_inf->exec_domain);
 27.2156 +    }
 27.2157 + 
 27.2158 +    queue = EXTRAQ(i,EXTRA_PEN_Q); loop = 0;
 27.2159 +    printk("\nEXTRAQ (penalty) rq %lx   n: %lx, p: %lx\n",
 27.2160 +           (unsigned long)queue, (unsigned long) queue->next,
 27.2161 +           (unsigned long) queue->prev);
 27.2162 +    list_for_each_safe ( list, tmp, queue ) {
 27.2163 +        d_inf = list_entry(list, struct sedf_edom_info,
 27.2164 +                           extralist[EXTRA_PEN_Q]);
 27.2165 +        printk("%3d: ",loop++);
 27.2166 +        sedf_dump_domain(d_inf->exec_domain);
 27.2167 +    }
 27.2168 + 
 27.2169 +    queue = EXTRAQ(i,EXTRA_UTIL_Q); loop = 0;
 27.2170 +    printk("\nEXTRAQ (utilization) rq %lx   n: %lx, p: %lx\n",
 27.2171 +           (unsigned long)queue, (unsigned long) queue->next,
 27.2172 +           (unsigned long) queue->prev);
 27.2173 +    list_for_each_safe ( list, tmp, queue ) {
 27.2174 +        d_inf = list_entry(list, struct sedf_edom_info,
 27.2175 +                           extralist[EXTRA_UTIL_Q]);
 27.2176 +        printk("%3d: ",loop++);
 27.2177 +        sedf_dump_domain(d_inf->exec_domain);
 27.2178 +    }
 27.2179 + 
 27.2180 +    loop = 0;
 27.2181 +    printk("\nnot on Q\n");
 27.2182 +    for_each_domain(d)
 27.2183 +        for_each_exec_domain(d, ed)
 27.2184 +    {
 27.2185 +        if (!__task_on_queue(ed) && (ed->processor == i)) {
 27.2186 +            printk("%3d: ",loop++);
 27.2187 +            sedf_dump_domain(ed);
 27.2188 +        }
 27.2189 +    }
 27.2190  }
 27.2191  /*Adjusts periods and slices of the domains according to their weights*/
 27.2192  static inline int sedf_adjust_weights(struct sched_adjdom_cmd *cmd) {
 27.2193 -	struct exec_domain *p;
 27.2194 -	struct domain      *d;
 27.2195 -	int                 sumw[NR_CPUS];
 27.2196 -	s_time_t            sumt[NR_CPUS];
 27.2197 -	int                 cpu;
 27.2198 -	
 27.2199 -	for (cpu=0; cpu < NR_CPUS; cpu++) {
 27.2200 -		sumw[cpu] = 0;
 27.2201 -		sumt[cpu] = 0;
 27.2202 -	}
 27.2203 -	/*sum up all weights*/
 27.2204 -	for_each_domain(d)
 27.2205 -	  for_each_exec_domain(d, p) {
 27.2206 -		if (EDOM_INFO(p)->weight)
 27.2207 -			sumw[p->processor] += EDOM_INFO(p)->weight;
 27.2208 -		else {
 27.2209 -			/*don't modify domains who don't have a weight, but sum
 27.2210 -			  up the time they need, projected to a WEIGHT_PERIOD,
 27.2211 -			  so that this time is not given to the weight-driven
 27.2212 -			  domains*/
 27.2213 -			/*check for overflows*/
 27.2214 -			ASSERT((WEIGHT_PERIOD < ULONG_MAX) 
 27.2215 -			    && (EDOM_INFO(p)->slice_orig < ULONG_MAX));
 27.2216 -			sumt[p->processor] += (WEIGHT_PERIOD *
 27.2217 -			    EDOM_INFO(p)->slice_orig) / EDOM_INFO(p)->period_orig;
 27.2218 -		}
 27.2219 -	}
 27.2220 -	/*adjust all slices (and periods) to the new weight*/
 27.2221 -	for_each_domain(d) 
 27.2222 -	  for_each_exec_domain(d, p) {
 27.2223 -		if (EDOM_INFO(p)->weight) {
 27.2224 -			EDOM_INFO(p)->period_orig = 
 27.2225 -			     EDOM_INFO(p)->period = WEIGHT_PERIOD;
 27.2226 -			EDOM_INFO(p)->slice_orig  =
 27.2227 -			      EDOM_INFO(p)->slice = (EDOM_INFO(p)->weight *
 27.2228 -			      (WEIGHT_PERIOD -WEIGHT_SAFETY -
 27.2229 -			       sumt[p->processor])) / sumw[p->processor];
 27.2230 -		}
 27.2231 -	}
 27.2232 -	return 0;
 27.2233 +    struct exec_domain *p;
 27.2234 +    struct domain      *d;
 27.2235 +    int                 sumw[NR_CPUS];
 27.2236 +    s_time_t            sumt[NR_CPUS];
 27.2237 +    int                 cpu;
 27.2238 + 
 27.2239 +    for (cpu=0; cpu < NR_CPUS; cpu++) {
 27.2240 +        sumw[cpu] = 0;
 27.2241 +        sumt[cpu] = 0;
 27.2242 +    }
 27.2243 +    /*sum up all weights*/
 27.2244 +    for_each_domain(d)
 27.2245 +        for_each_exec_domain(d, p) {
 27.2246 +        if (EDOM_INFO(p)->weight)
 27.2247 +            sumw[p->processor] += EDOM_INFO(p)->weight;
 27.2248 +        else {
 27.2249 +            /*don't modify domains that don't have a weight, but sum
 27.2250 +              up the time they need, projected to a WEIGHT_PERIOD,
 27.2251 +              so that this time is not given to the weight-driven
 27.2252 +              domains*/
 27.2253 +            /*check for overflows*/
 27.2254 +            ASSERT((WEIGHT_PERIOD < ULONG_MAX) 
 27.2255 +                   && (EDOM_INFO(p)->slice_orig < ULONG_MAX));
 27.2256 +            sumt[p->processor] += 
 27.2257 +                (WEIGHT_PERIOD * EDOM_INFO(p)->slice_orig) / 
 27.2258 +                EDOM_INFO(p)->period_orig;
 27.2259 +        }
 27.2260 +    }
 27.2261 +    /*adjust all slices (and periods) to the new weight*/
 27.2262 +    for_each_domain(d) 
 27.2263 +        for_each_exec_domain(d, p) {
 27.2264 +        if (EDOM_INFO(p)->weight) {
 27.2265 +            EDOM_INFO(p)->period_orig = 
 27.2266 +                EDOM_INFO(p)->period  = WEIGHT_PERIOD;
 27.2267 +            EDOM_INFO(p)->slice_orig  =
 27.2268 +                EDOM_INFO(p)->slice   = 
 27.2269 +                (EDOM_INFO(p)->weight *
 27.2270 +                 (WEIGHT_PERIOD - WEIGHT_SAFETY - sumt[p->processor])) / 
 27.2271 +                sumw[p->processor];
 27.2272 +        }
 27.2273 +    }
 27.2274 +    return 0;
 27.2275  }
 27.2276  
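
For weight-driven domains the second loop above reduces to
slice = weight * (WEIGHT_PERIOD - WEIGHT_SAFETY - sumt[cpu]) / sumw[cpu],
with the period forced to WEIGHT_PERIOD. As a hypothetical example: two
weighted domains (weights 2 and 1) on one CPU, with no fixed-time domains
(sumt[cpu] = 0) and WEIGHT_PERIOD - WEIGHT_SAFETY = 90ms, receive slices of
60ms and 30ms per WEIGHT_PERIOD, splitting the shareable time 2:1.
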
 27.2277  /* set or fetch domain scheduling parameters */
 27.2278  static int sedf_adjdom(struct domain *p, struct sched_adjdom_cmd *cmd) {
 27.2279 -	struct exec_domain *ed;
 27.2280 +    struct exec_domain *ed;
 27.2281  
 27.2282 -	PRINT(2,"sedf_adjdom was called, domain-id %i new period %"PRIu64" "\
 27.2283 -	        "new slice %"PRIu64"\nlatency %"PRIu64" extra:%s\n",
 27.2284 -		p->domain_id, cmd->u.sedf.period, cmd->u.sedf.slice,
 27.2285 -		cmd->u.sedf.latency, (cmd->u.sedf.extratime)?"yes":"no");
 27.2286 -	if ( cmd->direction == SCHED_INFO_PUT )
 27.2287 -	{
 27.2288 -		/*check for sane parameters*/
 27.2289 -		if (!cmd->u.sedf.period && !cmd->u.sedf.weight)
 27.2290 -			return -EINVAL;
 27.2291 -		if (cmd->u.sedf.weight) {
 27.2292 -			if ((cmd->u.sedf.extratime & EXTRA_AWARE) &&
 27.2293 -			    (! cmd->u.sedf.period)) {
 27.2294 -			/*weight driven domains with xtime ONLY!*/
 27.2295 -				for_each_exec_domain(p, ed) {
 27.2296 -				  EDOM_INFO(ed)->extraweight = cmd->u.sedf.weight;
 27.2297 -				  EDOM_INFO(ed)->weight = 0;
 27.2298 -				  EDOM_INFO(ed)->slice = 0;
 27.2299 -				  EDOM_INFO(ed)->period = WEIGHT_PERIOD;
 27.2300 -				}
 27.2301 -			} else {
 27.2302 -			/*weight driven domains with real-time execution*/
 27.2303 -				for_each_exec_domain(p, ed)
 27.2304 -				  EDOM_INFO(ed)->weight = cmd->u.sedf.weight;
 27.2305 -			}
 27.2306 -		}
 27.2307 -		else {
 27.2308 -			/*time driven domains*/
 27.2309 -			for_each_exec_domain(p, ed) {
 27.2310 -				/* sanity checking! */
 27.2311 -				if(cmd->u.sedf.slice > cmd->u.sedf.period )
 27.2312 -					return -EINVAL;
 27.2313 -				EDOM_INFO(ed)->weight = 0;
 27.2314 -				EDOM_INFO(ed)->extraweight = 0;
 27.2315 -				EDOM_INFO(ed)->period_orig = 
 27.2316 -				EDOM_INFO(ed)->period   = cmd->u.sedf.period;
 27.2317 -				EDOM_INFO(ed)->slice_orig  = 
 27.2318 -				EDOM_INFO(ed)->slice    = cmd->u.sedf.slice;
 27.2319 -			}
 27.2320 -		}
 27.2321 -		if (sedf_adjust_weights(cmd))
 27.2322 -			return -EINVAL;
 27.2323 -			
 27.2324 -		for_each_exec_domain(p, ed) {
 27.2325 -			EDOM_INFO(ed)->status  = (EDOM_INFO(ed)->status &
 27.2326 -			  ~EXTRA_AWARE) | (cmd->u.sedf.extratime & EXTRA_AWARE);
 27.2327 -			EDOM_INFO(ed)->latency = cmd->u.sedf.latency;
 27.2328 -			extraq_check(ed);
 27.2329 -		}
 27.2330 -	}
 27.2331 -	else if ( cmd->direction == SCHED_INFO_GET )
 27.2332 -	{
 27.2333 -		cmd->u.sedf.period    = EDOM_INFO(p->exec_domain[0])->period;
 27.2334 -		cmd->u.sedf.slice     = EDOM_INFO(p->exec_domain[0])->slice;
 27.2335 -		cmd->u.sedf.extratime = EDOM_INFO(p->exec_domain[0])->status
 27.2336 -		                            & EXTRA_AWARE;
 27.2337 -		cmd->u.sedf.latency   = EDOM_INFO(p->exec_domain[0])->latency;
 27.2338 -		cmd->u.sedf.weight    = EDOM_INFO(p->exec_domain[0])->weight;
 27.2339 -	}
 27.2340 -	PRINT(2,"sedf_adjdom_finished\n");
 27.2341 -	return 0;
 27.2342 +    PRINT(2,"sedf_adjdom was called, domain-id %i new period %"PRIu64" "\
 27.2343 +          "new slice %"PRIu64"\nlatency %"PRIu64" extra:%s\n",
 27.2344 +          p->domain_id, cmd->u.sedf.period, cmd->u.sedf.slice,
 27.2345 +          cmd->u.sedf.latency, (cmd->u.sedf.extratime)?"yes":"no");
 27.2346 +    if ( cmd->direction == SCHED_INFO_PUT )
 27.2347 +    {
 27.2348 +        /*check for sane parameters*/
 27.2349 +        if (!cmd->u.sedf.period && !cmd->u.sedf.weight)
 27.2350 +            return -EINVAL;
 27.2351 +        if (cmd->u.sedf.weight) {
 27.2352 +            if ((cmd->u.sedf.extratime & EXTRA_AWARE) &&
 27.2353 +                (! cmd->u.sedf.period)) {
 27.2354 +                /*weight driven domains with xtime ONLY!*/
 27.2355 +                for_each_exec_domain(p, ed) {
 27.2356 +                    EDOM_INFO(ed)->extraweight = cmd->u.sedf.weight;
 27.2357 +                    EDOM_INFO(ed)->weight = 0;
 27.2358 +                    EDOM_INFO(ed)->slice = 0;
 27.2359 +                    EDOM_INFO(ed)->period = WEIGHT_PERIOD;
 27.2360 +                }
 27.2361 +            } else {
 27.2362 +                /*weight driven domains with real-time execution*/
 27.2363 +                for_each_exec_domain(p, ed)
 27.2364 +                    EDOM_INFO(ed)->weight = cmd->u.sedf.weight;
 27.2365 +            }
 27.2366 +        }
 27.2367 +        else {
 27.2368 +            /*time driven domains*/
 27.2369 +            for_each_exec_domain(p, ed) {
 27.2370 +                /* sanity checking! */
 27.2371 +                if(cmd->u.sedf.slice > cmd->u.sedf.period )
 27.2372 +                    return -EINVAL;
 27.2373 +                EDOM_INFO(ed)->weight = 0;
 27.2374 +                EDOM_INFO(ed)->extraweight = 0;
 27.2375 +                EDOM_INFO(ed)->period_orig = 
 27.2376 +                    EDOM_INFO(ed)->period   = cmd->u.sedf.period;
 27.2377 +                EDOM_INFO(ed)->slice_orig  = 
 27.2378 +                    EDOM_INFO(ed)->slice    = cmd->u.sedf.slice;
 27.2379 +            }
 27.2380 +        }
 27.2381 +        if (sedf_adjust_weights(cmd))
 27.2382 +            return -EINVAL;
 27.2383 +   
 27.2384 +        for_each_exec_domain(p, ed) {
 27.2385 +            EDOM_INFO(ed)->status  = 
 27.2386 +                (EDOM_INFO(ed)->status &
 27.2387 +                 ~EXTRA_AWARE) | (cmd->u.sedf.extratime & EXTRA_AWARE);
 27.2388 +            EDOM_INFO(ed)->latency = cmd->u.sedf.latency;
 27.2389 +            extraq_check(ed);
 27.2390 +        }
 27.2391 +    }
 27.2392 +    else if ( cmd->direction == SCHED_INFO_GET )
 27.2393 +    {
 27.2394 +        cmd->u.sedf.period    = EDOM_INFO(p->exec_domain[0])->period;
 27.2395 +        cmd->u.sedf.slice     = EDOM_INFO(p->exec_domain[0])->slice;
 27.2396 +        cmd->u.sedf.extratime = EDOM_INFO(p->exec_domain[0])->status
 27.2397 +            & EXTRA_AWARE;
 27.2398 +        cmd->u.sedf.latency   = EDOM_INFO(p->exec_domain[0])->latency;
 27.2399 +        cmd->u.sedf.weight    = EDOM_INFO(p->exec_domain[0])->weight;
 27.2400 +    }
 27.2401 +    PRINT(2,"sedf_adjdom_finished\n");
 27.2402 +    return 0;
 27.2403  }
 27.2404  
 27.2405  struct scheduler sched_sedf_def = {
 27.2406 @@ -1464,11 +1439,9 @@ struct scheduler sched_sedf_def = {
 27.2407      .opt_name = "sedf",
 27.2408      .sched_id = SCHED_SEDF,
 27.2409      
 27.2410 -    .init_idle_task = sedf_init_idle_task,
 27.2411      .alloc_task     = sedf_alloc_task,
 27.2412      .add_task       = sedf_add_task,
 27.2413      .free_task      = sedf_free_task,
 27.2414 -    .init_scheduler = sedf_init_scheduler,
 27.2415      .do_schedule    = sedf_do_schedule,
 27.2416      .dump_cpu_state = sedf_dump_cpu_state,
 27.2417      .sleep          = sedf_sleep,
    28.1 --- a/xen/common/schedule.c	Wed May 25 10:32:53 2005 +0000
    28.2 +++ b/xen/common/schedule.c	Wed May 25 10:36:59 2005 +0000
    28.3 @@ -41,11 +41,6 @@
    28.4  static char opt_sched[10] = "bvt";
    28.5  string_param("sched", opt_sched);
    28.6  
    28.7 -/*#define WAKE_HISTO*/
    28.8 -/*#define BLOCKTIME_HISTO*/
    28.9 -/*#define ADV_SCHED_HISTO*/
   28.10 -//#include <xen/adv_sched_hist.h>
   28.11 -
   28.12  #if defined(WAKE_HISTO)
   28.13  #define BUCKETS 31
   28.14  #elif defined(BLOCKTIME_HISTO)
   28.15 @@ -93,8 +88,8 @@ void free_domain_struct(struct domain *d
   28.16      xfree(d);
   28.17  }
   28.18  
   28.19 -struct exec_domain *alloc_exec_domain_struct(struct domain *d,
   28.20 -                                             unsigned long vcpu)
   28.21 +struct exec_domain *alloc_exec_domain_struct(
   28.22 +    struct domain *d, unsigned long vcpu)
   28.23  {
   28.24      struct exec_domain *ed, *edc;
   28.25  
   28.26 @@ -126,10 +121,10 @@ struct exec_domain *alloc_exec_domain_st
   28.27          edc->next_in_list = ed;
   28.28  
   28.29          if (test_bit(_VCPUF_cpu_pinned, &edc->vcpu_flags)) {
   28.30 -            ed->processor = (edc->processor + 1) % smp_num_cpus;
   28.31 +            ed->processor = (edc->processor + 1) % num_online_cpus();
   28.32              set_bit(_VCPUF_cpu_pinned, &ed->vcpu_flags);
   28.33          } else {
   28.34 -            ed->processor = (edc->processor + 1) % smp_num_cpus;  /* XXX */
   28.35 +            ed->processor = (edc->processor + 1) % num_online_cpus();
   28.36          }
   28.37      }
   28.38  
   28.39 @@ -168,20 +163,22 @@ void sched_add_domain(struct exec_domain
   28.40  {
   28.41      struct domain *d = ed->domain;
   28.42  
   28.43 -    /* Must be unpaused by control software to start execution. */
   28.44 -    set_bit(_VCPUF_ctrl_pause, &ed->vcpu_flags);
   28.45 +    /* Initialise the per-domain timer. */
   28.46 +    init_ac_timer(&ed->timer);
   28.47 +    ed->timer.cpu      = ed->processor;
   28.48 +    ed->timer.data     = (unsigned long)ed;
   28.49 +    ed->timer.function = &dom_timer_fn;
   28.50  
   28.51 -    if ( d->domain_id != IDLE_DOMAIN_ID )
   28.52 +    if ( is_idle_task(d) )
   28.53      {
   28.54 -        /* Initialise the per-domain timer. */
   28.55 -        init_ac_timer(&ed->timer);
   28.56 -        ed->timer.cpu      = ed->processor;
   28.57 -        ed->timer.data     = (unsigned long)ed;
   28.58 -        ed->timer.function = &dom_timer_fn;
   28.59 +        schedule_data[ed->processor].curr = ed;
   28.60 +        schedule_data[ed->processor].idle = ed;
   28.61 +        set_bit(_VCPUF_running, &ed->vcpu_flags);
   28.62      }
   28.63      else
   28.64      {
   28.65 -        schedule_data[ed->processor].idle = ed;
   28.66 +        /* Must be unpaused by control software to start execution. */
   28.67 +        set_bit(_VCPUF_ctrl_pause, &ed->vcpu_flags);
   28.68      }
   28.69  
   28.70      SCHED_OP(add_task, ed);
   28.71 @@ -195,12 +192,6 @@ void sched_rem_domain(struct exec_domain
   28.72      TRACE_2D(TRC_SCHED_DOM_REM, ed->domain->domain_id, ed->vcpu_id);
   28.73  }
   28.74  
   28.75 -void init_idle_task(void)
   28.76 -{
   28.77 -    if ( SCHED_OP(init_idle_task, current) < 0 )
   28.78 -        BUG();
   28.79 -}
   28.80 -
   28.81  void domain_sleep(struct exec_domain *ed)
   28.82  {
   28.83      unsigned long flags;
   28.84 @@ -240,10 +231,6 @@ long do_block(void)
   28.85  {
   28.86      struct exec_domain *ed = current;
   28.87  
   28.88 -#ifdef ADV_SCHED_HISTO
   28.89 -    adv_sched_hist_start(current->processor);
   28.90 -#endif
   28.91 -
   28.92      ed->vcpu_info->evtchn_upcall_mask = 0;
   28.93      set_bit(_VCPUF_blocked, &ed->vcpu_flags);
   28.94  
   28.95 @@ -264,10 +251,6 @@ long do_block(void)
   28.96  /* Voluntarily yield the processor for this allocation. */
   28.97  static long do_yield(void)
   28.98  {
   28.99 -#ifdef ADV_SCHED_HISTO
  28.100 -    adv_sched_hist_start(current->processor);
  28.101 -#endif
  28.102 -    
  28.103      TRACE_2D(TRC_SCHED_YIELD, current->domain->domain_id, current->vcpu_id);
  28.104      __enter_scheduler();
  28.105      return 0;
  28.106 @@ -422,13 +405,7 @@ static void __enter_scheduler(void)
  28.107      
  28.108      spin_lock_irq(&schedule_data[cpu].schedule_lock);
  28.109  
  28.110 -#ifdef ADV_SCHED_HISTO
  28.111 -    adv_sched_hist_from_stop(cpu);
  28.112 -#endif
  28.113      now = NOW();
  28.114 -#ifdef ADV_SCHED_HISTO
  28.115 -    adv_sched_hist_start(cpu);
  28.116 -#endif
  28.117  
  28.118      rem_ac_timer(&schedule_data[cpu].s_timer);
  28.119      
  28.120 @@ -447,7 +424,7 @@ static void __enter_scheduler(void)
  28.121      next->lastschd = now;
  28.122  
  28.123      /* reprogram the timer */
  28.124 -    schedule_data[cpu].s_timer.expires  = now + r_time;
  28.125 +    schedule_data[cpu].s_timer.expires = now + r_time;
  28.126      add_ac_timer(&schedule_data[cpu].s_timer);
  28.127  
  28.128      /* Must be protected by the schedule_lock! */
  28.129 @@ -455,12 +432,9 @@ static void __enter_scheduler(void)
  28.130  
  28.131      spin_unlock_irq(&schedule_data[cpu].schedule_lock);
  28.132  
  28.133 -    if ( unlikely(prev == next) ) {
  28.134 -#ifdef ADV_SCHED_HISTO
  28.135 -        adv_sched_hist_to_stop(cpu);
  28.136 -#endif
  28.137 +    if ( unlikely(prev == next) )
  28.138          return continue_running(prev);
  28.139 -    }
  28.140 +
  28.141      perfc_incrc(sched_ctx);
  28.142  
  28.143  #if defined(WAKE_HISTO)
  28.144 @@ -495,10 +469,6 @@ static void __enter_scheduler(void)
  28.145               prev->domain->domain_id, prev->vcpu_id,
  28.146               next->domain->domain_id, next->vcpu_id);
  28.147  
  28.148 -#ifdef ADV_SCHED_HISTO
  28.149 -    adv_sched_hist_to_stop(cpu);
  28.150 -#endif
  28.151 -
  28.152      context_switch(prev, next);
  28.153  }
  28.154  
  28.155 @@ -520,10 +490,6 @@ int idle_cpu(int cpu)
  28.156  /* The scheduler timer: force a run through the scheduler */
  28.157  static void s_timer_fn(unsigned long unused)
  28.158  {
  28.159 -#ifdef ADV_SCHED_HISTO
  28.160 -    adv_sched_hist_start(current->processor);
  28.161 -#endif
  28.162 -
  28.163      raise_softirq(SCHEDULE_SOFTIRQ);
  28.164      perfc_incrc(sched_irq);
  28.165  }
  28.166 @@ -567,8 +533,7 @@ void __init scheduler_init(void)
  28.167      for ( i = 0; i < NR_CPUS; i++ )
  28.168      {
  28.169          spin_lock_init(&schedule_data[i].schedule_lock);
  28.170 -        schedule_data[i].curr = &idle0_exec_domain;
  28.171 -        
  28.172 +
  28.173          init_ac_timer(&schedule_data[i].s_timer);
  28.174          schedule_data[i].s_timer.cpu      = i;
  28.175          schedule_data[i].s_timer.data     = 2;
  28.176 @@ -580,7 +545,8 @@ void __init scheduler_init(void)
  28.177          t_timer[i].function = &t_timer_fn;
  28.178      }
  28.179  
  28.180 -    schedule_data[0].idle = &idle0_exec_domain;
  28.181 +    schedule_data[0].curr = idle_task[0];
  28.182 +    schedule_data[0].idle = idle_task[0];
  28.183  
  28.184      for ( i = 0; schedulers[i] != NULL; i++ )
  28.185      {
  28.186 @@ -594,8 +560,8 @@ void __init scheduler_init(void)
  28.187  
  28.188      printk("Using scheduler: %s (%s)\n", ops.name, ops.opt_name);
  28.189  
  28.190 -    if ( SCHED_OP(init_scheduler) < 0 )
  28.191 -        panic("Initialising scheduler failed!");
  28.192 +    BUG_ON(SCHED_OP(alloc_task, idle_task[0]) < 0);
  28.193 +    sched_add_domain(idle_task[0]);
  28.194  }
  28.195  
  28.196  /*
  28.197 @@ -604,14 +570,10 @@ void __init scheduler_init(void)
  28.198   */
  28.199  void schedulers_start(void) 
  28.200  {   
  28.201 -    s_timer_fn(0);
  28.202 -    smp_call_function((void *)s_timer_fn, NULL, 1, 1);
  28.203 -
  28.204      t_timer_fn(0);
  28.205      smp_call_function((void *)t_timer_fn, NULL, 1, 1);
  28.206  }
  28.207  
  28.208 -
  28.209  void dump_runq(unsigned char key)
  28.210  {
  28.211      s_time_t      now = NOW();
  28.212 @@ -624,7 +586,7 @@ void dump_runq(unsigned char key)
  28.213      SCHED_OP(dump_settings);
  28.214      printk("NOW=0x%08X%08X\n",  (u32)(now>>32), (u32)now); 
  28.215  
  28.216 -    for ( i = 0; i < smp_num_cpus; i++ )
  28.217 +    for_each_online_cpu ( i )
  28.218      {
  28.219          spin_lock(&schedule_data[i].schedule_lock);
  28.220          printk("CPU[%02d] ", i);
  28.221 @@ -636,10 +598,11 @@ void dump_runq(unsigned char key)
  28.222  }
  28.223  
  28.224  #if defined(WAKE_HISTO) || defined(BLOCKTIME_HISTO)
  28.225 +
  28.226  void print_sched_histo(unsigned char key)
  28.227  {
  28.228      int i, j, k;
  28.229 -    for ( k = 0; k < smp_num_cpus; k++ )
  28.230 +    for_each_online_cpu ( k )
  28.231      {
  28.232          j = 0;
  28.233          printf ("CPU[%02d]: scheduler latency histogram (ms:[count])\n", k);
  28.234 @@ -659,73 +622,20 @@ void print_sched_histo(unsigned char key
  28.235      }
  28.236        
  28.237  }
  28.238 +
  28.239  void reset_sched_histo(unsigned char key)
  28.240  {
  28.241      int i, j;
  28.242 -    for ( j = 0; j < smp_num_cpus; j++ )
  28.243 +    for ( j = 0; j < NR_CPUS; j++ )
  28.244          for ( i=0; i < BUCKETS; i++ ) 
  28.245              schedule_data[j].hist[i] = 0;
  28.246  }
  28.247 +
  28.248  #else
  28.249 -#if defined(ADV_SCHED_HISTO)
  28.250 -void print_sched_histo(unsigned char key)
  28.251 -{
  28.252 -    int i, j, k,t;
  28.253 -    printf("Hello!\n");
  28.254 -    for ( k = 0; k < smp_num_cpus; k++ )
  28.255 -    {
  28.256 -        j = 0;
  28.257 -	t = 0;
  28.258 -        printf ("CPU[%02d]: scheduler latency histogram FROM (ms:[count])\n", k);
  28.259 -        for ( i = 0; i < BUCKETS; i++ )
  28.260 -        {
  28.261 -            //if ( schedule_data[k].hist[i] != 0 )
  28.262 -            {
  28.263 -	        t += schedule_data[k].from_hist[i];
  28.264 -                if ( i < BUCKETS-1 )
  28.265 -                    printk("%3d:[%7u]    ", i, schedule_data[k].from_hist[i]);
  28.266 -                else
  28.267 -                    printk(" >:[%7u]    ", schedule_data[k].from_hist[i]);
  28.268 -                //if ( !(++j % 5) )
  28.269 -                    printk("\n");
  28.270 -            }
  28.271 -        }
  28.272 -        printk("\nTotal: %i\n",t);
  28.273 -    }
  28.274 -    for ( k = 0; k < smp_num_cpus; k++ )
  28.275 -    {
  28.276 -        j = 0; t = 0;
  28.277 -        printf ("CPU[%02d]: scheduler latency histogram TO (ms:[count])\n", k);
  28.278 -        for ( i = 0; i < BUCKETS; i++ )
  28.279 -        {
  28.280 -            //if ( schedule_data[k].hist[i] != 0 )
  28.281 -            {
  28.282 -	    	t += schedule_data[k].from_hist[i];
  28.283 -                if ( i < BUCKETS-1 )
  28.284 -                    printk("%3d:[%7u]    ", i, schedule_data[k].to_hist[i]);
  28.285 -                else
  28.286 -                    printk(" >:[%7u]    ", schedule_data[k].to_hist[i]);
  28.287 -                //if ( !(++j % 5) )
  28.288 -                    printk("\n");
  28.289 -            }
  28.290 -        }
  28.291 -	printk("\nTotal: %i\n",t);
  28.292 -    }
  28.293 -      
  28.294 -}
  28.295 -void reset_sched_histo(unsigned char key)
  28.296 -{
  28.297 -    int i, j;
  28.298 -    for ( j = 0; j < smp_num_cpus; j++ ) {
  28.299 -        for ( i=0; i < BUCKETS; i++ ) 
  28.300 -            schedule_data[j].to_hist[i] = schedule_data[j].from_hist[i] = 0;
  28.301 -        schedule_data[j].save_tsc = 0;
  28.302 -    }
  28.303 -}
  28.304 -#else
  28.305 +
  28.306  void print_sched_histo(unsigned char key) { }
  28.307  void reset_sched_histo(unsigned char key) { }
  28.308 -#endif
  28.309 +
  28.310  #endif
  28.311  
  28.312  /*
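
    [Editor's note] The hunks above replace every "for ( i = 0; i < smp_num_cpus; i++ )"
    walk with the cpumask iterators introduced later in this changeset. A minimal
    sketch of the new pattern, assuming the cpumask.h interfaces below are in scope
    (the loop body is illustrative only):

        int cpu;

        /* Old style: assumes CPUs are numbered 0 .. smp_num_cpus-1 with no gaps. */
        /* for ( cpu = 0; cpu < smp_num_cpus; cpu++ ) ...                         */

        /* New style: visits exactly the CPUs whose bit is set in cpu_online_map,
         * so sparse or hot-plugged CPU numbering is handled for free. */
        for_each_online_cpu ( cpu )
        {
            spin_lock(&schedule_data[cpu].schedule_lock);
            /* ... per-CPU work, as in dump_runq() above ... */
            spin_unlock(&schedule_data[cpu].schedule_lock);
        }
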
    29.1 --- a/xen/common/trace.c	Wed May 25 10:32:53 2005 +0000
    29.2 +++ b/xen/common/trace.c	Wed May 25 10:36:59 2005 +0000
    29.3 @@ -66,7 +66,7 @@ void init_trace_bufs(void)
    29.4          return;
    29.5      }
    29.6  
    29.7 -    nr_pages = smp_num_cpus * opt_tbuf_size;
    29.8 +    nr_pages = num_online_cpus() * opt_tbuf_size;
    29.9      order    = get_order(nr_pages * PAGE_SIZE);
   29.10      
   29.11      if ( (rawbuf = (char *)alloc_xenheap_pages(order)) == NULL )
   29.12 @@ -79,7 +79,7 @@ void init_trace_bufs(void)
   29.13      for ( i = 0; i < nr_pages; i++ )
   29.14          SHARE_PFN_WITH_DOMAIN(virt_to_page(rawbuf + i * PAGE_SIZE), dom0);
   29.15      
   29.16 -    for ( i = 0; i < smp_num_cpus; i++ )
   29.17 +    for_each_online_cpu ( i )
   29.18      {
   29.19          buf = t_bufs[i] = (struct t_buf *)&rawbuf[i*opt_tbuf_size*PAGE_SIZE];
   29.20          
    30.1 --- a/xen/include/asm-x86/asm_defns.h	Wed May 25 10:32:53 2005 +0000
    30.2 +++ b/xen/include/asm-x86/asm_defns.h	Wed May 25 10:36:59 2005 +0000
    30.3 @@ -6,8 +6,10 @@
    30.4  #include <asm/asm-offsets.h>
    30.5  #include <asm/processor.h>
    30.6  
    30.7 +#ifndef STR
    30.8  #define __STR(x) #x
    30.9  #define STR(x) __STR(x)
   30.10 +#endif
   30.11  
   30.12  #ifdef __x86_64__
   30.13  #include <asm/x86_64/asm_defns.h>
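
    [Editor's note] Both asm_defns.h and bitops.h now guard the same STR()/__STR()
    pair, which the new bitops inline assembly uses to splice constants into
    instruction strings. A standalone illustration of the two-level expansion
    (the BITS_PER_LONG value here is illustrative only, not taken from Xen's config):

        #include <stdio.h>

        #define __STR(x) #x
        #define STR(x) __STR(x)

        #define BITS_PER_LONG 64   /* illustrative value only */

        int main(void)
        {
            /* The outer macro expands its argument first, so the macro name is
             * replaced by its value before '#' stringifies it. */
            printf("%s\n", STR(BITS_PER_LONG/8));   /* prints "64/8"            */
            printf("%s\n", __STR(BITS_PER_LONG/8)); /* prints "BITS_PER_LONG/8" */
            return 0;
        }
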
    31.1 --- a/xen/include/asm-x86/bitops.h	Wed May 25 10:32:53 2005 +0000
    31.2 +++ b/xen/include/asm-x86/bitops.h	Wed May 25 10:36:59 2005 +0000
    31.3 @@ -7,6 +7,11 @@
    31.4  
    31.5  #include <xen/config.h>
    31.6  
    31.7 +#ifndef STR
    31.8 +#define __STR(x) #x
    31.9 +#define STR(x) __STR(x)
   31.10 +#endif
   31.11 +
   31.12  /*
   31.13   * These have to be done with inline assembly: that way the bit-setting
   31.14   * is guaranteed to be atomic. All bit operations return 0 if the bit
   31.15 @@ -246,29 +251,28 @@ static __inline__ int variable_test_bit(
   31.16  /**
   31.17   * find_first_zero_bit - find the first zero bit in a memory region
   31.18   * @addr: The address to start the search at
   31.19 - * @size: The maximum bitnumber to search
   31.20 + * @size: The maximum size to search
   31.21   *
   31.22   * Returns the bit-number of the first zero bit, not the number of the byte
   31.23 - * containing a bit. -1 when none found.
   31.24 + * containing a bit.
   31.25   */
   31.26 -static __inline__ int find_first_zero_bit(void * addr, unsigned size)
   31.27 +static inline long find_first_zero_bit(
   31.28 +    const unsigned long *addr, unsigned size)
   31.29  {
   31.30 -	int d0, d1, d2;
   31.31 -	int res;
   31.32 +	long d0, d1, d2;
   31.33 +	long res;
   31.34  
   31.35 -	if (!size)
   31.36 -		return 0;
   31.37  	__asm__ __volatile__(
   31.38 -		"movl $-1,%%eax\n\t"
   31.39 -		"xorl %%edx,%%edx\n\t"
   31.40 -		"repe; scasl\n\t"
   31.41 +		"mov $-1,%%"__OP"ax\n\t"
   31.42 +		"xor %%edx,%%edx\n\t"
   31.43 +		"repe; scas"__OS"\n\t"
   31.44  		"je 1f\n\t"
   31.45 -		"xorl -4(%%"__OP"di),%%eax\n\t"
   31.46 -		"sub"__OS" $4,%%"__OP"di\n\t"
   31.47 -		"bsfl %%eax,%%edx\n"
   31.48 -		"1:\tsub"__OS" %%"__OP"bx,%%"__OP"di\n\t"
   31.49 -		"shl"__OS" $3,%%"__OP"di\n\t"
   31.50 -		"add"__OS" %%"__OP"di,%%"__OP"dx"
   31.51 +		"lea -"STR(BITS_PER_LONG/8)"(%%"__OP"di),%%"__OP"di\n\t"
   31.52 +		"xor (%%"__OP"di),%%"__OP"ax\n\t"
   31.53 +		"bsf %%"__OP"ax,%%"__OP"dx\n"
   31.54 +		"1:\tsub %%"__OP"bx,%%"__OP"di\n\t"
   31.55 +		"shl $3,%%"__OP"di\n\t"
   31.56 +		"add %%"__OP"di,%%"__OP"dx"
   31.57  		:"=d" (res), "=&c" (d0), "=&D" (d1), "=&a" (d2)
   31.58  		:"1" ((size + 31) >> 5), "2" (addr), "b" (addr) : "memory");
   31.59  	return res;
   31.60 @@ -280,65 +284,71 @@ static __inline__ int find_first_zero_bi
   31.61   * @offset: The bitnumber to start searching at
   31.62   * @size: The maximum size to search
   31.63   */
   31.64 -static __inline__ int find_next_zero_bit (void * addr, int size, int offset)
   31.65 +long find_next_zero_bit(const unsigned long *addr, int size, int offset);
   31.66 +
   31.67 +/**
   31.68 + * find_first_bit - find the first set bit in a memory region
   31.69 + * @addr: The address to start the search at
   31.70 + * @size: The maximum size to search
   31.71 + *
   31.72 + * Returns the bit-number of the first set bit, not the number of the byte
   31.73 + * containing a bit.
   31.74 + */
   31.75 +static inline long find_first_bit(
   31.76 +    const unsigned long *addr, unsigned size)
   31.77  {
   31.78 -	unsigned int * p = ((unsigned int *) addr) + (offset >> 5);
   31.79 -	int set = 0, bit = offset & 31, res;
   31.80 -	
   31.81 -	if (bit) {
   31.82 -		/*
   31.83 -		 * Look for zero in first byte
   31.84 -		 */
   31.85 -		__asm__("bsfl %1,%0\n\t"
   31.86 -			"jne 1f\n\t"
   31.87 -			"movl $32, %0\n"
   31.88 -			"1:"
   31.89 -			: "=r" (set)
   31.90 -			: "r" (~(*p >> bit)));
   31.91 -		if (set < (32 - bit))
   31.92 -			return set + offset;
   31.93 -		set = 32 - bit;
   31.94 -		p++;
   31.95 -	}
   31.96 -	/*
   31.97 -	 * No zero yet, search remaining full bytes for a zero
   31.98 -	 */
   31.99 -	res = find_first_zero_bit (p, size - 32 * (p - (unsigned int *) addr));
  31.100 -	return (offset + set + res);
  31.101 +	long d0, d1;
  31.102 +	long res;
  31.103 +
  31.104 +	__asm__ __volatile__(
  31.105 +		"xor %%eax,%%eax\n\t"
  31.106 +		"repe; scas"__OS"\n\t"
  31.107 +		"je 1f\n\t"
  31.108 +		"lea -"STR(BITS_PER_LONG/8)"(%%"__OP"di),%%"__OP"di\n\t"
  31.109 +		"bsf (%%"__OP"di),%%"__OP"ax\n"
  31.110 +		"1:\tsub %%"__OP"bx,%%"__OP"di\n\t"
  31.111 +		"shl $3,%%"__OP"di\n\t"
  31.112 +		"add %%"__OP"di,%%"__OP"ax"
  31.113 +		:"=a" (res), "=&c" (d0), "=&D" (d1)
  31.114 +		:"1" ((size + 31) >> 5), "2" (addr), "b" (addr) : "memory");
  31.115 +	return res;
  31.116  }
  31.117  
  31.118  /**
  31.119 - * ffz - find first zero in word.
  31.120 - * @word: The word to search
  31.121 - *
  31.122 - * Undefined if no zero exists, so code should check against ~0UL first.
  31.123 + * find_next_bit - find the first set bit in a memory region
  31.124 + * @addr: The address to base the search on
  31.125 + * @offset: The bitnumber to start searching at
  31.126 + * @size: The maximum size to search
  31.127   */
  31.128 -static __inline__ unsigned long ffz(unsigned long word)
  31.129 +long find_next_bit(const unsigned long *addr, int size, int offset);
  31.130 +
   31.131 +/* return index of first bit set in val, or max when no bit is set */
  31.132 +static inline unsigned long __scanbit(unsigned long val, unsigned long max)
  31.133  {
  31.134 -	__asm__("bsf"__OS" %1,%0"
  31.135 -		:"=r" (word)
  31.136 -		:"r" (~word));
  31.137 -	return word;
  31.138 +	asm("bsf %1,%0 ; cmovz %2,%0" : "=&r" (val) : "r" (val), "r" (max));
  31.139 +	return val;
  31.140  }
  31.141  
  31.142 -/**
  31.143 - * ffs - find first bit set
  31.144 - * @x: the word to search
  31.145 - *
  31.146 - * This is defined the same way as
  31.147 - * the libc and compiler builtin ffs routines, therefore
  31.148 - * differs in spirit from the above ffz (man ffs).
  31.149 - */
  31.150 -static __inline__ int ffs(int x)
  31.151 -{
  31.152 -	int r;
  31.153 +#define find_first_bit(addr,size) \
  31.154 +((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ? \
  31.155 +  (__scanbit(*(unsigned long *)addr,(size))) : \
  31.156 +  find_first_bit(addr,size)))
  31.157  
  31.158 -	__asm__("bsfl %1,%0\n\t"
  31.159 -		"jnz 1f\n\t"
  31.160 -		"movl $-1,%0\n"
  31.161 -		"1:" : "=r" (r) : "g" (x));
  31.162 -	return r+1;
  31.163 -}
  31.164 +#define find_next_bit(addr,size,off) \
  31.165 +((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ?         \
  31.166 +  ((off) + (__scanbit((*(unsigned long *)addr) >> (off),(size)-(off)))) : \
  31.167 +  find_next_bit(addr,size,off)))
  31.168 +
  31.169 +#define find_first_zero_bit(addr,size) \
  31.170 +((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ? \
  31.171 +  (__scanbit(~*(unsigned long *)addr,(size))) : \
  31.172 +  find_first_zero_bit(addr,size)))
  31.173 +        
  31.174 +#define find_next_zero_bit(addr,size,off) \
  31.175 +((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ?         \
  31.176 +  ((off)+(__scanbit(~(((*(unsigned long *)addr)) >> (off)),(size)-(off)))) : \
  31.177 +  find_next_zero_bit(addr,size,off)))
  31.178 +
  31.179  
  31.180  /*
  31.181   * These are the preferred 'find first' functions in Xen.
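
    [Editor's note] find_first_bit() and friends now route word-sized,
    compile-time-constant searches through __scanbit() and fall back to the
    out-of-line functions otherwise. A portable model of the fast path's
    semantics, for illustration only (the real version is the bsf/cmovz inline
    assembly above; __builtin_ctzl is used here purely as a stand-in):

        #include <stdio.h>

        /* Model of __scanbit(): index of the lowest set bit in 'val', or 'max'
         * when no bit is set (cmovz supplies 'max' when bsf leaves ZF set). */
        static unsigned long scanbit_model(unsigned long val, unsigned long max)
        {
            return val ? (unsigned long)__builtin_ctzl(val) : max;
        }

        int main(void)
        {
            printf("%lu\n", scanbit_model(0x10UL, 32));  /* 4: lowest set bit        */
            printf("%lu\n", scanbit_model(0x0UL,  32));  /* 32: empty word -> max    */
            printf("%lu\n", scanbit_model(~0x7UL, 32));  /* 3: first zero bit of 0x7 */
            return 0;
        }
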
    32.1 --- a/xen/include/asm-x86/div64.h	Wed May 25 10:32:53 2005 +0000
    32.2 +++ b/xen/include/asm-x86/div64.h	Wed May 25 10:36:59 2005 +0000
    32.3 @@ -1,17 +1,48 @@
    32.4  #ifndef __I386_DIV64
    32.5  #define __I386_DIV64
    32.6  
    32.7 +/*
    32.8 + * do_div() is NOT a C function. It wants to return
    32.9 + * two values (the quotient and the remainder), but
   32.10 + * since that doesn't work very well in C, what it
   32.11 + * does is:
   32.12 + *
   32.13 + * - modifies the 64-bit dividend _in_place_
   32.14 + * - returns the 32-bit remainder
   32.15 + *
   32.16 + * This ends up being the most efficient "calling
   32.17 + * convention" on x86.
   32.18 + */
   32.19  #define do_div(n,base) ({ \
   32.20 -	unsigned long __upper, __low, __high, __mod; \
   32.21 +	unsigned long __upper, __low, __high, __mod, __base; \
   32.22 +	__base = (base); \
   32.23  	asm("":"=a" (__low), "=d" (__high):"A" (n)); \
   32.24  	__upper = __high; \
   32.25  	if (__high) { \
   32.26 -		__upper = __high % (base); \
   32.27 -		__high = __high / (base); \
   32.28 +		__upper = __high % (__base); \
   32.29 +		__high = __high / (__base); \
   32.30  	} \
   32.31 -	asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (base), "0" (__low), "1" (__upper)); \
   32.32 +	asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (__base), "0" (__low), "1" (__upper)); \
   32.33  	asm("":"=A" (n):"a" (__low),"d" (__high)); \
   32.34  	__mod; \
   32.35  })
   32.36  
   32.37 +/*
   32.38 + * (long)X = ((long long)divs) / (long)div
   32.39 + * (long)rem = ((long long)divs) % (long)div
   32.40 + *
    32.41 + * Warning: this will raise a divide-error exception if X overflows.
   32.42 + */
   32.43 +#define div_long_long_rem(a,b,c) div_ll_X_l_rem(a,b,c)
   32.44 +
   32.45 +extern inline long
   32.46 +div_ll_X_l_rem(long long divs, long div, long *rem)
   32.47 +{
   32.48 +	long dum2;
   32.49 +      __asm__("divl %2":"=a"(dum2), "=d"(*rem)
   32.50 +      :	"rm"(div), "A"(divs));
   32.51 +
   32.52 +	return dum2;
   32.53 +
   32.54 +}
   32.55  #endif
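
    [Editor's note] do_div() rewrites its 64-bit dividend in place and hands back
    the 32-bit remainder, as the new comment explains. A minimal usage sketch
    (the values and variable names are illustrative, not from the Xen sources):

        u64 ns = 12345678901ULL;      /* e.g. a NOW()-style nanosecond count */
        u32 rem;

        rem = do_div(ns, 1000000000UL);
        /* now ns == 12 (whole seconds) and rem == 345678901 (leftover ns)   */
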
    33.1 --- a/xen/include/asm-x86/flushtlb.h	Wed May 25 10:32:53 2005 +0000
    33.2 +++ b/xen/include/asm-x86/flushtlb.h	Wed May 25 10:36:59 2005 +0000
    33.3 @@ -93,7 +93,7 @@ extern void write_cr3(unsigned long cr3)
    33.4  #define local_flush_tlb_one(__addr) \
    33.5      __asm__ __volatile__("invlpg %0": :"m" (*(char *) (__addr)))
    33.6  
    33.7 -#define flush_tlb_all()     flush_tlb_mask((1 << smp_num_cpus) - 1)
    33.8 +#define flush_tlb_all()     flush_tlb_mask((1 << num_online_cpus()) - 1)
    33.9  
   33.10  #ifndef CONFIG_SMP
   33.11  #define flush_tlb_all_pge()          local_flush_tlb_pge()
    34.1 --- a/xen/include/asm-x86/irq.h	Wed May 25 10:32:53 2005 +0000
    34.2 +++ b/xen/include/asm-x86/irq.h	Wed May 25 10:36:59 2005 +0000
    34.3 @@ -21,38 +21,31 @@ extern void (*interrupt[NR_IRQS])(void);
    34.4  
    34.5  #define platform_legacy_irq(irq)	((irq) < 16)
    34.6  
    34.7 -extern void mask_irq(unsigned int irq);
    34.8 -extern void unmask_irq(unsigned int irq);
    34.9 -extern void disable_8259A_irq(unsigned int irq);
   34.10 -extern void enable_8259A_irq(unsigned int irq);
   34.11 -extern int i8259A_irq_pending(unsigned int irq);
   34.12 -extern void make_8259A_irq(unsigned int irq);
   34.13 -extern void init_8259A(int aeoi);
   34.14 -extern void send_IPI_self(int vector);
   34.15 -extern void init_VISWS_APIC_irqs(void);
   34.16 -extern void setup_IO_APIC(void);
   34.17 -extern void disable_IO_APIC(void);
   34.18 -extern void print_IO_APIC(void);
   34.19 -extern int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn);
   34.20 -extern void send_IPI(int dest, int vector);
   34.21 +void disable_8259A_irq(unsigned int irq);
   34.22 +void enable_8259A_irq(unsigned int irq);
   34.23 +int i8259A_irq_pending(unsigned int irq);
   34.24 +void make_8259A_irq(unsigned int irq);
   34.25 +void init_8259A(int aeoi);
   34.26 +void send_IPI_self(int vector);
   34.27 +void init_VISWS_APIC_irqs(void);
   34.28 +void setup_IO_APIC(void);
   34.29 +void disable_IO_APIC(void);
   34.30 +void print_IO_APIC(void);
   34.31 +int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn);
   34.32 +void send_IPI(int dest, int vector);
   34.33 +void setup_ioapic_dest(void);
   34.34  
   34.35  extern unsigned long io_apic_irqs;
   34.36  
   34.37  extern atomic_t irq_err_count;
   34.38  extern atomic_t irq_mis_count;
   34.39  
   34.40 -extern char _stext, _etext;
   34.41 -
   34.42  #define IO_APIC_IRQ(x) (((x) >= 16) || ((1<<(x)) & io_apic_irqs))
   34.43  
   34.44 -#include <xen/irq.h>
   34.45 -
   34.46  static inline void hw_resend_irq(struct hw_interrupt_type *h, unsigned int i)
   34.47  {
   34.48 -#if defined(CONFIG_X86_IO_APIC)
   34.49      if (IO_APIC_IRQ(i))
   34.50          send_IPI_self(IO_APIC_VECTOR(i));
   34.51 -#endif
   34.52  }
   34.53  
   34.54  #endif /* _ASM_HW_IRQ_H */
    35.1 --- a/xen/include/asm-x86/processor.h	Wed May 25 10:32:53 2005 +0000
    35.2 +++ b/xen/include/asm-x86/processor.h	Wed May 25 10:36:59 2005 +0000
    35.3 @@ -179,6 +179,7 @@ extern struct cpuinfo_x86 cpu_data[];
    35.4  #define current_cpu_data boot_cpu_data
    35.5  #endif
    35.6  
    35.7 +extern  int phys_proc_id[NR_CPUS];
    35.8  extern char ignore_irq13;
    35.9  
   35.10  extern void identify_cpu(struct cpuinfo_x86 *);
    36.1 --- a/xen/include/xen/bitmap.h	Wed May 25 10:32:53 2005 +0000
    36.2 +++ b/xen/include/xen/bitmap.h	Wed May 25 10:36:59 2005 +0000
    36.3 @@ -6,6 +6,7 @@
    36.4  #include <xen/config.h>
    36.5  #include <xen/lib.h>
    36.6  #include <xen/types.h>
    36.7 +#include <xen/bitops.h>
    36.8  
    36.9  /*
   36.10   * bitmaps provide bit arrays that consume one or more unsigned
    37.1 --- a/xen/include/xen/cpumask.h	Wed May 25 10:32:53 2005 +0000
    37.2 +++ b/xen/include/xen/cpumask.h	Wed May 25 10:36:59 2005 +0000
    37.3 @@ -1,27 +1,378 @@
    37.4 +#ifndef __XEN_CPUMASK_H
    37.5 +#define __XEN_CPUMASK_H
    37.6 +
    37.7  /*
    37.8 - * XXX This to be replaced with the Linux file in the near future.
    37.9 + * Cpumasks provide a bitmap suitable for representing the
   37.10 + * set of CPU's in a system, one bit position per CPU number.
   37.11 + *
   37.12 + * See detailed comments in the file xen/bitmap.h describing the
   37.13 + * data type on which these cpumasks are based.
   37.14 + *
   37.15 + * For details of cpumask_scnprintf() and cpumask_parse(),
   37.16 + * see bitmap_scnprintf() and bitmap_parse() in lib/bitmap.c.
   37.17 + *
   37.18 + * The available cpumask operations are:
   37.19 + *
   37.20 + * void cpu_set(cpu, mask)		turn on bit 'cpu' in mask
   37.21 + * void cpu_clear(cpu, mask)		turn off bit 'cpu' in mask
   37.22 + * void cpus_setall(mask)		set all bits
   37.23 + * void cpus_clear(mask)		clear all bits
   37.24 + * int cpu_isset(cpu, mask)		true iff bit 'cpu' set in mask
   37.25 + * int cpu_test_and_set(cpu, mask)	test and set bit 'cpu' in mask
   37.26 + *
   37.27 + * void cpus_and(dst, src1, src2)	dst = src1 & src2  [intersection]
   37.28 + * void cpus_or(dst, src1, src2)	dst = src1 | src2  [union]
   37.29 + * void cpus_xor(dst, src1, src2)	dst = src1 ^ src2
   37.30 + * void cpus_andnot(dst, src1, src2)	dst = src1 & ~src2
   37.31 + * void cpus_complement(dst, src)	dst = ~src
   37.32 + *
   37.33 + * int cpus_equal(mask1, mask2)		Does mask1 == mask2?
   37.34 + * int cpus_intersects(mask1, mask2)	Do mask1 and mask2 intersect?
   37.35 + * int cpus_subset(mask1, mask2)	Is mask1 a subset of mask2?
    37.36 + * int cpus_empty(mask)			Is mask empty (no bits set)?
    37.37 + * int cpus_full(mask)			Is mask full (all bits set)?
    37.38 + * int cpus_weight(mask)		Hamming weight - number of set bits
   37.39 + *
   37.40 + * void cpus_shift_right(dst, src, n)	Shift right
   37.41 + * void cpus_shift_left(dst, src, n)	Shift left
   37.42 + *
   37.43 + * int first_cpu(mask)			Number lowest set bit, or NR_CPUS
   37.44 + * int next_cpu(cpu, mask)		Next cpu past 'cpu', or NR_CPUS
   37.45 + *
   37.46 + * cpumask_t cpumask_of_cpu(cpu)	Return cpumask with bit 'cpu' set
   37.47 + * CPU_MASK_ALL				Initializer - all bits set
   37.48 + * CPU_MASK_NONE			Initializer - no bits set
   37.49 + * unsigned long *cpus_addr(mask)	Array of unsigned long's in mask
   37.50 + *
   37.51 + * int cpumask_scnprintf(buf, len, mask) Format cpumask for printing
   37.52 + * int cpumask_parse(ubuf, ulen, mask)	Parse ascii string as cpumask
   37.53 + *
   37.54 + * for_each_cpu_mask(cpu, mask)		for-loop cpu over mask
   37.55 + *
   37.56 + * int num_online_cpus()		Number of online CPUs
   37.57 + * int num_possible_cpus()		Number of all possible CPUs
   37.58 + * int num_present_cpus()		Number of present CPUs
   37.59 + *
   37.60 + * int cpu_online(cpu)			Is some cpu online?
   37.61 + * int cpu_possible(cpu)		Is some cpu possible?
   37.62 + * int cpu_present(cpu)			Is some cpu present (can schedule)?
   37.63 + *
   37.64 + * int any_online_cpu(mask)		First online cpu in mask
   37.65 + *
   37.66 + * for_each_cpu(cpu)			for-loop cpu over cpu_possible_map
   37.67 + * for_each_online_cpu(cpu)		for-loop cpu over cpu_online_map
   37.68 + * for_each_present_cpu(cpu)		for-loop cpu over cpu_present_map
   37.69 + *
   37.70 + * Subtlety:
   37.71 + * 1) The 'type-checked' form of cpu_isset() causes gcc (3.3.2, anyway)
   37.72 + *    to generate slightly worse code.  Note for example the additional
   37.73 + *    40 lines of assembly code compiling the "for each possible cpu"
   37.74 + *    loops buried in the disk_stat_read() macros calls when compiling
   37.75 + *    drivers/block/genhd.c (arch i386, CONFIG_SMP=y).  So use a simple
   37.76 + *    one-line #define for cpu_isset(), instead of wrapping an inline
   37.77 + *    inside a macro, the way we do the other calls.
   37.78   */
   37.79  
   37.80 -#ifndef __XEN_CPUMASK_H__
   37.81 -#define __XEN_CPUMASK_H__
   37.82 -
   37.83 +#include <xen/config.h>
   37.84  #include <xen/bitmap.h>
   37.85 +#include <xen/kernel.h>
   37.86  
   37.87 -typedef u32 cpumask_t;
   37.88 +typedef struct { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t;
   37.89 +extern cpumask_t _unused_cpumask_arg_;
   37.90  
   37.91 -#ifndef cpu_online_map
   37.92 -extern cpumask_t cpu_online_map;
   37.93 +#define cpu_set(cpu, dst) __cpu_set((cpu), &(dst))
   37.94 +static inline void __cpu_set(int cpu, volatile cpumask_t *dstp)
   37.95 +{
   37.96 +	set_bit(cpu, dstp->bits);
   37.97 +}
   37.98 +
   37.99 +#define cpu_clear(cpu, dst) __cpu_clear((cpu), &(dst))
  37.100 +static inline void __cpu_clear(int cpu, volatile cpumask_t *dstp)
  37.101 +{
  37.102 +	clear_bit(cpu, dstp->bits);
  37.103 +}
  37.104 +
  37.105 +#define cpus_setall(dst) __cpus_setall(&(dst), NR_CPUS)
  37.106 +static inline void __cpus_setall(cpumask_t *dstp, int nbits)
  37.107 +{
  37.108 +	bitmap_fill(dstp->bits, nbits);
  37.109 +}
  37.110 +
  37.111 +#define cpus_clear(dst) __cpus_clear(&(dst), NR_CPUS)
  37.112 +static inline void __cpus_clear(cpumask_t *dstp, int nbits)
  37.113 +{
  37.114 +	bitmap_zero(dstp->bits, nbits);
  37.115 +}
  37.116 +
  37.117 +/* No static inline type checking - see Subtlety (1) above. */
  37.118 +#define cpu_isset(cpu, cpumask) test_bit((cpu), (cpumask).bits)
  37.119 +
  37.120 +#define cpu_test_and_set(cpu, cpumask) __cpu_test_and_set((cpu), &(cpumask))
  37.121 +static inline int __cpu_test_and_set(int cpu, cpumask_t *addr)
  37.122 +{
  37.123 +	return test_and_set_bit(cpu, addr->bits);
  37.124 +}
  37.125 +
  37.126 +#define cpus_and(dst, src1, src2) __cpus_and(&(dst), &(src1), &(src2), NR_CPUS)
  37.127 +static inline void __cpus_and(cpumask_t *dstp, const cpumask_t *src1p,
  37.128 +					const cpumask_t *src2p, int nbits)
  37.129 +{
  37.130 +	bitmap_and(dstp->bits, src1p->bits, src2p->bits, nbits);
  37.131 +}
  37.132 +
  37.133 +#define cpus_or(dst, src1, src2) __cpus_or(&(dst), &(src1), &(src2), NR_CPUS)
  37.134 +static inline void __cpus_or(cpumask_t *dstp, const cpumask_t *src1p,
  37.135 +					const cpumask_t *src2p, int nbits)
  37.136 +{
  37.137 +	bitmap_or(dstp->bits, src1p->bits, src2p->bits, nbits);
  37.138 +}
  37.139 +
  37.140 +#define cpus_xor(dst, src1, src2) __cpus_xor(&(dst), &(src1), &(src2), NR_CPUS)
  37.141 +static inline void __cpus_xor(cpumask_t *dstp, const cpumask_t *src1p,
  37.142 +					const cpumask_t *src2p, int nbits)
  37.143 +{
  37.144 +	bitmap_xor(dstp->bits, src1p->bits, src2p->bits, nbits);
  37.145 +}
  37.146 +
  37.147 +#define cpus_andnot(dst, src1, src2) \
  37.148 +				__cpus_andnot(&(dst), &(src1), &(src2), NR_CPUS)
  37.149 +static inline void __cpus_andnot(cpumask_t *dstp, const cpumask_t *src1p,
  37.150 +					const cpumask_t *src2p, int nbits)
  37.151 +{
  37.152 +	bitmap_andnot(dstp->bits, src1p->bits, src2p->bits, nbits);
  37.153 +}
  37.154 +
  37.155 +#define cpus_complement(dst, src) __cpus_complement(&(dst), &(src), NR_CPUS)
  37.156 +static inline void __cpus_complement(cpumask_t *dstp,
  37.157 +					const cpumask_t *srcp, int nbits)
  37.158 +{
  37.159 +	bitmap_complement(dstp->bits, srcp->bits, nbits);
  37.160 +}
  37.161 +
  37.162 +#define cpus_equal(src1, src2) __cpus_equal(&(src1), &(src2), NR_CPUS)
  37.163 +static inline int __cpus_equal(const cpumask_t *src1p,
  37.164 +					const cpumask_t *src2p, int nbits)
  37.165 +{
  37.166 +	return bitmap_equal(src1p->bits, src2p->bits, nbits);
  37.167 +}
  37.168 +
  37.169 +#define cpus_intersects(src1, src2) __cpus_intersects(&(src1), &(src2), NR_CPUS)
  37.170 +static inline int __cpus_intersects(const cpumask_t *src1p,
  37.171 +					const cpumask_t *src2p, int nbits)
  37.172 +{
  37.173 +	return bitmap_intersects(src1p->bits, src2p->bits, nbits);
  37.174 +}
  37.175 +
  37.176 +#define cpus_subset(src1, src2) __cpus_subset(&(src1), &(src2), NR_CPUS)
  37.177 +static inline int __cpus_subset(const cpumask_t *src1p,
  37.178 +					const cpumask_t *src2p, int nbits)
  37.179 +{
  37.180 +	return bitmap_subset(src1p->bits, src2p->bits, nbits);
  37.181 +}
  37.182 +
  37.183 +#define cpus_empty(src) __cpus_empty(&(src), NR_CPUS)
  37.184 +static inline int __cpus_empty(const cpumask_t *srcp, int nbits)
  37.185 +{
  37.186 +	return bitmap_empty(srcp->bits, nbits);
  37.187 +}
  37.188 +
  37.189 +#define cpus_full(cpumask) __cpus_full(&(cpumask), NR_CPUS)
  37.190 +static inline int __cpus_full(const cpumask_t *srcp, int nbits)
  37.191 +{
  37.192 +	return bitmap_full(srcp->bits, nbits);
  37.193 +}
  37.194 +
  37.195 +#define cpus_weight(cpumask) __cpus_weight(&(cpumask), NR_CPUS)
  37.196 +static inline int __cpus_weight(const cpumask_t *srcp, int nbits)
  37.197 +{
  37.198 +	return bitmap_weight(srcp->bits, nbits);
  37.199 +}
  37.200 +
  37.201 +#define cpus_shift_right(dst, src, n) \
  37.202 +			__cpus_shift_right(&(dst), &(src), (n), NR_CPUS)
  37.203 +static inline void __cpus_shift_right(cpumask_t *dstp,
  37.204 +					const cpumask_t *srcp, int n, int nbits)
  37.205 +{
  37.206 +	bitmap_shift_right(dstp->bits, srcp->bits, n, nbits);
  37.207 +}
  37.208 +
  37.209 +#define cpus_shift_left(dst, src, n) \
  37.210 +			__cpus_shift_left(&(dst), &(src), (n), NR_CPUS)
  37.211 +static inline void __cpus_shift_left(cpumask_t *dstp,
  37.212 +					const cpumask_t *srcp, int n, int nbits)
  37.213 +{
  37.214 +	bitmap_shift_left(dstp->bits, srcp->bits, n, nbits);
  37.215 +}
  37.216 +
  37.217 +#define first_cpu(src) __first_cpu(&(src), NR_CPUS)
  37.218 +static inline int __first_cpu(const cpumask_t *srcp, int nbits)
  37.219 +{
  37.220 +	return min_t(int, nbits, find_first_bit(srcp->bits, nbits));
  37.221 +}
  37.222 +
  37.223 +#define next_cpu(n, src) __next_cpu((n), &(src), NR_CPUS)
  37.224 +static inline int __next_cpu(int n, const cpumask_t *srcp, int nbits)
  37.225 +{
  37.226 +	return min_t(int, nbits, find_next_bit(srcp->bits, nbits, n+1));
  37.227 +}
  37.228 +
  37.229 +#define cpumask_of_cpu(cpu)						\
  37.230 +({									\
  37.231 +	typeof(_unused_cpumask_arg_) m;					\
  37.232 +	if (sizeof(m) == sizeof(unsigned long)) {			\
  37.233 +		m.bits[0] = 1UL<<(cpu);					\
  37.234 +	} else {							\
  37.235 +		cpus_clear(m);						\
  37.236 +		cpu_set((cpu), m);					\
  37.237 +	}								\
  37.238 +	m;								\
  37.239 +})
  37.240 +
  37.241 +#define CPU_MASK_LAST_WORD BITMAP_LAST_WORD_MASK(NR_CPUS)
  37.242 +
  37.243 +#if NR_CPUS <= BITS_PER_LONG
  37.244 +
  37.245 +#define CPU_MASK_ALL							\
  37.246 +(cpumask_t) { {								\
  37.247 +	[BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD			\
  37.248 +} }
  37.249 +
  37.250 +#else
  37.251 +
  37.252 +#define CPU_MASK_ALL							\
  37.253 +(cpumask_t) { {								\
  37.254 +	[0 ... BITS_TO_LONGS(NR_CPUS)-2] = ~0UL,			\
  37.255 +	[BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD			\
  37.256 +} }
  37.257 +
  37.258  #endif
  37.259  
  37.260 -static inline int cpus_weight(cpumask_t w)
  37.261 +#define CPU_MASK_NONE							\
  37.262 +(cpumask_t) { {								\
  37.263 +	[0 ... BITS_TO_LONGS(NR_CPUS)-1] =  0UL				\
  37.264 +} }
  37.265 +
  37.266 +#define CPU_MASK_CPU0							\
  37.267 +(cpumask_t) { {								\
  37.268 +	[0] =  1UL							\
  37.269 +} }
  37.270 +
  37.271 +#define cpus_addr(src) ((src).bits)
  37.272 +
  37.273 +/*
  37.274 +#define cpumask_scnprintf(buf, len, src) \
  37.275 +			__cpumask_scnprintf((buf), (len), &(src), NR_CPUS)
  37.276 +static inline int __cpumask_scnprintf(char *buf, int len,
  37.277 +					const cpumask_t *srcp, int nbits)
  37.278  {
  37.279 -    unsigned int res = (w & 0x55555555) + ((w >> 1) & 0x55555555);
  37.280 -    res = (res & 0x33333333) + ((res >> 2) & 0x33333333);
  37.281 -    res = (res & 0x0F0F0F0F) + ((res >> 4) & 0x0F0F0F0F);
  37.282 -    res = (res & 0x00FF00FF) + ((res >> 8) & 0x00FF00FF);
  37.283 -    return (res & 0x0000FFFF) + ((res >> 16) & 0x0000FFFF);
  37.284 +	return bitmap_scnprintf(buf, len, srcp->bits, nbits);
  37.285  }
  37.286  
  37.287 -#define cpus_addr(_m) (&(_m))
  37.288 +#define cpumask_parse(ubuf, ulen, src) \
  37.289 +			__cpumask_parse((ubuf), (ulen), &(src), NR_CPUS)
  37.290 +static inline int __cpumask_parse(const char __user *buf, int len,
  37.291 +					cpumask_t *dstp, int nbits)
  37.292 +{
  37.293 +	return bitmap_parse(buf, len, dstp->bits, nbits);
  37.294 +}
  37.295 +*/
  37.296  
  37.297 -#endif /* __XEN_CPUMASK_H__ */
  37.298 +#if NR_CPUS > 1
  37.299 +#define for_each_cpu_mask(cpu, mask)		\
  37.300 +	for ((cpu) = first_cpu(mask);		\
  37.301 +		(cpu) < NR_CPUS;		\
  37.302 +		(cpu) = next_cpu((cpu), (mask)))
  37.303 +#else /* NR_CPUS == 1 */
  37.304 +#define for_each_cpu_mask(cpu, mask) for ((cpu) = 0; (cpu) < 1; (cpu)++)
  37.305 +#endif /* NR_CPUS */
  37.306 +
  37.307 +/*
  37.308 + * The following particular system cpumasks and operations manage
  37.309 + * possible, present and online cpus.  Each of them is a fixed size
  37.310 + * bitmap of size NR_CPUS.
  37.311 + *
  37.312 + *  #ifdef CONFIG_HOTPLUG_CPU
  37.313 + *     cpu_possible_map - all NR_CPUS bits set
  37.314 + *     cpu_present_map  - has bit 'cpu' set iff cpu is populated
  37.315 + *     cpu_online_map   - has bit 'cpu' set iff cpu available to scheduler
  37.316 + *  #else
  37.317 + *     cpu_possible_map - has bit 'cpu' set iff cpu is populated
  37.318 + *     cpu_present_map  - copy of cpu_possible_map
  37.319 + *     cpu_online_map   - has bit 'cpu' set iff cpu available to scheduler
  37.320 + *  #endif
  37.321 + *
  37.322 + *  In either case, NR_CPUS is fixed at compile time, as the static
  37.323 + *  size of these bitmaps.  The cpu_possible_map is fixed at boot
    37.324 + *  time, as the set of CPU ids that might ever be plugged in at
    37.325 + *  any time during the life of that system boot.
  37.326 + *  The cpu_present_map is dynamic(*), representing which CPUs
  37.327 + *  are currently plugged in.  And cpu_online_map is the dynamic
  37.328 + *  subset of cpu_present_map, indicating those CPUs available
  37.329 + *  for scheduling.
  37.330 + *
  37.331 + *  If HOTPLUG is enabled, then cpu_possible_map is forced to have
  37.332 + *  all NR_CPUS bits set, otherwise it is just the set of CPUs that
  37.333 + *  ACPI reports present at boot.
  37.334 + *
  37.335 + *  If HOTPLUG is enabled, then cpu_present_map varies dynamically,
  37.336 + *  depending on what ACPI reports as currently plugged in, otherwise
  37.337 + *  cpu_present_map is just a copy of cpu_possible_map.
  37.338 + *
  37.339 + *  (*) Well, cpu_present_map is dynamic in the hotplug case.  If not
  37.340 + *      hotplug, it's a copy of cpu_possible_map, hence fixed at boot.
  37.341 + *
  37.342 + * Subtleties:
  37.343 + * 1) UP arch's (NR_CPUS == 1, CONFIG_SMP not defined) hardcode
  37.344 + *    assumption that their single CPU is online.  The UP
  37.345 + *    cpu_{online,possible,present}_maps are placebos.  Changing them
   37.346 + *    will have no useful effect on the following num_*_cpus()
  37.347 + *    and cpu_*() macros in the UP case.  This ugliness is a UP
  37.348 + *    optimization - don't waste any instructions or memory references
  37.349 + *    asking if you're online or how many CPUs there are if there is
  37.350 + *    only one CPU.
  37.351 + * 2) Most SMP arch's #define some of these maps to be some
  37.352 + *    other map specific to that arch.  Therefore, the following
  37.353 + *    must be #define macros, not inlines.  To see why, examine
  37.354 + *    the assembly code produced by the following.  Note that
  37.355 + *    set1() writes phys_x_map, but set2() writes x_map:
  37.356 + *        int x_map, phys_x_map;
  37.357 + *        #define set1(a) x_map = a
  37.358 + *        inline void set2(int a) { x_map = a; }
  37.359 + *        #define x_map phys_x_map
  37.360 + *        main(){ set1(3); set2(5); }
  37.361 + */
  37.362 +
  37.363 +extern cpumask_t cpu_possible_map;
  37.364 +extern cpumask_t cpu_online_map;
  37.365 +extern cpumask_t cpu_present_map;
  37.366 +
  37.367 +#if NR_CPUS > 1
  37.368 +#define num_online_cpus()	cpus_weight(cpu_online_map)
  37.369 +#define num_possible_cpus()	cpus_weight(cpu_possible_map)
  37.370 +#define num_present_cpus()	cpus_weight(cpu_present_map)
  37.371 +#define cpu_online(cpu)		cpu_isset((cpu), cpu_online_map)
  37.372 +#define cpu_possible(cpu)	cpu_isset((cpu), cpu_possible_map)
  37.373 +#define cpu_present(cpu)	cpu_isset((cpu), cpu_present_map)
  37.374 +#else
  37.375 +#define num_online_cpus()	1
  37.376 +#define num_possible_cpus()	1
  37.377 +#define num_present_cpus()	1
  37.378 +#define cpu_online(cpu)		((cpu) == 0)
  37.379 +#define cpu_possible(cpu)	((cpu) == 0)
  37.380 +#define cpu_present(cpu)	((cpu) == 0)
  37.381 +#endif
  37.382 +
  37.383 +#define any_online_cpu(mask)			\
  37.384 +({						\
  37.385 +	int cpu;				\
  37.386 +	for_each_cpu_mask(cpu, (mask))		\
  37.387 +		if (cpu_online(cpu))		\
  37.388 +			break;			\
  37.389 +	cpu;					\
  37.390 +})
  37.391 +
  37.392 +#define for_each_cpu(cpu)	  for_each_cpu_mask((cpu), cpu_possible_map)
  37.393 +#define for_each_online_cpu(cpu)  for_each_cpu_mask((cpu), cpu_online_map)
  37.394 +#define for_each_present_cpu(cpu) for_each_cpu_mask((cpu), cpu_present_map)
  37.395 +
  37.396 +#endif /* __XEN_CPUMASK_H */
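
    [Editor's note] A minimal sketch of the cpumask_t operations documented at the
    top of this header; it assumes NR_CPUS, cpu_online_map and smp_processor_id()
    are in scope, and the printk() is illustrative only:

        cpumask_t allbutself;

        allbutself = cpu_online_map;                 /* plain struct copy     */
        cpu_clear(smp_processor_id(), allbutself);   /* drop the local CPU    */

        if ( !cpus_empty(allbutself) )
            printk("%d other CPU(s) online, first is %d\n",
                   cpus_weight(allbutself), first_cpu(allbutself));
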
    38.1 --- a/xen/include/xen/kernel.h	Wed May 25 10:32:53 2005 +0000
    38.2 +++ b/xen/include/xen/kernel.h	Wed May 25 10:36:59 2005 +0000
    38.3 @@ -33,5 +33,29 @@
    38.4  #define max_t(type,x,y) \
    38.5          ({ type __x = (x); type __y = (y); __x > __y ? __x: __y; })
    38.6  
    38.7 +/**
    38.8 + * container_of - cast a member of a structure out to the containing structure
    38.9 + *
   38.10 + * @ptr:	the pointer to the member.
   38.11 + * @type:	the type of the container struct this is embedded in.
   38.12 + * @member:	the name of the member within the struct.
   38.13 + *
   38.14 + */
   38.15 +#define container_of(ptr, type, member) ({			\
   38.16 +        const typeof( ((type *)0)->member ) *__mptr = (ptr);	\
   38.17 +        (type *)( (char *)__mptr - offsetof(type,member) );})
   38.18 +
   38.19 +/*
   38.20 + * Check at compile time that something is of a particular type.
   38.21 + * Always evaluates to 1 so you may use it easily in comparisons.
   38.22 + */
   38.23 +#define typecheck(type,x) \
   38.24 +({	type __dummy; \
   38.25 +	typeof(x) __dummy2; \
   38.26 +	(void)(&__dummy == &__dummy2); \
   38.27 +	1; \
   38.28 +})
   38.29 +
   38.30 +
   38.31  #endif /* _LINUX_KERNEL_H */
   38.32  
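
    [Editor's note] container_of() recovers the enclosing structure from a pointer
    to one of its members; the definition above relies on GCC's typeof and
    statement expressions. A standalone illustration (the struct names here are
    invented for the example):

        #include <stdio.h>
        #include <stddef.h>

        #define container_of(ptr, type, member) ({                      \
                const typeof( ((type *)0)->member ) *__mptr = (ptr);    \
                (type *)( (char *)__mptr - offsetof(type,member) );})

        struct list_head { struct list_head *next, *prev; };

        struct my_timer {
            unsigned long expires;
            struct list_head link;
        };

        int main(void)
        {
            struct my_timer t = { .expires = 42 };
            struct list_head *lh = &t.link;   /* all a callback might be given */
            struct my_timer *owner = container_of(lh, struct my_timer, link);

            printf("%lu\n", owner->expires);  /* prints 42 */
            return 0;
        }
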
    39.1 --- a/xen/include/xen/sched-if.h	Wed May 25 10:32:53 2005 +0000
    39.2 +++ b/xen/include/xen/sched-if.h	Wed May 25 10:36:59 2005 +0000
    39.3 @@ -8,7 +8,6 @@
    39.4  #ifndef __XEN_SCHED_IF_H__
    39.5  #define __XEN_SCHED_IF_H__
    39.6  
    39.7 -//#define ADV_SCHED_HISTO
    39.8  #define BUCKETS  10
    39.9  /*300*/
   39.10  
   39.11 @@ -19,11 +18,6 @@ struct schedule_data {
   39.12      void               *sched_priv;
   39.13      struct ac_timer     s_timer;        /* scheduling timer                */
   39.14      unsigned long       tick;           /* current periodic 'tick'         */
   39.15 -#ifdef ADV_SCHED_HISTO
   39.16 -    u32			to_hist[BUCKETS];
   39.17 -    u32			from_hist[BUCKETS];
   39.18 -    u64			save_tsc;
   39.19 -#endif
   39.20  #ifdef BUCKETS
   39.21      u32                 hist[BUCKETS];  /* for scheduler latency histogram */
   39.22  #endif
   39.23 @@ -39,8 +33,6 @@ struct scheduler {
   39.24      char *opt_name;         /* option name for this scheduler    */
   39.25      unsigned int sched_id;  /* ID for this scheduler             */
   39.26  
   39.27 -    int          (*init_scheduler) (void);
   39.28 -    int          (*init_idle_task) (struct exec_domain *);
   39.29      int          (*alloc_task)     (struct exec_domain *);
   39.30      void         (*add_task)       (struct exec_domain *);
   39.31      void         (*free_task)      (struct domain *);
    40.1 --- a/xen/include/xen/sched.h	Wed May 25 10:32:53 2005 +0000
    40.2 +++ b/xen/include/xen/sched.h	Wed May 25 10:36:59 2005 +0000
    40.3 @@ -246,9 +246,6 @@ void new_thread(struct exec_domain *d,
    40.4                  unsigned long start_stack,
    40.5                  unsigned long start_info);
    40.6  
    40.7 -extern unsigned long wait_init_idle;
    40.8 -#define init_idle() clear_bit(smp_processor_id(), &wait_init_idle);
    40.9 -
   40.10  #define set_current_state(_s) do { current->state = (_s); } while (0)
   40.11  void scheduler_init(void);
   40.12  void schedulers_start(void);
   40.13 @@ -257,7 +254,6 @@ void sched_rem_domain(struct exec_domain
   40.14  long sched_ctl(struct sched_ctl_cmd *);
   40.15  long sched_adjdom(struct sched_adjdom_cmd *);
   40.16  int  sched_id();
   40.17 -void init_idle_task(void);
   40.18  void domain_wake(struct exec_domain *d);
   40.19  void domain_sleep(struct exec_domain *d);
   40.20  
    41.1 --- a/xen/include/xen/smp.h	Wed May 25 10:32:53 2005 +0000
    41.2 +++ b/xen/include/xen/smp.h	Wed May 25 10:36:59 2005 +0000
    41.3 @@ -26,19 +26,19 @@ extern void smp_send_event_check_mask(un
    41.4  #define smp_send_event_check_cpu(_cpu) smp_send_event_check_mask(1<<(_cpu))
    41.5  
    41.6  /*
    41.7 - * Boot processor call to load the other CPU's
    41.8 + * Prepare machine for booting other CPUs.
    41.9   */
   41.10 -extern void smp_boot_cpus(void);
   41.11 +extern void smp_prepare_cpus(unsigned int max_cpus);
   41.12  
   41.13  /*
   41.14 - * Processor call in. Must hold processors until ..
   41.15 + * Bring a CPU up
   41.16   */
   41.17 -extern void smp_callin(void);
   41.18 +extern int __cpu_up(unsigned int cpunum);
   41.19  
   41.20  /*
   41.21 - * Multiprocessors may now schedule
   41.22 + * Final polishing of CPUs
   41.23   */
   41.24 -extern void smp_commence(void);
   41.25 +extern void smp_cpus_done(unsigned int max_cpus);
   41.26  
   41.27  /*
   41.28   * Call a function on all other processors
   41.29 @@ -57,12 +57,6 @@ static inline int on_each_cpu(void (*fun
   41.30      return ret;
   41.31  }
   41.32  
   41.33 -/*
   41.34 - * True once the per process idle is forked
   41.35 - */
   41.36 -extern int smp_threads_ready;
   41.37 -
   41.38 -extern int smp_num_cpus;
   41.39  extern int ht_per_core;
   41.40  extern int opt_noht;
   41.41  
   41.42 @@ -80,6 +74,12 @@ extern volatile int smp_msg_id;
   41.43  #define MSG_RESCHEDULE		0x0003	/* Reschedule request from master CPU*/
   41.44  #define MSG_CALL_FUNCTION       0x0004  /* Call function on all other CPUs */
   41.45  
   41.46 +/*
   41.47 + * Mark the boot cpu "online" so that it can call console drivers in
   41.48 + * printk() and can access its per-cpu storage.
   41.49 + */
   41.50 +void smp_prepare_boot_cpu(void);
   41.51 +
   41.52  #else
   41.53  
   41.54  /*
   41.55 @@ -88,16 +88,14 @@ extern volatile int smp_msg_id;
   41.56  
   41.57  #define smp_send_event_check_mask(_m)           ((void)0)
   41.58  #define smp_send_event_check_cpu(_p)            ((void)0) 
   41.59 -#define smp_num_cpus				1
   41.60 +#ifndef __smp_processor_id
   41.61  #define smp_processor_id()			0
   41.62 +#endif
   41.63  #define hard_smp_processor_id()			0
   41.64 -#define smp_threads_ready			1
   41.65 -#define kernel_lock()
   41.66 -#define cpu_logical_map(cpu)			0
   41.67 -#define cpu_number_map(cpu)			0
   41.68  #define smp_call_function(func,info,retry,wait)	0
   41.69  #define on_each_cpu(func,info,retry,wait)	({ func(info); 0; })
   41.70 -#define cpu_online_map				1
   41.71 +#define num_booting_cpus()			1
   41.72 +#define smp_prepare_boot_cpu()			do {} while (0)
   41.73  
   41.74  #endif
   41.75
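
    [Editor's note] The smp.h hunks above swap the old smp_boot_cpus()/smp_callin()/
    smp_commence() trio for Linux-style hotplug bring-up. A hedged sketch of how the
    new entry points are intended to be sequenced during boot (the function name,
    max_cpus handling and error reporting are illustrative, not lifted from setup.c):

        static void bring_up_secondary_cpus(unsigned int max_cpus)
        {
            unsigned int cpu;

            smp_prepare_boot_cpu();         /* CPU0 marked online/present    */
            smp_prepare_cpus(max_cpus);     /* probe and prepare the others  */

            for_each_present_cpu ( cpu )
            {
                if ( num_online_cpus() >= max_cpus )
                    break;
                if ( !cpu_online(cpu) && (__cpu_up(cpu) != 0) )
                    printk("Failed to bring up CPU %u\n", cpu);
            }

            smp_cpus_done(max_cpus);        /* final polishing               */
        }
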