ia64/xen-unstable

changeset 18952:07f26e047fbf

merge with xen-unstable.hg
author Isaku Yamahata <yamahata@valinux.co.jp>
date Wed Dec 24 12:52:34 2008 +0900 (2008-12-24)
parents 9837303a4708 e2f36d066b7b
children ecdc570407ec
files xen/arch/x86/cpu/mcheck/p4.c xen/arch/x86/cpu/mcheck/p6.c xen/arch/x86/rwlock.c xen/include/asm-x86/rwlock.h
line diff
     1.1 --- a/extras/mini-os/Makefile	Wed Dec 24 12:50:57 2008 +0900
     1.2 +++ b/extras/mini-os/Makefile	Wed Dec 24 12:52:34 2008 +0900
     1.3 @@ -93,8 +93,12 @@ endif
     1.4  $(OBJ_DIR)/$(TARGET)_app.o: $(APP_OBJS) app.lds
     1.5  	$(LD) -r -d $(LDFLAGS) -\( $^ -\) $(APP_LDLIBS) --undefined main -o $@
     1.6  
     1.7 -$(OBJ_DIR)/$(TARGET): links $(OBJS) $(OBJ_DIR)/$(TARGET)_app.o arch_lib
     1.8 -	$(LD) -r $(LDFLAGS) $(HEAD_OBJ) $(OBJ_DIR)/$(TARGET)_app.o $(OBJS) $(LDARCHLIB) $(LDLIBS) -o $@.o
     1.9 +ifneq ($(APP_OBJS),)
    1.10 +APP_O=$(OBJ_DIR)/$(TARGET)_app.o 
    1.11 +endif
    1.12 +
    1.13 +$(OBJ_DIR)/$(TARGET): links $(OBJS) $(APP_O) arch_lib
    1.14 +	$(LD) -r $(LDFLAGS) $(HEAD_OBJ) $(APP_O) $(OBJS) $(LDARCHLIB) $(LDLIBS) -o $@.o
    1.15  	$(OBJCOPY) -w -G $(GLOBAL_PREFIX)* -G _start $@.o $@.o
    1.16  	$(LD) $(LDFLAGS) $(LDFLAGS_FINAL) $@.o $(EXTRA_OBJS) -o $@
    1.17  	gzip -f -9 -c $@ >$@.gz
     2.1 --- a/extras/mini-os/arch/x86/mm.c	Wed Dec 24 12:50:57 2008 +0900
     2.2 +++ b/extras/mini-os/arch/x86/mm.c	Wed Dec 24 12:52:34 2008 +0900
     2.3 @@ -420,7 +420,9 @@ static unsigned long demand_map_area_sta
     2.4  #define DEMAND_MAP_PAGES ((2ULL << 30) / PAGE_SIZE)
     2.5  #endif
     2.6  
     2.7 -#ifdef HAVE_LIBC
     2.8 +#ifndef HAVE_LIBC
     2.9 +#define HEAP_PAGES 0
    2.10 +#else
    2.11  unsigned long heap, brk, heap_mapped, heap_end;
    2.12  #ifdef __x86_64__
    2.13  #define HEAP_PAGES ((128ULL << 30) / PAGE_SIZE)
    2.14 @@ -591,7 +593,7 @@ void arch_init_p2m(unsigned long max_pfn
    2.15  void arch_init_mm(unsigned long* start_pfn_p, unsigned long* max_pfn_p)
    2.16  {
    2.17  
    2.18 -    unsigned long start_pfn, max_pfn;
    2.19 +    unsigned long start_pfn, max_pfn, virt_pfns;
    2.20  
    2.21      printk("  _text:        %p\n", &_text);
    2.22      printk("  _etext:       %p\n", &_etext);
    2.23 @@ -604,7 +606,12 @@ void arch_init_mm(unsigned long* start_p
    2.24      start_pfn = PFN_UP(to_phys(start_info.pt_base)) + 
    2.25                  start_info.nr_pt_frames + 3;
    2.26      max_pfn = start_info.nr_pages;
    2.27 -   
    2.28 +
    2.29 +    /* We need room for demand mapping and heap, clip available memory */
    2.30 +    virt_pfns = DEMAND_MAP_PAGES + HEAP_PAGES;
    2.31 +    if (max_pfn + virt_pfns + 1 < max_pfn)
    2.32 +        max_pfn = -(virt_pfns + 1);
    2.33 +
    2.34      printk("  start_pfn:    %lx\n", start_pfn);
    2.35      printk("  max_pfn:      %lx\n", max_pfn);
    2.36  
     3.1 --- a/extras/mini-os/fs-front.c	Wed Dec 24 12:50:57 2008 +0900
     3.2 +++ b/extras/mini-os/fs-front.c	Wed Dec 24 12:52:34 2008 +0900
     3.3 @@ -869,18 +869,6 @@ moretodo:
     3.4      in_irq = 0;
     3.5  }
     3.6  
     3.7 -/* Small utility function to figure out our domain id */
     3.8 -static domid_t get_self_id(void)
     3.9 -{
    3.10 -    char *dom_id;
    3.11 -    domid_t ret; 
    3.12 -
    3.13 -    BUG_ON(xenbus_read(XBT_NIL, "domid", &dom_id));
    3.14 -    sscanf(dom_id, "%d", &ret);
    3.15 -
    3.16 -    return ret;
    3.17 -}
    3.18 -
    3.19  static void alloc_request_table(struct fs_import *import)
    3.20  {
    3.21      struct fs_request *requests;
    3.22 @@ -1066,7 +1054,7 @@ static int init_fs_import(struct fs_impo
    3.23      unmask_evtchn(import->local_port);
    3.24  
    3.25      
    3.26 -    self_id = get_self_id(); 
    3.27 +    self_id = xenbus_get_self_id(); 
    3.28      /* Write the frontend info to a node in our Xenbus */
    3.29      sprintf(nodename, "/local/domain/%d/device/vfs/%d", 
    3.30                          self_id, import->import_id);
     4.1 --- a/extras/mini-os/include/xenbus.h	Wed Dec 24 12:50:57 2008 +0900
     4.2 +++ b/extras/mini-os/include/xenbus.h	Wed Dec 24 12:52:34 2008 +0900
     4.3 @@ -91,6 +91,9 @@ char* xenbus_printf(xenbus_transaction_t
     4.4                                    const char* fmt, ...)
     4.5                     __attribute__((__format__(printf, 4, 5)));
     4.6  
     4.7 +/* Utility function to figure out our domain id */
     4.8 +domid_t xenbus_get_self_id(void);
     4.9 +
    4.10  /* Reset the XenBus system. */
    4.11  void fini_xenbus(void);
    4.12  
     5.1 --- a/extras/mini-os/kernel.c	Wed Dec 24 12:50:57 2008 +0900
     5.2 +++ b/extras/mini-os/kernel.c	Wed Dec 24 12:52:34 2008 +0900
     5.3 @@ -434,25 +434,25 @@ static void kbdfront_thread(void *p)
     5.4  
     5.5  static struct pcifront_dev *pci_dev;
     5.6  
     5.7 +static void print_pcidev(unsigned int domain, unsigned int bus, unsigned int slot, unsigned int fun)
     5.8 +{
     5.9 +    unsigned int vendor, device, rev, class;
    5.10 +
    5.11 +    pcifront_conf_read(pci_dev, domain, bus, slot, fun, 0x00, 2, &vendor);
    5.12 +    pcifront_conf_read(pci_dev, domain, bus, slot, fun, 0x02, 2, &device);
    5.13 +    pcifront_conf_read(pci_dev, domain, bus, slot, fun, 0x08, 1, &rev);
    5.14 +    pcifront_conf_read(pci_dev, domain, bus, slot, fun, 0x0a, 2, &class);
    5.15 +
    5.16 +    printk("%04x:%02x:%02x.%02x %04x: %04x:%04x (rev %02x)\n", domain, bus, slot, fun, class, vendor, device, rev);
    5.17 +}
    5.18 +
    5.19  static void pcifront_thread(void *p)
    5.20  {
    5.21 -    void print(unsigned int domain, unsigned int bus, unsigned int slot, unsigned int fun)
    5.22 -    {
    5.23 -        unsigned int vendor, device, rev, class;
    5.24 -
    5.25 -        pcifront_conf_read(pci_dev, domain, bus, slot, fun, 0x00, 2, &vendor);
    5.26 -        pcifront_conf_read(pci_dev, domain, bus, slot, fun, 0x02, 2, &device);
    5.27 -        pcifront_conf_read(pci_dev, domain, bus, slot, fun, 0x08, 1, &rev);
    5.28 -        pcifront_conf_read(pci_dev, domain, bus, slot, fun, 0x0a, 2, &class);
    5.29 -
    5.30 -        printk("%04x:%02x:%02x.%02x %04x: %04x:%04x (rev %02x)\n", domain, bus, slot, fun, class, vendor, device, rev);
    5.31 -    }
    5.32 -
    5.33      pci_dev = init_pcifront(NULL);
    5.34      if (!pci_dev)
    5.35          return;
    5.36      printk("PCI devices:\n");
    5.37 -    pcifront_scan(pci_dev, print);
    5.38 +    pcifront_scan(pci_dev, print_pcidev);
    5.39  }
    5.40  
    5.41  static void fs_thread(void *p)
     6.1 --- a/extras/mini-os/xenbus/xenbus.c	Wed Dec 24 12:50:57 2008 +0900
     6.2 +++ b/extras/mini-os/xenbus/xenbus.c	Wed Dec 24 12:52:34 2008 +0900
     6.3 @@ -666,6 +666,17 @@ char* xenbus_printf(xenbus_transaction_t
     6.4      return xenbus_write(xbt,fullpath,val);
     6.5  }
     6.6  
     6.7 +domid_t xenbus_get_self_id(void)
     6.8 +{
     6.9 +    char *dom_id;
    6.10 +    unsigned int ret; /* %u needs an unsigned int target; domid_t (16-bit in Xen's public headers) is too narrow */
    6.11 +
    6.12 +    BUG_ON(xenbus_read(XBT_NIL, "domid", &dom_id));
    6.13 +    sscanf(dom_id, "%u", &ret);
    6.14 +
    6.15 +    return ret; /* narrowed to domid_t by the return type */
    6.16 +}
    6.17 +
    6.18  static void do_ls_test(const char *pre)
    6.19  {
    6.20      char **dirs, *msg;
     7.1 --- a/tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/Makefile.in	Wed Dec 24 12:50:57 2008 +0900
     7.2 +++ b/tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/Makefile.in	Wed Dec 24 12:52:34 2008 +0900
     7.3 @@ -83,7 +83,7 @@ READLINE_DEP = $$(READLINE_DIR)
     7.4  # -I. for config files.
     7.5  # -I${srcdir} for our headers.
     7.6  # -I$(srcdir)/../regformats for regdef.h.
     7.7 -INCLUDE_CFLAGS = -I. -I${srcdir} -I$(srcdir)/../regformats -I$(INCLUDE_DIR)  -I../../../../../libxc/
     7.8 +INCLUDE_CFLAGS = -I. -I${srcdir} -I$(srcdir)/../regformats -I$(INCLUDE_DIR)  -I../../../../../libxc/ -I../../../../../include/
     7.9  
    7.10  # M{H,T}_CFLAGS, if defined, has host- and target-dependent CFLAGS
    7.11  # from the config/ directory.
     8.1 --- a/tools/firmware/hvmloader/hvmloader.c	Wed Dec 24 12:50:57 2008 +0900
     8.2 +++ b/tools/firmware/hvmloader/hvmloader.c	Wed Dec 24 12:52:34 2008 +0900
     8.3 @@ -269,6 +269,11 @@ static void pci_setup(void)
     8.4              printf("pci dev %02x:%x INT%c->IRQ%u\n",
     8.5                     devfn>>3, devfn&7, 'A'+pin-1, isa_irq);
     8.6          }
     8.7 +
     8.8 +        /* Enable bus mastering. */
     8.9 +        cmd = pci_readw(devfn, PCI_COMMAND);
    8.10 +        cmd |= PCI_COMMAND_MASTER;
    8.11 +        pci_writew(devfn, PCI_COMMAND, cmd);
    8.12      }
    8.13  
    8.14      /* Assign iomem and ioport resources in descending order of size. */
    8.15 @@ -536,6 +541,23 @@ static uint16_t init_xen_platform_io_bas
    8.16      return bios_info->xen_pfiob;
    8.17  }
    8.18  
    8.19 +/* Set up an empty TSS area for virtual 8086 mode to use. 
    8.20 + * The only important thing is that it mustn't have any bits set
    8.21 + * in the interrupt redirection bitmap, so all zeros will do.  */
    8.22 +static void init_vm86_tss(void)
    8.23 +{
    8.24 +    uint32_t tss;
    8.25 +    struct xen_hvm_param p;
    8.26 +
    8.27 +    tss = e820_malloc(128, 128);
    8.28 +    memset((char *)tss, 0, 128);
    8.29 +    p.domid = DOMID_SELF;
    8.30 +    p.index = HVM_PARAM_VM86_TSS;
    8.31 +    p.value = tss;
    8.32 +    hypercall_hvm_op(HVMOP_set_param, &p);
    8.33 +    printf("vm86 TSS at %08x\n", tss);
    8.34 +}
    8.35 +
    8.36  int main(void)
    8.37  {
    8.38      int option_rom_sz = 0, vgabios_sz = 0, etherboot_sz = 0;
    8.39 @@ -606,6 +628,8 @@ int main(void)
    8.40          acpi_build_tables();
    8.41      }
    8.42  
    8.43 +    init_vm86_tss();
    8.44 +
    8.45      cmos_write_memory_size();
    8.46  
    8.47      printf("BIOS map:\n");
     9.1 --- a/tools/firmware/rombios/rombios.c	Wed Dec 24 12:50:57 2008 +0900
     9.2 +++ b/tools/firmware/rombios/rombios.c	Wed Dec 24 12:52:34 2008 +0900
     9.3 @@ -1,5 +1,5 @@
     9.4  /////////////////////////////////////////////////////////////////////////
     9.5 -// $Id: rombios.c,v 1.138 2005/05/07 15:55:26 vruppert Exp $
     9.6 +// $Id: rombios.c,v 1.221 2008/12/07 17:32:29 sshwarts Exp $
     9.7  /////////////////////////////////////////////////////////////////////////
     9.8  //
     9.9  //  Copyright (C) 2002  MandrakeSoft S.A.
    9.10 @@ -22,9 +22,9 @@
    9.11  //
    9.12  //  You should have received a copy of the GNU Lesser General Public
    9.13  //  License along with this library; if not, write to the Free Software
    9.14 -//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
    9.15 -
    9.16 -// ROM BIOS for use with Bochs/Plex x86 emulation environment
    9.17 +//  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
    9.18 +
    9.19 +// ROM BIOS for use with Bochs/Plex86/QEMU emulation environment
    9.20  
    9.21  #define uint8_t unsigned char
    9.22  #define uint16_t unsigned short
    9.23 @@ -81,10 +81,10 @@
    9.24  //
    9.25  // NOTES for El-Torito Boot (cbbochs@free.fr)
    9.26  //   - CD-ROM booting is only available if ATA/ATAPI Driver is available
    9.27 -//   - Current code is only able to boot mono-session cds 
    9.28 +//   - Current code is only able to boot mono-session cds
    9.29  //   - Current code can not boot and emulate a hard-disk
    9.30  //     the bios will panic otherwise
    9.31 -//   - Current code also use memory in EBDA segement. 
    9.32 +//   - Current code also use memory in EBDA segement.
    9.33  //   - I used cmos byte 0x3D to store extended information on boot-device
    9.34  //   - Code has to be modified modified to handle multiple cdrom drives
    9.35  //   - Here are the cdrom boot failure codes:
    9.36 @@ -102,13 +102,13 @@
    9.37  //      12 : can not read cd - boot image
    9.38  //
    9.39  //   ATA driver
    9.40 -//   - EBDA segment. 
    9.41 +//   - EBDA segment.
    9.42  //     I used memory starting at 0x121 in the segment
    9.43  //   - the translation policy is defined in cmos regs 0x39 & 0x3a
    9.44  //
    9.45  // TODO :
    9.46  //
    9.47 -//   int74 
    9.48 +//   int74
    9.49  //     - needs to be reworked.  Uses direct [bp] offsets. (?)
    9.50  //
    9.51  //   int13:
    9.52 @@ -128,13 +128,13 @@
    9.53  //   - Implement remaining int13_cdemu functions (as defined by El-Torito specs)
    9.54  //   - cdrom drive is hardcoded to ide 0 device 1 in several places. see "FIXME ElTorito Hardcoded"
    9.55  //   - int13 Fix DL when emulating a cd. In that case DL is decremented before calling real int13.
    9.56 -//     This is ok. But DL should be reincremented afterwards. 
    9.57 +//     This is ok. But DL should be reincremented afterwards.
    9.58  //   - Fix all "FIXME ElTorito Various"
    9.59  //   - should be able to boot any cdrom instead of the first one
    9.60  //
    9.61  //   BCC Bug: find a generic way to handle the bug of #asm after an "if"  (fixed in 0.16.7)
    9.62  
    9.63 -#define DEBUG_ROMBIOS      0
    9.64 +#include "rombios.h"
    9.65  
    9.66  #define DEBUG_ATA          0
    9.67  #define DEBUG_INT13_HD     0
    9.68 @@ -159,7 +159,7 @@
    9.69  #define BX_USE_ATADRV    1
    9.70  #define BX_ELTORITO_BOOT 1
    9.71  
    9.72 -#define BX_TCGBIOS       0              /* main switch for TCG BIOS ext. */
    9.73 +#define BX_TCGBIOS       0   /* main switch for TCG BIOS ext. */
    9.74  
    9.75  #define BX_MAX_ATA_INTERFACES   4
    9.76  #define BX_MAX_ATA_DEVICES      (BX_MAX_ATA_INTERFACES*2)
    9.77 @@ -183,14 +183,18 @@
    9.78  #define EBDA_SIZE          1              // In KiB
    9.79  #define BASE_MEM_IN_K   (640 - EBDA_SIZE)
    9.80  
    9.81 -  // Define the application NAME
    9.82 -#ifdef HVMASSIST
    9.83 -#  define BX_APPNAME "HVMAssist"
    9.84 -#elif PLEX86
    9.85 -#  define BX_APPNAME "Plex86"
    9.86 -#else
    9.87 -#  define BX_APPNAME "Bochs"
    9.88 -#endif
    9.89 +/* 256 bytes at 0x9ff00 -- 0x9ffff is used for the IPL boot table. */
    9.90 +#define IPL_TABLE_OFFSET     0x0300  /* offset from EBDA */
    9.91 +#define IPL_TABLE_ENTRIES    8
    9.92 +#define IPL_COUNT_OFFSET     0x0380  /* u16: number of valid table entries */
    9.93 +#define IPL_SEQUENCE_OFFSET  0x0382  /* u16: next boot device */
    9.94 +#define IPL_BOOTFIRST_OFFSET 0x0384  /* u16: user selected device */
    9.95 +#define IPL_SIZE             0xff
    9.96 +#define IPL_TYPE_FLOPPY      0x01
    9.97 +#define IPL_TYPE_HARDDISK    0x02
    9.98 +#define IPL_TYPE_CDROM       0x03
    9.99 +#define IPL_TYPE_BEV         0x80
   9.100 +
   9.101  
   9.102    // Sanity Checks
   9.103  #if BX_USE_ATADRV && BX_CPU<3
   9.104 @@ -209,15 +213,10 @@
   9.105  #    error APM BIOS can only be used with 386+ cpu
   9.106  #endif
   9.107  
   9.108 -#ifndef BX_SMP_PROCESSORS
   9.109 -#define BX_SMP_PROCESSORS 1
   9.110 -#    warning BX_SMP_PROCESSORS not defined, defaulting to 1
   9.111 -#endif
   9.112 -  
   9.113 -#define PANIC_PORT  0x400
   9.114 -#define PANIC_PORT2 0x401
   9.115 -#define INFO_PORT   0x402
   9.116 -#define DEBUG_PORT  0x403
   9.117 +// define this if you want to make PCIBIOS working on a specific bridge only
   9.118 +// undef enables PCIBIOS when at least one PCI device is found
   9.119 +// i440FX is emulated by Bochs and QEMU
   9.120 +#define PCI_FIXED_HOST_BRIDGE 0x12378086 ;; i440FX PCI bridge
   9.121  
   9.122  // #20  is dec 20
   9.123  // #$20 is hex 20 = 32
   9.124 @@ -250,7 +249,7 @@ use16 286
   9.125  
   9.126  MACRO HALT
   9.127    ;; the HALT macro is called with the line number of the HALT call.
   9.128 -  ;; The line number is then sent to the PANIC_PORT, causing Bochs/Plex 
   9.129 +  ;; The line number is then sent to the PANIC_PORT, causing Bochs/Plex
   9.130    ;; to print a BX_PANIC message.  This will normally halt the simulation
   9.131    ;; with a message such as "BIOS panic at rombios.c, line 4091".
   9.132    ;; However, users can choose to make panics non-fatal and continue.
   9.133 @@ -289,9 +288,9 @@ typedef unsigned long  Bit32u;
   9.134    void memsetb(seg,offset,value,count);
   9.135    void memcpyb(dseg,doffset,sseg,soffset,count);
   9.136    void memcpyd(dseg,doffset,sseg,soffset,count);
   9.137 -  
   9.138 +
   9.139    // memset of count bytes
   9.140 -    void 
   9.141 +    void
   9.142    memsetb(seg,offset,value,count)
   9.143      Bit16u seg;
   9.144      Bit16u offset;
   9.145 @@ -301,14 +300,14 @@ typedef unsigned long  Bit32u;
   9.146    ASM_START
   9.147      push bp
   9.148      mov  bp, sp
   9.149 -  
   9.150 +
   9.151        push ax
   9.152        push cx
   9.153        push es
   9.154        push di
   9.155 -  
   9.156 +
   9.157        mov  cx, 10[bp] ; count
   9.158 -      cmp  cx, #0x00
   9.159 +      test cx, cx
   9.160        je   memsetb_end
   9.161        mov  ax, 4[bp] ; segment
   9.162        mov  es, ax
   9.163 @@ -318,19 +317,19 @@ typedef unsigned long  Bit32u;
   9.164        cld
   9.165        rep
   9.166         stosb
   9.167 -  
   9.168 +
   9.169    memsetb_end:
   9.170        pop di
   9.171        pop es
   9.172        pop cx
   9.173        pop ax
   9.174 -  
   9.175 +
   9.176      pop bp
   9.177    ASM_END
   9.178    }
   9.179 -  
   9.180 +
   9.181    // memcpy of count bytes
   9.182 -    void 
   9.183 +    void
   9.184    memcpyb(dseg,doffset,sseg,soffset,count)
   9.185      Bit16u dseg;
   9.186      Bit16u doffset;
   9.187 @@ -341,16 +340,16 @@ typedef unsigned long  Bit32u;
   9.188    ASM_START
   9.189      push bp
   9.190      mov  bp, sp
   9.191 -  
   9.192 +
   9.193        push ax
   9.194        push cx
   9.195        push es
   9.196        push di
   9.197        push ds
   9.198        push si
   9.199 -  
   9.200 +
   9.201        mov  cx, 12[bp] ; count
   9.202 -      cmp  cx, #0x0000
   9.203 +      test cx, cx
   9.204        je   memcpyb_end
   9.205        mov  ax, 4[bp] ; dsegment
   9.206        mov  es, ax
   9.207 @@ -363,7 +362,7 @@ typedef unsigned long  Bit32u;
   9.208        cld
   9.209        rep
   9.210         movsb
   9.211 -  
   9.212 +
   9.213    memcpyb_end:
   9.214        pop si
   9.215        pop ds
   9.216 @@ -371,14 +370,13 @@ typedef unsigned long  Bit32u;
   9.217        pop es
   9.218        pop cx
   9.219        pop ax
   9.220 -  
   9.221 +
   9.222      pop bp
   9.223    ASM_END
   9.224    }
   9.225  
   9.226 -#if 0 
   9.227    // memcpy of count dword
   9.228 -    void 
   9.229 +    void
   9.230    memcpyd(dseg,doffset,sseg,soffset,count)
   9.231      Bit16u dseg;
   9.232      Bit16u doffset;
   9.233 @@ -389,16 +387,16 @@ typedef unsigned long  Bit32u;
   9.234    ASM_START
   9.235      push bp
   9.236      mov  bp, sp
   9.237 -  
   9.238 +
   9.239        push ax
   9.240        push cx
   9.241        push es
   9.242        push di
   9.243        push ds
   9.244        push si
   9.245 -  
   9.246 +
   9.247        mov  cx, 12[bp] ; count
   9.248 -      cmp  cx, #0x0000
   9.249 +      test cx, cx
   9.250        je   memcpyd_end
   9.251        mov  ax, 4[bp] ; dsegment
   9.252        mov  es, ax
   9.253 @@ -411,7 +409,7 @@ typedef unsigned long  Bit32u;
   9.254        cld
   9.255        rep
   9.256         movsd
   9.257 -  
   9.258 +
   9.259    memcpyd_end:
   9.260        pop si
   9.261        pop ds
   9.262 @@ -419,16 +417,15 @@ typedef unsigned long  Bit32u;
   9.263        pop es
   9.264        pop cx
   9.265        pop ax
   9.266 -  
   9.267 +
   9.268      pop bp
   9.269    ASM_END
   9.270    }
   9.271 -#endif
   9.272  
   9.273    // read_dword and write_dword functions
   9.274    static Bit32u         read_dword();
   9.275    static void           write_dword();
   9.276 -  
   9.277 +
   9.278      Bit32u
   9.279    read_dword(seg, offset)
   9.280      Bit16u seg;
   9.281 @@ -437,25 +434,24 @@ typedef unsigned long  Bit32u;
   9.282    ASM_START
   9.283      push bp
   9.284      mov  bp, sp
   9.285 -  
   9.286 +
   9.287        push bx
   9.288        push ds
   9.289        mov  ax, 4[bp] ; segment
   9.290        mov  ds, ax
   9.291        mov  bx, 6[bp] ; offset
   9.292        mov  ax, [bx]
   9.293 -      inc  bx
   9.294 -      inc  bx
   9.295 +      add  bx, #2
   9.296        mov  dx, [bx]
   9.297        ;; ax = return value (word)
   9.298        ;; dx = return value (word)
   9.299        pop  ds
   9.300        pop  bx
   9.301 -  
   9.302 +
   9.303      pop  bp
   9.304    ASM_END
   9.305    }
   9.306 -  
   9.307 +
   9.308      void
   9.309    write_dword(seg, offset, data)
   9.310      Bit16u seg;
   9.311 @@ -465,7 +461,7 @@ typedef unsigned long  Bit32u;
   9.312    ASM_START
   9.313      push bp
   9.314      mov  bp, sp
   9.315 -  
   9.316 +
   9.317        push ax
   9.318        push bx
   9.319        push ds
   9.320 @@ -474,50 +470,49 @@ typedef unsigned long  Bit32u;
   9.321        mov  bx, 6[bp] ; offset
   9.322        mov  ax, 8[bp] ; data word
   9.323        mov  [bx], ax  ; write data word
   9.324 -      inc  bx
   9.325 -      inc  bx
   9.326 +      add  bx, #2
   9.327        mov  ax, 10[bp] ; data word
   9.328        mov  [bx], ax  ; write data word
   9.329        pop  ds
   9.330        pop  bx
   9.331        pop  ax
   9.332 -  
   9.333 +
   9.334      pop  bp
   9.335    ASM_END
   9.336    }
   9.337 -  
   9.338 +
   9.339    // Bit32u (unsigned long) and long helper functions
   9.340    ASM_START
   9.341 -  
   9.342 +
   9.343    ;; and function
   9.344    landl:
   9.345    landul:
   9.346 -    SEG SS 
   9.347 +    SEG SS
   9.348        and ax,[di]
   9.349 -    SEG SS 
   9.350 +    SEG SS
   9.351        and bx,2[di]
   9.352      ret
   9.353 -  
   9.354 +
   9.355    ;; add function
   9.356    laddl:
   9.357    laddul:
   9.358 -    SEG SS 
   9.359 +    SEG SS
   9.360        add ax,[di]
   9.361 -    SEG SS 
   9.362 +    SEG SS
   9.363        adc bx,2[di]
   9.364      ret
   9.365 -  
   9.366 +
   9.367    ;; cmp function
   9.368    lcmpl:
   9.369    lcmpul:
   9.370      and eax, #0x0000FFFF
   9.371      shl ebx, #16
   9.372 -    add eax, ebx
   9.373 +    or  eax, ebx
   9.374      shr ebx, #16
   9.375      SEG SS
   9.376        cmp eax, dword ptr [di]
   9.377      ret
   9.378 -  
   9.379 +
   9.380    ;; sub function
   9.381    lsubl:
   9.382    lsubul:
   9.383 @@ -526,26 +521,26 @@ typedef unsigned long  Bit32u;
   9.384      SEG SS
   9.385      sbb bx,2[di]
   9.386      ret
   9.387 -  
   9.388 +
   9.389    ;; mul function
   9.390    lmull:
   9.391    lmulul:
   9.392      and eax, #0x0000FFFF
   9.393      shl ebx, #16
   9.394 -    add eax, ebx
   9.395 +    or  eax, ebx
   9.396      SEG SS
   9.397      mul eax, dword ptr [di]
   9.398      mov ebx, eax
   9.399      shr ebx, #16
   9.400      ret
   9.401 -  
   9.402 +
   9.403    ;; dec function
   9.404    ldecl:
   9.405    ldecul:
   9.406      SEG SS
   9.407      dec dword ptr [bx]
   9.408      ret
   9.409 -  
   9.410 +
   9.411    ;; or function
   9.412    lorl:
   9.413    lorul:
   9.414 @@ -554,31 +549,31 @@ typedef unsigned long  Bit32u;
   9.415      SEG SS
   9.416      or  bx,2[di]
   9.417      ret
   9.418 -  
   9.419 +
   9.420    ;; inc function
   9.421    lincl:
   9.422    lincul:
   9.423      SEG SS
   9.424      inc dword ptr [bx]
   9.425      ret
   9.426 -  
   9.427 +
   9.428    ;; tst function
   9.429    ltstl:
   9.430    ltstul:
   9.431      and eax, #0x0000FFFF
   9.432      shl ebx, #16
   9.433 -    add eax, ebx
   9.434 +    or  eax, ebx
   9.435      shr ebx, #16
   9.436      test eax, eax
   9.437      ret
   9.438 -  
   9.439 +
   9.440    ;; sr function
   9.441    lsrul:
   9.442      mov  cx,di
   9.443      jcxz lsr_exit
   9.444      and  eax, #0x0000FFFF
   9.445      shl  ebx, #16
   9.446 -    add  eax, ebx
   9.447 +    or   eax, ebx
   9.448    lsr_loop:
   9.449      shr  eax, #1
   9.450      loop lsr_loop
   9.451 @@ -586,7 +581,7 @@ typedef unsigned long  Bit32u;
   9.452      shr  ebx, #16
   9.453    lsr_exit:
   9.454      ret
   9.455 -  
   9.456 +
   9.457    ;; sl function
   9.458    lsll:
   9.459    lslul:
   9.460 @@ -594,15 +589,15 @@ typedef unsigned long  Bit32u;
   9.461      jcxz lsl_exit
   9.462      and  eax, #0x0000FFFF
   9.463      shl  ebx, #16
   9.464 -    add  eax, ebx
   9.465 -  lsl_loop: 
   9.466 +    or   eax, ebx
   9.467 +  lsl_loop:
   9.468      shl  eax, #1
   9.469      loop lsl_loop
   9.470      mov  ebx, eax
   9.471      shr  ebx, #16
   9.472    lsl_exit:
   9.473      ret
   9.474 -  
   9.475 +
   9.476    idiv_:
   9.477      cwd
   9.478      idiv bx
   9.479 @@ -616,7 +611,7 @@ typedef unsigned long  Bit32u;
   9.480    ldivul:
   9.481      and  eax, #0x0000FFFF
   9.482      shl  ebx, #16
   9.483 -    add  eax, ebx
   9.484 +    or   eax, ebx
   9.485      xor  edx, edx
   9.486      SEG SS
   9.487      mov  bx,  2[di]
   9.488 @@ -665,7 +660,7 @@ typedef struct {
   9.489      Bit8u  revision;
   9.490      Bit8u  checksum;
   9.491      } dpte_t;
   9.492 - 
   9.493 +
   9.494    typedef struct {
   9.495      Bit8u  iface;        // ISA or PCI
   9.496      Bit16u iobase1;      // IO Base 1
   9.497 @@ -678,15 +673,15 @@ typedef struct {
   9.498      Bit8u  device;       // Detected type of attached devices (hd/cd/none)
   9.499      Bit8u  removable;    // Removable device flag
   9.500      Bit8u  lock;         // Locks for removable devices
   9.501 -    // Bit8u  lba_capable;  // LBA capable flag - always yes for bochs devices
   9.502 -    Bit8u  mode;         // transfert mode : PIO 16/32 bits - IRQ - ISADMA - PCIDMA
   9.503 +    Bit8u  mode;         // transfer mode : PIO 16/32 bits - IRQ - ISADMA - PCIDMA
   9.504      Bit16u blksize;      // block size
   9.505  
   9.506      Bit8u  translation;  // type of translation
   9.507      chs_t  lchs;         // Logical CHS
   9.508      chs_t  pchs;         // Physical CHS
   9.509  
   9.510 -    Bit32u sectors;      // Total sectors count
   9.511 +    Bit32u sectors_low;  // Total sectors count
   9.512 +    Bit32u sectors_high;
   9.513      } ata_device_t;
   9.514  
   9.515    typedef struct {
   9.516 @@ -697,10 +692,10 @@ typedef struct {
   9.517      ata_device_t  devices[BX_MAX_ATA_DEVICES];
   9.518      //
   9.519      // map between (bios hd id - 0x80) and ata channels
   9.520 -    Bit8u  hdcount, hdidmap[BX_MAX_ATA_DEVICES];                
   9.521 +    Bit8u  hdcount, hdidmap[BX_MAX_ATA_DEVICES];
   9.522  
   9.523      // map between (bios cd id - 0xE0) and ata channels
   9.524 -    Bit8u  cdcount, cdidmap[BX_MAX_ATA_DEVICES];                
   9.525 +    Bit8u  cdcount, cdidmap[BX_MAX_ATA_DEVICES];
   9.526  
   9.527      // Buffer for DPTE table
   9.528      dpte_t dpte;
   9.529 @@ -710,9 +705,9 @@ typedef struct {
   9.530      Bit32u trsfbytes;
   9.531  
   9.532      } ata_t;
   9.533 -  
   9.534 +
   9.535  #if BX_ELTORITO_BOOT
   9.536 -  // ElTorito Device Emulation data 
   9.537 +  // ElTorito Device Emulation data
   9.538    typedef struct {
   9.539      Bit8u  active;
   9.540      Bit8u  media;
   9.541 @@ -723,20 +718,20 @@ typedef struct {
   9.542      Bit16u buffer_segment;
   9.543      Bit16u load_segment;
   9.544      Bit16u sector_count;
   9.545 -    
   9.546 +
   9.547      // Virtual device
   9.548      chs_t  vdevice;
   9.549      } cdemu_t;
   9.550  #endif // BX_ELTORITO_BOOT
   9.551 -  
   9.552 +
   9.553  #include "32bitgateway.h"
   9.554  
   9.555    // for access to EBDA area
   9.556 -  //     The EBDA structure should conform to 
   9.557 -  //     http://www.cybertrails.com/~fys/rombios.htm document
   9.558 +  //     The EBDA structure should conform to
   9.559 +  //     http://www.frontiernet.net/~fys/rombios.htm document
   9.560    //     I made the ata and cdemu structs begin at 0x121 in the EBDA seg
   9.561 -  // EBDA must be at most 768 bytes; it lives at 0x9fc00, and the boot 
   9.562 -  // device tables are at 0x9ff00 -- 0x9ffff
   9.563 +  // EBDA must be at most 768 bytes; it lives at EBDA_SEG, and the boot
   9.564 +  // device tables are at EBDA_SEG:IPL_TABLE_OFFSET
   9.565    typedef struct {
   9.566      unsigned char ebda_size;
   9.567      unsigned char cmos_shutdown_status;
   9.568 @@ -758,7 +753,7 @@ typedef struct {
   9.569  
   9.570      upcall_t upcall;
   9.571      } ebda_data_t;
   9.572 -  
   9.573 +
   9.574    #define EBDA_CMOS_SHUTDOWN_STATUS_OFFSET 1
   9.575    #define EbdaData ((ebda_data_t *) 0)
   9.576  
   9.577 @@ -772,7 +767,7 @@ typedef struct {
   9.578      Bit32u lba1;
   9.579      Bit32u lba2;
   9.580      } int13ext_t;
   9.581 - 
   9.582 +
   9.583    #define Int13Ext ((int13ext_t *) 0)
   9.584  
   9.585    // Disk Physical Table definition
   9.586 @@ -798,7 +793,7 @@ typedef struct {
   9.587      Bit8u   reserved3;
   9.588      Bit8u   checksum;
   9.589      } dpt_t;
   9.590 - 
   9.591 +
   9.592    #define Int13DPT ((dpt_t *) 0)
   9.593  
   9.594  #endif // BX_USE_ATADRV
   9.595 @@ -828,9 +823,9 @@ typedef struct {
   9.596      } r16;
   9.597    struct {
   9.598      Bit32u filler[4];
   9.599 -    Bit8u  bl, bh; 
   9.600 +    Bit8u  bl, bh;
   9.601      Bit16u filler1;
   9.602 -    Bit8u  dl, dh; 
   9.603 +    Bit8u  dl, dh;
   9.604      Bit16u filler2;
   9.605      Bit8u  cl, ch;
   9.606      Bit16u filler3;
   9.607 @@ -864,6 +859,14 @@ typedef struct {
   9.608    flags_t flags;
   9.609    } iret_addr_t;
   9.610  
   9.611 +typedef struct {
   9.612 +  Bit16u type;
   9.613 +  Bit16u flags;
   9.614 +  Bit32u vector;
   9.615 +  Bit32u description;
   9.616 +  Bit32u reserved;
   9.617 +  } ipl_entry_t;
   9.618 +
   9.619  
   9.620  
   9.621  static Bit8u          inb();
   9.622 @@ -903,8 +906,6 @@ static void           int1a_function();
   9.623  static void           int70_function();
   9.624  static void           int74_function();
   9.625  static Bit16u         get_CS();
   9.626 -//static Bit16u         get_DS();
   9.627 -//static void           set_DS();
   9.628  static Bit16u         get_SS();
   9.629  static unsigned int   enqueue_key();
   9.630  static unsigned int   dequeue_key();
   9.631 @@ -923,7 +924,10 @@ static void           keyboard_init();
   9.632  static void           keyboard_panic();
   9.633  static void           shutdown_status_panic();
   9.634  static void           nmi_handler_msg();
   9.635 -
   9.636 +static void           delay_ticks();
   9.637 +static void           delay_ticks_and_check_for_keystroke();
   9.638 +
   9.639 +static void           interactive_bootkey();
   9.640  static void           print_bios_banner();
   9.641  static void           print_boot_device();
   9.642  static void           print_boot_failure();
   9.643 @@ -957,33 +961,9 @@ Bit16u cdrom_boot();
   9.644  
   9.645  #endif // BX_ELTORITO_BOOT
   9.646  
   9.647 -static char bios_cvs_version_string[] = "$Revision: 1.138 $";
   9.648 -static char bios_date_string[] = "$Date: 2005/05/07 15:55:26 $";
   9.649 -
   9.650 -static char CVSID[] = "$Id: rombios.c,v 1.138 2005/05/07 15:55:26 vruppert Exp $";
   9.651 -
   9.652 -/* Offset to skip the CVS $Id: prefix */ 
   9.653 -#define bios_version_string  (CVSID + 4)
   9.654 -
   9.655 -#define BIOS_PRINTF_HALT     1
   9.656 -#define BIOS_PRINTF_SCREEN   2
   9.657 -#define BIOS_PRINTF_INFO     4
   9.658 -#define BIOS_PRINTF_DEBUG    8
   9.659 -#define BIOS_PRINTF_ALL      (BIOS_PRINTF_SCREEN | BIOS_PRINTF_INFO)
   9.660 -#define BIOS_PRINTF_DEBHALT  (BIOS_PRINTF_SCREEN | BIOS_PRINTF_INFO | BIOS_PRINTF_HALT)
   9.661 -
   9.662 -#define printf(format, p...)  bios_printf(BIOS_PRINTF_SCREEN, format, ##p)
   9.663 -
   9.664 -// Defines the output macros. 
   9.665 -// BX_DEBUG goes to INFO port until we can easily choose debug info on a 
   9.666 -// per-device basis. Debug info are sent only in debug mode
   9.667 -#if DEBUG_ROMBIOS
   9.668 -#  define BX_DEBUG(format, p...)  bios_printf(BIOS_PRINTF_INFO, format, ##p)    
   9.669 -#else
   9.670 -#  define BX_DEBUG(format, p...) 
   9.671 -#endif
   9.672 -#define BX_INFO(format, p...)   bios_printf(BIOS_PRINTF_INFO, format, ##p)
   9.673 -#define BX_PANIC(format, p...)  bios_printf(BIOS_PRINTF_DEBHALT, format, ##p)
   9.674 +static char bios_cvs_version_string[] = "$Revision: 1.221 $ $Date: 2008/12/07 17:32:29 $";
   9.675 +
   9.676 +#define BIOS_COPYRIGHT_STRING "(c) 2002 MandrakeSoft S.A. Written by Kevin Lawton & the Bochs team."
   9.677  
   9.678  #if DEBUG_ATA
   9.679  #  define BX_DEBUG_ATA(a...) BX_DEBUG(a)
   9.680 @@ -1156,9 +1136,9 @@ static struct {
   9.681        { 0x5100, 0x5133, 0x7600,   none, 0x20 }, /* 3 PgDn */
   9.682        { 0x5200, 0x5230,   none,   none, 0x20 }, /* 0 Ins */
   9.683        { 0x5300, 0x532e,   none,   none, 0x20 }, /* Del */
   9.684 -      {   none,   none,   none,   none, none }, /* ??? */
   9.685 -      {   none,   none,   none,   none, none }, /* ??? */
   9.686 -      {   none,   none,   none,   none, none }, /* ??? */
   9.687 +      {   none,   none,   none,   none, none },
   9.688 +      {   none,   none,   none,   none, none },
   9.689 +      { 0x565c, 0x567c,   none,   none, none }, /* \| */
   9.690        { 0x8500, 0x8700, 0x8900, 0x8b00, none }, /* F11 */
   9.691        { 0x8600, 0x8800, 0x8a00, 0x8c00, none }, /* F12 */
   9.692        };
   9.693 @@ -1415,31 +1395,6 @@ ASM_START
   9.694  ASM_END
   9.695  }
   9.696  
   9.697 -//  Bit16u
   9.698 -//get_DS()
   9.699 -//{
   9.700 -//ASM_START
   9.701 -//  mov  ax, ds
   9.702 -//ASM_END
   9.703 -//}
   9.704 -//
   9.705 -//  void
   9.706 -//set_DS(ds_selector)
   9.707 -//  Bit16u ds_selector;
   9.708 -//{
   9.709 -//ASM_START
   9.710 -//  push bp
   9.711 -//  mov  bp, sp
   9.712 -//
   9.713 -//    push ax
   9.714 -//    mov  ax, 4[bp] ; ds_selector
   9.715 -//    mov  ds, ax
   9.716 -//    pop  ax
   9.717 -//
   9.718 -//  pop  bp
   9.719 -//ASM_END
   9.720 -//}
   9.721 -
   9.722    Bit16u
   9.723  get_SS()
   9.724  {
   9.725 @@ -1455,7 +1410,7 @@ copy_e820_table()
   9.726    Bit8u nr_entries = read_byte(0x9000, 0x1e8);
   9.727    Bit32u base_mem;
   9.728    if (nr_entries > 32)
   9.729 -  	nr_entries = 32;
   9.730 +       nr_entries = 32;
   9.731    write_word(0xe000, 0x8, nr_entries);
   9.732    memcpyb(0xe000, 0x10, 0x9000, 0x2d0, nr_entries * 0x14);
   9.733    /* Report the proper base memory size at address 0x0413: otherwise
   9.734 @@ -1563,7 +1518,7 @@ wrch(c)
   9.735    pop  bp
   9.736    ASM_END
   9.737  }
   9.738 - 
   9.739 +
   9.740    void
   9.741  send(action, c)
   9.742    Bit16u action;
   9.743 @@ -1619,14 +1574,121 @@ put_uint(action, val, width, neg)
   9.744    send(action, val - (nval * 10) + '0');
   9.745  }
   9.746  
   9.747 +  void
   9.748 +put_luint(action, val, width, neg)
   9.749 +  Bit16u action;
   9.750 +  unsigned long val;
   9.751 +  short width;
   9.752 +  bx_bool neg;
   9.753 +{
   9.754 +  unsigned long nval = val / 10;
   9.755 +  if (nval)
   9.756 +    put_luint(action, nval, width - 1, neg);
   9.757 +  else {
   9.758 +    while (--width > 0) send(action, ' ');
   9.759 +    if (neg) send(action, '-');
   9.760 +  }
   9.761 +  send(action, val - (nval * 10) + '0');
   9.762 +}
   9.763 +
   9.764 +void put_str(action, segment, offset)
   9.765 +  Bit16u action;
   9.766 +  Bit16u segment;
   9.767 +  Bit16u offset;
   9.768 +{
   9.769 +  Bit8u c;
   9.770 +
   9.771 +  while (c = read_byte(segment, offset)) {
   9.772 +    send(action, c);
   9.773 +    offset++;
   9.774 +  }
   9.775 +}
   9.776 +
   9.777 +  void
   9.778 +delay_ticks(ticks)
   9.779 +  Bit16u ticks;
   9.780 +{
   9.781 +  long ticks_to_wait, delta;
   9.782 +  Bit32u prev_ticks, t;
   9.783 +
   9.784 +   /*
   9.785 +    * The 0:046c wraps around at 'midnight' according to a 18.2Hz clock.
   9.786 +    * We also have to be careful about interrupt storms.
   9.787 +    */
   9.788 +ASM_START
   9.789 +  pushf
   9.790 +  sti
   9.791 +ASM_END
   9.792 +  ticks_to_wait = ticks;
   9.793 +  prev_ticks = read_dword(0x0, 0x46c);
   9.794 +  do
   9.795 +  {
   9.796 +ASM_START
   9.797 +    hlt
   9.798 +ASM_END
   9.799 +    t = read_dword(0x0, 0x46c);
   9.800 +    if (t > prev_ticks)
   9.801 +    {
   9.802 +      delta = t - prev_ticks;     /* The temp var is required or bcc screws up. */
   9.803 +      ticks_to_wait -= delta;
   9.804 +    }
   9.805 +    else if (t < prev_ticks)
   9.806 +    {
   9.807 +      ticks_to_wait -= t;         /* wrapped */
   9.808 +    }
   9.809 +
   9.810 +    prev_ticks = t;
   9.811 +  } while (ticks_to_wait > 0);
   9.812 +ASM_START
   9.813 +  cli
   9.814 +  popf
   9.815 +ASM_END
   9.816 +}
   9.817 +
   9.818 +  Bit8u
   9.819 +check_for_keystroke()
   9.820 +{
   9.821 +ASM_START
   9.822 +  mov  ax, #0x100
   9.823 +  int  #0x16
   9.824 +  jz   no_key
   9.825 +  mov  al, #1
   9.826 +  jmp  done
   9.827 +no_key:
   9.828 +  xor  al, al
   9.829 +done:
   9.830 +ASM_END
   9.831 +}
   9.832 +
   9.833 +  Bit8u
   9.834 +get_keystroke()
   9.835 +{
   9.836 +ASM_START
   9.837 +  mov  ax, #0x0
   9.838 +  int  #0x16
   9.839 +  xchg ah, al
   9.840 +ASM_END
   9.841 +}
   9.842 +
   9.843 +  void
   9.844 +delay_ticks_and_check_for_keystroke(ticks, count)
   9.845 +  Bit16u ticks, count;
   9.846 +{
   9.847 +  Bit16u i;
   9.848 +  for (i = 1; i <= count; i++) {
   9.849 +    delay_ticks(ticks);
   9.850 +    if (check_for_keystroke())
   9.851 +      break;
   9.852 +  }
   9.853 +}
   9.854 +
   9.855  //--------------------------------------------------------------------------
   9.856  // bios_printf()
   9.857 -//   A compact variable argument printf function which prints its output via
   9.858 -//   an I/O port so that it can be logged by Bochs/Plex.  
   9.859 -//   Currently, only %x is supported (or %02x, %04x, etc).
   9.860 +//   A compact variable argument printf function.
   9.861  //
   9.862 -//   Supports %[format_width][format]
   9.863 -//   where format can be d,x,c,s
   9.864 +//   Supports %[format_width][length]format
   9.865 +//   where format can be x,X,u,d,s,S,c
   9.866 +//   and the optional length modifier is l (ell)
   9.867  //--------------------------------------------------------------------------
   9.868    void
   9.869  bios_printf(action, s)
   9.870 @@ -1637,7 +1699,7 @@ bios_printf(action, s)
   9.871    bx_bool  in_format;
   9.872    short i;
   9.873    Bit16u  *arg_ptr;
   9.874 -  Bit16u   arg_seg, arg, nibble, shift_count, format_width;
   9.875 +  Bit16u   arg_seg, arg, nibble, hibyte, shift_count, format_width, hexadd;
   9.876  
   9.877    arg_ptr = &s;
   9.878    arg_seg = get_SS();
   9.879 @@ -1664,17 +1726,49 @@ bios_printf(action, s)
   9.880        else {
   9.881          arg_ptr++; // increment to next arg
   9.882          arg = read_word(arg_seg, arg_ptr);
   9.883 -        if (c == 'x') {
   9.884 +        if (c == 'x' || c == 'X') {
   9.885            if (format_width == 0)
   9.886              format_width = 4;
   9.887 +          if (c == 'x')
   9.888 +            hexadd = 'a';
   9.889 +          else
   9.890 +            hexadd = 'A';
   9.891            for (i=format_width-1; i>=0; i--) {
   9.892              nibble = (arg >> (4 * i)) & 0x000f;
   9.893 -            send (action, (nibble<=9)? (nibble+'0') : (nibble-10+'A'));
   9.894 +            send (action, (nibble<=9)? (nibble+'0') : (nibble-10+hexadd));
   9.895              }
   9.896            }
   9.897          else if (c == 'u') {
   9.898            put_uint(action, arg, format_width, 0);
   9.899            }
   9.900 +        else if (c == 'l') {
   9.901 +          s++;
   9.902 +          c = read_byte(get_CS(), s); /* is it ld,lx,lu? */
   9.903 +          arg_ptr++; /* increment to next arg */
   9.904 +          hibyte = read_word(arg_seg, arg_ptr);
   9.905 +          if (c == 'd') {
   9.906 +            if (hibyte & 0x8000)
   9.907 +              put_luint(action, 0L-(((Bit32u) hibyte << 16) | arg), format_width-1, 1);
   9.908 +            else
   9.909 +              put_luint(action, ((Bit32u) hibyte << 16) | arg, format_width, 0);
   9.910 +           }
   9.911 +          else if (c == 'u') {
   9.912 +            put_luint(action, ((Bit32u) hibyte << 16) | arg, format_width, 0);
   9.913 +           }
   9.914 +          else if (c == 'x' || c == 'X')
   9.915 +           {
   9.916 +            if (format_width == 0)
   9.917 +              format_width = 8;
   9.918 +            if (c == 'x')
   9.919 +              hexadd = 'a';
   9.920 +            else
   9.921 +              hexadd = 'A';
   9.922 +            for (i=format_width-1; i>=0; i--) {
   9.923 +              nibble = ((((Bit32u) hibyte <<16) | arg) >> (4 * i)) & 0x000f;
   9.924 +              send (action, (nibble<=9)? (nibble+'0') : (nibble-10+hexadd));
   9.925 +              }
   9.926 +           }
   9.927 +          }
   9.928          else if (c == 'd') {
   9.929            if (arg & 0x8000)
   9.930              put_int(action, -arg, format_width - 1, 1);
   9.931 @@ -1682,7 +1776,13 @@ bios_printf(action, s)
   9.932              put_int(action, arg, format_width, 0);
   9.933            }
   9.934          else if (c == 's') {
   9.935 -          bios_printf(action & (~BIOS_PRINTF_HALT), arg);
   9.936 +          put_str(action, get_CS(), arg);
   9.937 +          }
   9.938 +        else if (c == 'S') {
   9.939 +          hibyte = arg;
   9.940 +          arg_ptr++;
   9.941 +          arg = read_word(arg_seg, arg_ptr);
   9.942 +          put_str(action, hibyte, arg);
   9.943            }
   9.944          else if (c == 'c') {
   9.945            send(action, arg);
   9.946 @@ -1699,7 +1799,7 @@ bios_printf(action, s)
   9.947      }
   9.948  
   9.949    if (action & BIOS_PRINTF_HALT) {
   9.950 -    // freeze in a busy loop.  
   9.951 +    // freeze in a busy loop.
   9.952  ASM_START
   9.953      cli
   9.954   halt2_loop:
   9.955 @@ -1733,8 +1833,8 @@ keyboard_init()
   9.956              max = 0x2000;
   9.957              }
   9.958          }
   9.959 -  
   9.960 -    // Due to timer issues, and if the IPS setting is > 15000000, 
   9.961 +
   9.962 +    // Due to timer issues, and if the IPS setting is > 15000000,
   9.963      // the incoming keys might not be flushed here. That will
   9.964      // cause a panic a few lines below.  See sourceforge bug report :
   9.965      // [ 642031 ] FATAL: Keyboard RESET error:993
   9.966 @@ -1871,13 +1971,12 @@ keyboard_init()
   9.967  keyboard_panic(status)
   9.968    Bit16u status;
   9.969  {
   9.970 -  // If you're getting a 993 keyboard panic here, 
   9.971 +  // If you're getting a 993 keyboard panic here,
   9.972    // please see the comment in keyboard_init
   9.973 -  
   9.974 +
   9.975    BX_PANIC("Keyboard error:%u\n",status);
   9.976  }
   9.977  
   9.978 -
   9.979  #define CMOS_SHUTDOWN_S3 0xFE
   9.980  //--------------------------------------------------------------------------
   9.981  // machine_reset
   9.982 @@ -1932,6 +2031,11 @@ shutdown_status_panic(status)
   9.983    BX_PANIC("Unimplemented shutdown status: %02x\n",(Bit8u)status);
   9.984  }
   9.985  
   9.986 +void s3_resume_panic()
   9.987 +{
   9.988 +  BX_PANIC("Returned from s3_resume.\n");
   9.989 +}
   9.990 +
   9.991  //--------------------------------------------------------------------------
   9.992  // print_bios_banner
   9.993  //   displays a the bios version
   9.994 @@ -1939,108 +2043,197 @@ shutdown_status_panic(status)
   9.995  void
   9.996  print_bios_banner()
   9.997  {
   9.998 -  printf(BX_APPNAME" BIOS, %d cpu%s, ", BX_SMP_PROCESSORS, BX_SMP_PROCESSORS>1?"s":"");
   9.999 -  printf("%s %s\n", bios_cvs_version_string, bios_date_string);
  9.1000 +  printf(BX_APPNAME" BIOS - build: %s\n%s\nOptions: ",
  9.1001 +    BIOS_BUILD_DATE, bios_cvs_version_string);
  9.1002 +  printf(
  9.1003 +#if BX_APM
  9.1004 +  "apmbios "
  9.1005 +#endif
  9.1006 +#if BX_PCIBIOS
  9.1007 +  "pcibios "
  9.1008 +#endif
  9.1009 +#if BX_ELTORITO_BOOT
  9.1010 +  "eltorito "
  9.1011 +#endif
  9.1012 +#if BX_ROMBIOS32
  9.1013 +  "rombios32 "
  9.1014 +#endif
  9.1015  #if BX_TCGBIOS
  9.1016 -  printf("TCG-enabled BIOS.\n");
  9.1017 -#endif
  9.1018 -  printf("\n");
  9.1019 -}
  9.1020 -
  9.1021 +  "TCG-enabled"
  9.1022 +#endif
  9.1023 +  "\n\n");
  9.1024 +}
  9.1025  
  9.1026  //--------------------------------------------------------------------------
  9.1027  // BIOS Boot Specification 1.0.1 compatibility
  9.1028  //
  9.1029 -// Very basic support for the BIOS Boot Specification, which allows expansion 
  9.1030 -// ROMs to register themselves as boot devices, instead of just stealing the 
  9.1031 +// Very basic support for the BIOS Boot Specification, which allows expansion
  9.1032 +// ROMs to register themselves as boot devices, instead of just stealing the
  9.1033  // INT 19h boot vector.
  9.1034 -// 
  9.1035 +//
  9.1036  // This is a hack: to do it properly requires a proper PnP BIOS and we aren't
  9.1037 -// one; we just lie to the option ROMs to make them behave correctly. 
  9.1038 -// We also don't support letting option ROMs register as bootable disk 
  9.1039 -// drives (BCVs), only as bootable devices (BEVs). 
  9.1040 +// one; we just lie to the option ROMs to make them behave correctly.
  9.1041 +// We also don't support letting option ROMs register as bootable disk
  9.1042 +// drives (BCVs), only as bootable devices (BEVs).
  9.1043  //
  9.1044  // http://www.phoenix.com/en/Customer+Services/White+Papers-Specs/pc+industry+specifications.htm
  9.1045  //--------------------------------------------------------------------------
  9.1046  
  9.1047 -/* 256 bytes at 0x9ff00 -- 0x9ffff is used for the IPL boot table. */
  9.1048 -#define IPL_SEG              0x9ff0
  9.1049 -#define IPL_TABLE_OFFSET     0x0000
  9.1050 -#define IPL_TABLE_ENTRIES    8
  9.1051 -#define IPL_COUNT_OFFSET     0x0080  /* u16: number of valid table entries */
  9.1052 -#define IPL_SEQUENCE_OFFSET  0x0082  /* u16: next boot device */
  9.1053 -
  9.1054 -struct ipl_entry {
  9.1055 -  Bit16u type;
  9.1056 -  Bit16u flags;
  9.1057 -  Bit32u vector;
  9.1058 -  Bit32u description;
  9.1059 -  Bit32u reserved;
  9.1060 -};
  9.1061 -
  9.1062 -static void 
  9.1063 -init_boot_vectors() 
  9.1064 -{
  9.1065 -  struct ipl_entry e; 
  9.1066 +static char drivetypes[][10]={"", "Floppy","Hard Disk","CD-Rom", "Network"};
  9.1067 +
  9.1068 +static void
  9.1069 +init_boot_vectors()
  9.1070 +{
  9.1071 +  ipl_entry_t e;
  9.1072    Bit16u count = 0;
  9.1073    Bit16u ss = get_SS();
  9.1074 +  Bit16u ebda_seg = read_word(0x0040, 0x000E);
  9.1075  
  9.1076    /* Clear out the IPL table. */
  9.1077 -  memsetb(IPL_SEG, IPL_TABLE_OFFSET, 0, 0xff);
  9.1078 +  memsetb(ebda_seg, IPL_TABLE_OFFSET, 0, IPL_SIZE);
  9.1079 +
  9.1080 +  /* User selected device not set */
  9.1081 +  write_word(ebda_seg, IPL_BOOTFIRST_OFFSET, 0xFFFF);
  9.1082  
  9.1083    /* Floppy drive */
  9.1084 -  e.type = 1; e.flags = 0; e.vector = 0; e.description = 0; e.reserved = 0;
  9.1085 -  memcpyb(IPL_SEG, IPL_TABLE_OFFSET + count * sizeof (e), ss, &e, sizeof (e));
  9.1086 +  e.type = IPL_TYPE_FLOPPY; e.flags = 0; e.vector = 0; e.description = 0; e.reserved = 0;
  9.1087 +  memcpyb(ebda_seg, IPL_TABLE_OFFSET + count * sizeof (e), ss, &e, sizeof (e));
  9.1088    count++;
  9.1089  
  9.1090    /* First HDD */
  9.1091 -  e.type = 2; e.flags = 0; e.vector = 0; e.description = 0; e.reserved = 0;
  9.1092 -  memcpyb(IPL_SEG, IPL_TABLE_OFFSET + count * sizeof (e), ss, &e, sizeof (e));
  9.1093 +  e.type = IPL_TYPE_HARDDISK; e.flags = 0; e.vector = 0; e.description = 0; e.reserved = 0;
  9.1094 +  memcpyb(ebda_seg, IPL_TABLE_OFFSET + count * sizeof (e), ss, &e, sizeof (e));
  9.1095    count++;
  9.1096  
  9.1097  #if BX_ELTORITO_BOOT
  9.1098    /* CDROM */
  9.1099 -  e.type = 3; e.flags = 0; e.vector = 0; e.description = 0; e.reserved = 0;
  9.1100 -  memcpyb(IPL_SEG, IPL_TABLE_OFFSET + count * sizeof (e), ss, &e, sizeof (e));
  9.1101 +  e.type = IPL_TYPE_CDROM; e.flags = 0; e.vector = 0; e.description = 0; e.reserved = 0;
  9.1102 +  memcpyb(ebda_seg, IPL_TABLE_OFFSET + count * sizeof (e), ss, &e, sizeof (e));
  9.1103    count++;
  9.1104 -#endif  
  9.1105 +#endif
  9.1106  
  9.1107    /* Remember how many devices we have */
  9.1108 -  write_word(IPL_SEG, IPL_COUNT_OFFSET, count);
  9.1109 +  write_word(ebda_seg, IPL_COUNT_OFFSET, count);
  9.1110    /* Not tried booting anything yet */
  9.1111 -  write_word(IPL_SEG, IPL_SEQUENCE_OFFSET, 0xffff);
  9.1112 +  write_word(ebda_seg, IPL_SEQUENCE_OFFSET, 0xffff);
  9.1113  }
  9.1114  
  9.1115  static Bit8u
  9.1116  get_boot_vector(i, e)
  9.1117 -Bit16u i; struct ipl_entry *e; 
  9.1118 +Bit16u i; ipl_entry_t *e;
  9.1119  {
  9.1120    Bit16u count;
  9.1121    Bit16u ss = get_SS();
  9.1122 +  Bit16u ebda_seg = read_word(0x0040, 0x000E);
  9.1123    /* Get the count of boot devices, and refuse to overrun the array */
  9.1124 -  count = read_word(IPL_SEG, IPL_COUNT_OFFSET);
  9.1125 +  count = read_word(ebda_seg, IPL_COUNT_OFFSET);
  9.1126    if (i >= count) return 0;
  9.1127    /* OK to read this device */
  9.1128 -  memcpyb(ss, e, IPL_SEG, IPL_TABLE_OFFSET + i * sizeof (*e), sizeof (*e));
  9.1129 +  memcpyb(ss, e, ebda_seg, IPL_TABLE_OFFSET + i * sizeof (*e), sizeof (*e));
  9.1130    return 1;
  9.1131  }
  9.1132  
  9.1133 +#if BX_ELTORITO_BOOT
  9.1134 +  void
  9.1135 +interactive_bootkey()
  9.1136 +{
  9.1137 +  ipl_entry_t e;
  9.1138 +  Bit16u count;
  9.1139 +  char description[33];
  9.1140 +  Bit8u scan_code;
  9.1141 +  Bit8u i;
  9.1142 +  Bit16u ss = get_SS();
  9.1143 +  Bit16u valid_choice = 0;
  9.1144 +  Bit16u ebda_seg = read_word(0x0040, 0x000E);
  9.1145 +
  9.1146 +  while (check_for_keystroke())
  9.1147 +    get_keystroke();
  9.1148 +
  9.1149 +  printf("\nPress F12 for boot menu.\n\n");
  9.1150 +
  9.1151 +  delay_ticks_and_check_for_keystroke(11, 5); /* ~3 seconds */
  9.1152 +  if (check_for_keystroke())
  9.1153 +  {
  9.1154 +    scan_code = get_keystroke();
  9.1155 +    if (scan_code == 0x86) /* F12 */
  9.1156 +    {
  9.1157 +      while (check_for_keystroke())
  9.1158 +        get_keystroke();
  9.1159 +
  9.1160 +      printf("Select boot device:\n\n");
  9.1161 +
  9.1162 +      count = read_word(ebda_seg, IPL_COUNT_OFFSET);
  9.1163 +      for (i = 0; i < count; i++)
  9.1164 +      {
  9.1165 +        memcpyb(ss, &e, ebda_seg, IPL_TABLE_OFFSET + i * sizeof (e), sizeof (e));
  9.1166 +        printf("%d. ", i+1);
  9.1167 +        switch(e.type)
  9.1168 +        {
  9.1169 +          case IPL_TYPE_FLOPPY:
  9.1170 +          case IPL_TYPE_HARDDISK:
  9.1171 +          case IPL_TYPE_CDROM:
  9.1172 +            printf("%s\n", drivetypes[e.type]);
  9.1173 +            break;
  9.1174 +          case IPL_TYPE_BEV:
  9.1175 +            printf("%s", drivetypes[4]);
  9.1176 +            if (e.description != 0)
  9.1177 +            {
  9.1178 +              memcpyb(ss, &description, (Bit16u)(e.description >> 16), (Bit16u)(e.description & 0xffff), 32);
  9.1179 +              description[32] = 0;
  9.1180 +              printf(" [%S]", ss, description);
  9.1181 +           }
  9.1182 +           printf("\n");
  9.1183 +           break;
  9.1184 +        }
  9.1185 +      }
  9.1186 +
  9.1187 +      count++;
  9.1188 +      while (!valid_choice) {
  9.1189 +        scan_code = get_keystroke();
  9.1190 +        if (scan_code == 0x01 || scan_code == 0x58) /* ESC or F12 */
  9.1191 +        {
  9.1192 +          valid_choice = 1;
  9.1193 +        }
  9.1194 +        else if (scan_code <= count)
  9.1195 +        {
  9.1196 +          valid_choice = 1;
  9.1197 +          scan_code -= 1;
  9.1198 +          /* Set user selected device */
  9.1199 +          write_word(ebda_seg, IPL_BOOTFIRST_OFFSET, scan_code);
  9.1200 +        }
  9.1201 +      }
  9.1202 +    printf("\n");
  9.1203 +    }
  9.1204 +  }
  9.1205 +}
  9.1206 +#endif // BX_ELTORITO_BOOT
  9.1207  
  9.1208  //--------------------------------------------------------------------------
  9.1209  // print_boot_device
  9.1210  //   displays the boot device
  9.1211  //--------------------------------------------------------------------------
  9.1212  
  9.1213 -static char drivetypes[][10]={"", "Floppy","Hard Disk","CD-Rom", "Network"};
  9.1214 -
  9.1215  void
  9.1216 -print_boot_device(type)
  9.1217 +print_boot_device(e)
  9.1218 +  ipl_entry_t *e;
  9.1219 +{
  9.1220    Bit16u type;
  9.1221 -{
  9.1222 -  /* NIC appears as type 0x80 */ 
  9.1223 -  if (type == 0x80 ) type = 0x4;
  9.1224 -  if (type == 0 || type > 0x4) BX_PANIC("Bad drive type\n"); 
  9.1225 -  printf("Booting from %s...\n", drivetypes[type]);
  9.1226 +  char description[33];
  9.1227 +  Bit16u ss = get_SS();
  9.1228 +  type = e->type;
  9.1229 +  /* NIC appears as type 0x80 */
  9.1230 +  if (type == IPL_TYPE_BEV) type = 0x4;
  9.1231 +  if (type == 0 || type > 0x4) BX_PANIC("Bad drive type\n");
  9.1232 +  printf("Booting from %s", drivetypes[type]);
  9.1233 +  /* print product string if BEV */
  9.1234 +  if (type == 4 && e->description != 0) {
  9.1235 +    /* first 32 bytes are significant */
  9.1236 +    memcpyb(ss, &description, (Bit16u)(e->description >> 16), (Bit16u)(e->description & 0xffff), 32);
  9.1237 +    /* terminate string */
  9.1238 +    description[32] = 0;
  9.1239 +    printf(" [%S]", ss, description);
  9.1240 +  }
  9.1241 +  printf("...\n");
  9.1242  }
  9.1243  
  9.1244  //--------------------------------------------------------------------------
  9.1245 @@ -2051,17 +2244,17 @@ print_boot_device(type)
  9.1246  print_boot_failure(type, reason)
  9.1247    Bit16u type; Bit8u reason;
  9.1248  {
  9.1249 -  if (type == 0 || type > 0x3) BX_PANIC("Bad drive type\n"); 
  9.1250 +  if (type == 0 || type > 0x3) BX_PANIC("Bad drive type\n");
  9.1251  
  9.1252    printf("Boot from %s failed", drivetypes[type]);
  9.1253    if (type < 4) {
  9.1254      /* Report the reason too */
  9.1255 -  if (reason==0) 
  9.1256 -    printf(": not a bootable disk");
  9.1257 -  else
  9.1258 -    printf(": could not read the boot disk");
  9.1259 +    if (reason==0)
  9.1260 +      printf(": not a bootable disk");
  9.1261 +    else
  9.1262 +      printf(": could not read the boot disk");
  9.1263    }
  9.1264 -  printf("\n");
  9.1265 +  printf("\n\n");
  9.1266  }
  9.1267  
  9.1268  //--------------------------------------------------------------------------
  9.1269 @@ -2073,219 +2266,10 @@ print_cdromboot_failure( code )
  9.1270    Bit16u code;
  9.1271  {
  9.1272    bios_printf(BIOS_PRINTF_SCREEN | BIOS_PRINTF_INFO, "CDROM boot failure code : %04x\n",code);
  9.1273 -  
  9.1274 +
  9.1275    return;
  9.1276  }
  9.1277  
  9.1278 -#define WAIT_HZ 18
  9.1279 -/**
  9.1280 - * Check for keystroke.
  9.1281 - * @returns    True if keystroke available, False if not.
  9.1282 - */
  9.1283 -Bit8u check_for_keystroke()
  9.1284 -{
  9.1285 -ASM_START
  9.1286 -    mov  ax, #0x100
  9.1287 -    int  #0x16
  9.1288 -    jz   no_key
  9.1289 -    mov  al, #1
  9.1290 -    jmp  done
  9.1291 -no_key:
  9.1292 -    xor  al, al
  9.1293 -done:
  9.1294 -ASM_END
  9.1295 -}
  9.1296 -
  9.1297 -/**
  9.1298 - * Get keystroke.
  9.1299 - * @returns    BIOS scan code.
  9.1300 - */
  9.1301 -Bit8u get_keystroke()
  9.1302 -{
  9.1303 -ASM_START
  9.1304 -    mov  ax, #0x0
  9.1305 -    int  #0x16
  9.1306 -    xchg ah, al
  9.1307 -ASM_END
  9.1308 -}
  9.1309 -
  9.1310 -/**
  9.1311 - * Waits (sleeps) for the given number of ticks.
  9.1312 - * Checks for keystroke.
  9.1313 - *
  9.1314 - * @returns BIOS scan code if available, 0 if not.
  9.1315 - * @param   ticks       Number of ticks to sleep.
  9.1316 - * @param   stop_on_key Whether to stop immediately upon keypress.
  9.1317 - */
  9.1318 -Bit8u wait(ticks, stop_on_key)
  9.1319 -  Bit16u ticks;
  9.1320 -  Bit8u stop_on_key;
  9.1321 -{
  9.1322 -    long ticks_to_wait, delta;
  9.1323 -    Bit32u prev_ticks, t;
  9.1324 -    Bit8u scan_code = 0;
  9.1325 -
  9.1326 -    /*
  9.1327 -     * The 0:046c wraps around at 'midnight' according to a 18.2Hz clock.
  9.1328 -     * We also have to be careful about interrupt storms.
  9.1329 -     */
  9.1330 -    ticks_to_wait = ticks;
  9.1331 -    prev_ticks = read_dword(0x0, 0x46c);
  9.1332 -    do
  9.1333 -    {
  9.1334 -        t = read_dword(0x0, 0x46c);
  9.1335 -        if (t > prev_ticks)
  9.1336 -        {
  9.1337 -            delta = t - prev_ticks;     /* The temp var is required or bcc screws up. */
  9.1338 -            ticks_to_wait -= delta;
  9.1339 -        }
  9.1340 -        else if (t < prev_ticks)
  9.1341 -            ticks_to_wait -= t;         /* wrapped */
  9.1342 -        prev_ticks = t;
  9.1343 -
  9.1344 -        if (check_for_keystroke())
  9.1345 -        {
  9.1346 -            scan_code = get_keystroke();
  9.1347 -            bios_printf(BIOS_PRINTF_DEBUG, "Key pressed: %x\n", scan_code);
  9.1348 -            if (stop_on_key)
  9.1349 -                return scan_code;
  9.1350 -        }
  9.1351 -    } while (ticks_to_wait > 0);
  9.1352 -    return scan_code;
  9.1353 -}
  9.1354 -
  9.1355 -static void clearscreen() {
  9.1356 -    /* Hide cursor, clear screen and move cursor to starting position */
  9.1357 -ASM_START
  9.1358 -        push bx
  9.1359 -        push cx
  9.1360 -        push dx
  9.1361 -
  9.1362 -        mov  ax, #0x100
  9.1363 -        mov  cx, #0x1000
  9.1364 -        int  #0x10
  9.1365 -
  9.1366 -        mov  ax, #0x700
  9.1367 -        mov  bh, #7
  9.1368 -        xor  cx, cx
  9.1369 -        mov  dx, #0x184f
  9.1370 -        int  #0x10
  9.1371 -
  9.1372 -        mov  ax, #0x200
  9.1373 -        xor  bx, bx
  9.1374 -        xor  dx, dx
  9.1375 -        int  #0x10
  9.1376 -
  9.1377 -        pop  dx
  9.1378 -        pop  cx
  9.1379 -        pop  bx
  9.1380 -ASM_END
  9.1381 -}
  9.1382 -
  9.1383 -int bootmenu(selected)
  9.1384 -  int selected;
  9.1385 -{
  9.1386 -    Bit8u scode;
  9.1387 -    int max;
  9.1388 -
  9.1389 -    /* get the number of boot devices */
  9.1390 -    max = read_word(IPL_SEG, IPL_COUNT_OFFSET);
  9.1391 -
  9.1392 -    for(;;) {
  9.1393 -        if (selected > max || selected < 1) selected = 1;
  9.1394 -        clearscreen();
  9.1395 -        bios_printf(BIOS_PRINTF_SCREEN | BIOS_PRINTF_INFO, "\n\n\n\n\n\n\n");
  9.1396 -        bios_printf(BIOS_PRINTF_SCREEN | BIOS_PRINTF_INFO, "          Select boot device\n\n");
  9.1397 -        bios_printf(BIOS_PRINTF_SCREEN | BIOS_PRINTF_INFO, "            1. Floppy\n");
  9.1398 -        bios_printf(BIOS_PRINTF_SCREEN | BIOS_PRINTF_INFO, "            2. Hard drive\n");
  9.1399 -        bios_printf(BIOS_PRINTF_SCREEN | BIOS_PRINTF_INFO, "            3. CD-ROM\n");
  9.1400 -        if (max == 4)
  9.1401 -            bios_printf(BIOS_PRINTF_SCREEN | BIOS_PRINTF_INFO, "            4. Network\n");
  9.1402 -        bios_printf(BIOS_PRINTF_SCREEN | BIOS_PRINTF_INFO, "\n\n          Currently selected: %d\n", selected);
  9.1403 -
  9.1404 -        do {
  9.1405 -            scode = wait(WAIT_HZ, 1);
  9.1406 -        } while (scode == 0);
  9.1407 -        switch(scode) {
  9.1408 -        case 0x02:
  9.1409 -        case 0x03:
  9.1410 -        case 0x04:
  9.1411 -            selected = scode - 1;
  9.1412 -            break;
  9.1413 -        case 0x05:
  9.1414 -            if (max == 4)
  9.1415 -                selected = scode -1 ;
  9.1416 -            else
  9.1417 -                scode = 0;
  9.1418 -            break;
  9.1419 -        case 0x48:
  9.1420 -            selected -= 1;
  9.1421 -            if (selected < 1)
  9.1422 -                selected = 1;
  9.1423 -            scode = 0;
  9.1424 -            break;
  9.1425 -        case 0x50:
  9.1426 -            selected += 1;
  9.1427 -            if (selected > max)
  9.1428 -                selected = max;
  9.1429 -            scode = 0;
  9.1430 -            break;
  9.1431 -        case 0x1c:
  9.1432 -            break;
  9.1433 -        default:
  9.1434 -            scode = 0;
  9.1435 -            break;
  9.1436 -        }
  9.1437 -        if (scode != 0)
  9.1438 -            break;
  9.1439 -    }
  9.1440 -
  9.1441 -    switch (selected) {
  9.1442 -    case 1:
  9.1443 -        return 0x3D;
  9.1444 -    case 2:
  9.1445 -        return 0x3E;
  9.1446 -    case 3:
  9.1447 -        return 0x3F;
  9.1448 -    case 4:
  9.1449 -        return 0x58;
  9.1450 -    default:
  9.1451 -        return 0;
  9.1452 -    }
  9.1453 -}
  9.1454 -
  9.1455 -void interactive_bootkey()
  9.1456 -{
  9.1457 -    Bit16u i;
  9.1458 -    Bit8u scan = 0;
  9.1459 -
  9.1460 -    bios_printf(BIOS_PRINTF_SCREEN | BIOS_PRINTF_INFO,
  9.1461 -                "\n\nPress F10 to select boot device.\n");
  9.1462 -
  9.1463 -    scan = wait(1, 0);
  9.1464 -    if (scan == 0x44)
  9.1465 -        scan = bootmenu(inb_cmos(0x3d) & 0x0f);
  9.1466 -
  9.1467 -    /* set the default based on the keypress or menu */
  9.1468 -    switch(scan) {
  9.1469 -    case 0x3D:
  9.1470 -        outb_cmos(0x3d, 0x01);
  9.1471 -        break;
  9.1472 -    case 0x3E:
  9.1473 -        outb_cmos(0x3d, 0x02);
  9.1474 -        break;
  9.1475 -    case 0x3F:
  9.1476 -        outb_cmos(0x3d, 0x03);
  9.1477 -        break;
  9.1478 -    case 0x58:
  9.1479 -        outb_cmos(0x3d, 0x04);
  9.1480 -        break;
  9.1481 -    default:
  9.1482 -        break;
  9.1483 -    }
  9.1484 -}
  9.1485 -
  9.1486 -
  9.1487  void
  9.1488  nmi_handler_msg()
  9.1489  {
  9.1490 @@ -2304,7 +2288,7 @@ log_bios_start()
  9.1491  #if BX_DEBUG_SERIAL
  9.1492    outb(BX_DEBUG_PORT+UART_LCR, 0x03); /* setup for serial logging: 8N1 */
  9.1493  #endif
  9.1494 -  BX_INFO("%s\n", bios_version_string);
  9.1495 +  BX_INFO("%s\n", bios_cvs_version_string);
  9.1496  }
  9.1497  
  9.1498    bx_bool
  9.1499 @@ -2339,39 +2323,35 @@ debugger_off()
  9.1500    outb(0xfedc, 0x00);
  9.1501  }
  9.1502  
  9.1503 -void 
  9.1504 +int
  9.1505  s3_resume()
  9.1506  {
  9.1507      Bit32u s3_wakeup_vector;
  9.1508 -    Bit16u s3_wakeup_ip, s3_wakeup_cs;
  9.1509 -    Bit8u cmos_shutdown_status;
  9.1510 -
  9.1511 +    Bit8u s3_resume_flag;
  9.1512 +
  9.1513 +    s3_resume_flag = read_byte(0x40, 0xb0);
  9.1514 +#ifdef HVMASSIST
  9.1515 +    s3_wakeup_vector = get_s3_waking_vector();
  9.1516 +#else
  9.1517 +    s3_wakeup_vector = read_dword(0x40, 0xb2);
  9.1518 +#endif
  9.1519 +
  9.1520 +    BX_INFO("S3 resume called %x 0x%lx\n", s3_resume_flag, s3_wakeup_vector);
  9.1521 +    if (s3_resume_flag != CMOS_SHUTDOWN_S3 || !s3_wakeup_vector)
  9.1522 +	    return 0;
  9.1523 +
  9.1524 +    write_byte(0x40, 0xb0, 0);
  9.1525 +
  9.1526 +    /* setup wakeup vector */
  9.1527 +    write_word(0x40, 0xb6, (s3_wakeup_vector & 0xF)); /* IP */
  9.1528 +    write_word(0x40, 0xb8, (s3_wakeup_vector >> 4)); /* CS */
  9.1529 +
  9.1530 +    BX_INFO("S3 resume jump to %x:%x\n", (s3_wakeup_vector >> 4),
  9.1531 +		    (s3_wakeup_vector & 0xF));
  9.1532  ASM_START
  9.1533 -    push ds
  9.1534 -    push ax
  9.1535 -    mov ax, #EBDA_SEG
  9.1536 -    mov ds, ax
  9.1537 -    mov al, [EBDA_CMOS_SHUTDOWN_STATUS_OFFSET]
  9.1538 -    mov .s3_resume.cmos_shutdown_status[bp], al
  9.1539 -    pop ax
  9.1540 -    pop ds
  9.1541 +    jmpf [0x04b6]
  9.1542  ASM_END
  9.1543 -
  9.1544 -    if (cmos_shutdown_status != CMOS_SHUTDOWN_S3)
  9.1545 -        return;
  9.1546 -
  9.1547 -    s3_wakeup_vector = get_s3_waking_vector();
  9.1548 -    if (!s3_wakeup_vector)
  9.1549 -        return;
  9.1550 -
  9.1551 -    s3_wakeup_ip = s3_wakeup_vector & 0xF;
  9.1552 -    s3_wakeup_cs = s3_wakeup_vector >> 4;
  9.1553 -
  9.1554 -ASM_START
  9.1555 -    push .s3_resume.s3_wakeup_cs[bp]
  9.1556 -    push .s3_resume.s3_wakeup_ip[bp]
  9.1557 -    retf
  9.1558 -ASM_END
  9.1559 +    return 1;
  9.1560  }
  9.1561  
  9.1562  #if BX_USE_ATADRV
  9.1563 @@ -2421,6 +2401,7 @@ ASM_END
  9.1564  // bits 7-4 of the device/head (CB_DH) reg
  9.1565  #define ATA_CB_DH_DEV0 0xa0    // select device 0
  9.1566  #define ATA_CB_DH_DEV1 0xb0    // select device 1
  9.1567 +#define ATA_CB_DH_LBA 0x40    // use LBA
  9.1568  
  9.1569  // status reg (CB_STAT and CB_ASTAT) bits
  9.1570  #define ATA_CB_STAT_BSY  0x80  // busy
  9.1571 @@ -2470,6 +2451,7 @@ ASM_END
  9.1572  #define ATA_CMD_READ_SECTORS                 0x20
  9.1573  #define ATA_CMD_READ_VERIFY_SECTORS          0x40
  9.1574  #define ATA_CMD_RECALIBRATE                  0x10
  9.1575 +#define ATA_CMD_REQUEST_SENSE                0x03
  9.1576  #define ATA_CMD_SEEK                         0x70
  9.1577  #define ATA_CMD_SET_FEATURES                 0xEF
  9.1578  #define ATA_CMD_SET_MULTIPLE_MODE            0xC6
  9.1579 @@ -2514,7 +2496,7 @@ ASM_END
  9.1580  #define ATA_DATA_NO      0x00
  9.1581  #define ATA_DATA_IN      0x01
  9.1582  #define ATA_DATA_OUT     0x02
  9.1583 -  
  9.1584 +
  9.1585  // ---------------------------------------------------------------------------
  9.1586  // ATA/ATAPI driver : initialization
  9.1587  // ---------------------------------------------------------------------------
  9.1588 @@ -2523,7 +2505,7 @@ void ata_init( )
  9.1589    Bit16u ebda_seg=read_word(0x0040,0x000E);
  9.1590    Bit8u  channel, device;
  9.1591  
  9.1592 -  // Channels info init. 
  9.1593 +  // Channels info init.
  9.1594    for (channel=0; channel<BX_MAX_ATA_INTERFACES; channel++) {
  9.1595      write_byte(ebda_seg,&EbdaData->ata.channels[channel].iface,ATA_IFACE_NONE);
  9.1596      write_word(ebda_seg,&EbdaData->ata.channels[channel].iobase1,0x0);
  9.1597 @@ -2531,7 +2513,7 @@ void ata_init( )
  9.1598      write_byte(ebda_seg,&EbdaData->ata.channels[channel].irq,0);
  9.1599      }
  9.1600  
  9.1601 -  // Devices info init. 
  9.1602 +  // Devices info init.
  9.1603    for (device=0; device<BX_MAX_ATA_DEVICES; device++) {
  9.1604      write_byte(ebda_seg,&EbdaData->ata.devices[device].type,ATA_TYPE_NONE);
  9.1605      write_byte(ebda_seg,&EbdaData->ata.devices[device].device,ATA_DEVICE_NONE);
  9.1606 @@ -2546,11 +2528,12 @@ void ata_init( )
  9.1607      write_word(ebda_seg,&EbdaData->ata.devices[device].pchs.heads,0);
  9.1608      write_word(ebda_seg,&EbdaData->ata.devices[device].pchs.cylinders,0);
  9.1609      write_word(ebda_seg,&EbdaData->ata.devices[device].pchs.spt,0);
  9.1610 -    
  9.1611 -    write_dword(ebda_seg,&EbdaData->ata.devices[device].sectors,0L);
  9.1612 +
  9.1613 +    write_dword(ebda_seg,&EbdaData->ata.devices[device].sectors_low,0L);
  9.1614 +    write_dword(ebda_seg,&EbdaData->ata.devices[device].sectors_high,0L);
  9.1615      }
  9.1616  
  9.1617 -  // hdidmap  and cdidmap init. 
  9.1618 +  // hdidmap  and cdidmap init.
  9.1619    for (device=0; device<BX_MAX_ATA_DEVICES; device++) {
  9.1620      write_byte(ebda_seg,&EbdaData->ata.hdidmap[device],BX_MAX_ATA_DEVICES);
  9.1621      write_byte(ebda_seg,&EbdaData->ata.cdidmap[device],BX_MAX_ATA_DEVICES);
  9.1622 @@ -2560,6 +2543,58 @@ void ata_init( )
  9.1623    write_byte(ebda_seg,&EbdaData->ata.cdcount,0);
  9.1624  }
  9.1625  
  9.1626 +#define TIMEOUT 0
  9.1627 +#define BSY 1
  9.1628 +#define NOT_BSY 2
  9.1629 +#define NOT_BSY_DRQ 3
  9.1630 +#define NOT_BSY_NOT_DRQ 4
  9.1631 +#define NOT_BSY_RDY 5
  9.1632 +
  9.1633 +#define IDE_TIMEOUT 32000u //32 seconds max for IDE ops
  9.1634 +
  9.1635 +int await_ide();
  9.1636 +static int await_ide(when_done,base,timeout)
  9.1637 +  Bit8u when_done;
  9.1638 +  Bit16u base;
  9.1639 +  Bit16u timeout;
  9.1640 +{
  9.1641 +  Bit32u time=0,last=0;
  9.1642 +  Bit16u status;
  9.1643 +  Bit8u result;
  9.1644 +  status = inb(base + ATA_CB_STAT); // for the times you're supposed to throw one away
  9.1645 +  for(;;) {
  9.1646 +    status = inb(base+ATA_CB_STAT);
  9.1647 +    time++;
  9.1648 +    if (when_done == BSY)
  9.1649 +      result = status & ATA_CB_STAT_BSY;
  9.1650 +    else if (when_done == NOT_BSY)
  9.1651 +      result = !(status & ATA_CB_STAT_BSY);
  9.1652 +    else if (when_done == NOT_BSY_DRQ)
  9.1653 +      result = !(status & ATA_CB_STAT_BSY) && (status & ATA_CB_STAT_DRQ);
  9.1654 +    else if (when_done == NOT_BSY_NOT_DRQ)
  9.1655 +      result = !(status & ATA_CB_STAT_BSY) && !(status & ATA_CB_STAT_DRQ);
  9.1656 +    else if (when_done == NOT_BSY_RDY)
  9.1657 +      result = !(status & ATA_CB_STAT_BSY) && (status & ATA_CB_STAT_RDY);
  9.1658 +    else if (when_done == TIMEOUT)
  9.1659 +      result = 0;
  9.1660 +
  9.1661 +    if (result) return 0;
  9.1662 +    if (time>>16 != last) // mod 2048 each 16 ms
  9.1663 +    {
  9.1664 +      last = time >>16;
  9.1665 +      BX_DEBUG_ATA("await_ide: (TIMEOUT,BSY,!BSY,!BSY_DRQ,!BSY_!DRQ,!BSY_RDY) %d time= %ld timeout= %d\n",when_done,time>>11, timeout);
  9.1666 +    }
  9.1667 +    if (status & ATA_CB_STAT_ERR)
  9.1668 +    {
  9.1669 +      BX_DEBUG_ATA("await_ide: ERROR (TIMEOUT,BSY,!BSY,!BSY_DRQ,!BSY_!DRQ,!BSY_RDY) %d time= %ld timeout= %d\n",when_done,time>>11, timeout);
  9.1670 +      return -1;
  9.1671 +    }
  9.1672 +    if ((timeout == 0) || ((time>>11) > timeout)) break;
  9.1673 +  }
  9.1674 +  BX_INFO("IDE time out\n");
  9.1675 +  return -1;
  9.1676 +}
  9.1677 +
  9.1678  // ---------------------------------------------------------------------------
  9.1679  // ATA/ATAPI driver : device detection
  9.1680  // ---------------------------------------------------------------------------
  9.1681 @@ -2600,7 +2635,7 @@ void ata_detect( )
  9.1682  
  9.1683    // Device detection
  9.1684    hdcount=cdcount=0;
  9.1685 -  
  9.1686 +
  9.1687    for(device=0; device<BX_MAX_ATA_DEVICES; device++) {
  9.1688      Bit16u iobase1, iobase2;
  9.1689      Bit8u  channel, slave, shift;
  9.1690 @@ -2630,33 +2665,34 @@ void ata_detect( )
  9.1691  
  9.1692      if ( (sc == 0x55) && (sn == 0xaa) ) {
  9.1693        write_byte(ebda_seg,&EbdaData->ata.devices[device].type,ATA_TYPE_UNKNOWN);
  9.1694 -    
  9.1695 +
  9.1696        // reset the channel
  9.1697 -      ata_reset (device);
  9.1698 -      
  9.1699 +      ata_reset(device);
  9.1700 +
  9.1701        // check for ATA or ATAPI
  9.1702        outb(iobase1+ATA_CB_DH, slave ? ATA_CB_DH_DEV1 : ATA_CB_DH_DEV0);
  9.1703        sc = inb(iobase1+ATA_CB_SC);
  9.1704        sn = inb(iobase1+ATA_CB_SN);
  9.1705 -      if ( (sc==0x01) && (sn==0x01) ) {
  9.1706 +      if ((sc==0x01) && (sn==0x01)) {
  9.1707          cl = inb(iobase1+ATA_CB_CL);
  9.1708          ch = inb(iobase1+ATA_CB_CH);
  9.1709          st = inb(iobase1+ATA_CB_STAT);
  9.1710  
  9.1711 -        if ( (cl==0x14) && (ch==0xeb) ) {
  9.1712 +        if ((cl==0x14) && (ch==0xeb)) {
  9.1713            write_byte(ebda_seg,&EbdaData->ata.devices[device].type,ATA_TYPE_ATAPI);
  9.1714 -          }
  9.1715 -        else if ( (cl==0x00) && (ch==0x00) && (st!=0x00) ) {
  9.1716 +        } else if ((cl==0x00) && (ch==0x00) && (st!=0x00)) {
  9.1717            write_byte(ebda_seg,&EbdaData->ata.devices[device].type,ATA_TYPE_ATA);
  9.1718 -          }
  9.1719 +        } else if ((cl==0xff) && (ch==0xff)) {
  9.1720 +          write_byte(ebda_seg,&EbdaData->ata.devices[device].type,ATA_TYPE_NONE);
  9.1721          }
  9.1722        }
  9.1723 +    }
  9.1724  
  9.1725      type=read_byte(ebda_seg,&EbdaData->ata.devices[device].type);
  9.1726 -    
  9.1727 -    // Now we send a IDENTIFY command to ATA device 
  9.1728 +
  9.1729 +    // Now we send a IDENTIFY command to ATA device
  9.1730      if(type == ATA_TYPE_ATA) {
  9.1731 -      Bit32u sectors;
  9.1732 +      Bit32u sectors_low, sectors_high;
  9.1733        Bit16u cylinders, heads, spt, blksize;
  9.1734        Bit8u  translation, removable, mode;
  9.1735  
  9.1736 @@ -2667,21 +2703,26 @@ void ata_detect( )
  9.1737        write_byte(ebda_seg,&EbdaData->ata.devices[device].device,ATA_DEVICE_HD);
  9.1738        write_byte(ebda_seg,&EbdaData->ata.devices[device].mode, ATA_MODE_PIO16);
  9.1739  
  9.1740 -      if (ata_cmd_data_in(device,ATA_CMD_IDENTIFY_DEVICE, 1, 0, 0, 0, 0L, get_SS(),buffer) !=0 )
  9.1741 +      if (ata_cmd_data_in(device,ATA_CMD_IDENTIFY_DEVICE, 1, 0, 0, 0, 0L, 0L, get_SS(),buffer) !=0 )
  9.1742          BX_PANIC("ata-detect: Failed to detect ATA device\n");
  9.1743  
  9.1744        removable = (read_byte(get_SS(),buffer+0) & 0x80) ? 1 : 0;
  9.1745 -#ifndef	NO_PIO32
  9.1746 +#ifndef        NO_PIO32
  9.1747        mode      = read_byte(get_SS(),buffer+96) ? ATA_MODE_PIO32 : ATA_MODE_PIO16;
  9.1748  #endif
  9.1749 -
  9.1750        blksize   = read_word(get_SS(),buffer+10);
  9.1751 -      
  9.1752 +
  9.1753        cylinders = read_word(get_SS(),buffer+(1*2)); // word 1
  9.1754        heads     = read_word(get_SS(),buffer+(3*2)); // word 3
  9.1755        spt       = read_word(get_SS(),buffer+(6*2)); // word 6
  9.1756  
  9.1757 -      sectors   = read_dword(get_SS(),buffer+(60*2)); // word 60 and word 61
  9.1758 +      if (read_word(get_SS(),buffer+(83*2)) & (1 << 10)) { // word 83 - lba48 support
  9.1759 +        sectors_low  = read_dword(get_SS(),buffer+(100*2)); // word 100 and word 101
  9.1760 +        sectors_high = read_dword(get_SS(),buffer+(102*2)); // word 102 and word 103
  9.1761 +      } else {
  9.1762 +        sectors_low = read_dword(get_SS(),buffer+(60*2)); // word 60 and word 61
  9.1763 +        sectors_high = 0;
  9.1764 +      }
  9.1765  
  9.1766        write_byte(ebda_seg,&EbdaData->ata.devices[device].device,ATA_DEVICE_HD);
  9.1767        write_byte(ebda_seg,&EbdaData->ata.devices[device].removable, removable);
  9.1768 @@ -2690,7 +2731,8 @@ void ata_detect( )
  9.1769        write_word(ebda_seg,&EbdaData->ata.devices[device].pchs.heads, heads);
  9.1770        write_word(ebda_seg,&EbdaData->ata.devices[device].pchs.cylinders, cylinders);
  9.1771        write_word(ebda_seg,&EbdaData->ata.devices[device].pchs.spt, spt);
  9.1772 -      write_dword(ebda_seg,&EbdaData->ata.devices[device].sectors, sectors);
  9.1773 +      write_dword(ebda_seg,&EbdaData->ata.devices[device].sectors_low, sectors_low);
  9.1774 +      write_dword(ebda_seg,&EbdaData->ata.devices[device].sectors_high, sectors_high);
  9.1775        BX_INFO("ata%d-%d: PCHS=%u/%d/%d translation=", channel, slave,cylinders, heads, spt);
  9.1776  
  9.1777        translation = inb_cmos(0x39 + channel/2);
  9.1778 @@ -2718,14 +2760,14 @@ void ata_detect( )
  9.1779            break;
  9.1780          case ATA_TRANSLATION_LBA:
  9.1781            spt = 63;
  9.1782 -          sectors /= 63;
  9.1783 -          heads = sectors / 1024;
  9.1784 +          sectors_low /= 63;
  9.1785 +          heads = sectors_low / 1024;
  9.1786            if (heads>128) heads = 255;
  9.1787            else if (heads>64) heads = 128;
  9.1788            else if (heads>32) heads = 64;
  9.1789            else if (heads>16) heads = 32;
  9.1790            else heads=16;
  9.1791 -          cylinders = sectors / heads;
  9.1792 +          cylinders = sectors_low / heads;
  9.1793            break;
  9.1794          case ATA_TRANSLATION_RECHS:
  9.1795            // Take care not to overflow
  9.1796 @@ -2752,15 +2794,15 @@ void ata_detect( )
  9.1797        write_word(ebda_seg,&EbdaData->ata.devices[device].lchs.heads, heads);
  9.1798        write_word(ebda_seg,&EbdaData->ata.devices[device].lchs.cylinders, cylinders);
  9.1799        write_word(ebda_seg,&EbdaData->ata.devices[device].lchs.spt, spt);
  9.1800 - 
  9.1801 -      // fill hdidmap 
  9.1802 +
  9.1803 +      // fill hdidmap
  9.1804        write_byte(ebda_seg,&EbdaData->ata.hdidmap[hdcount], device);
  9.1805        hdcount++;
  9.1806        }
  9.1807 -    
  9.1808 +
  9.1809      // Now we send a IDENTIFY command to ATAPI device
  9.1810      if(type == ATA_TYPE_ATAPI) {
  9.1811 - 
  9.1812 +
  9.1813        Bit8u  type, removable, mode;
  9.1814        Bit16u blksize;
  9.1815  
  9.1816 @@ -2771,12 +2813,12 @@ void ata_detect( )
  9.1817        write_byte(ebda_seg,&EbdaData->ata.devices[device].device,ATA_DEVICE_CDROM);
  9.1818        write_byte(ebda_seg,&EbdaData->ata.devices[device].mode, ATA_MODE_PIO16);
  9.1819  
  9.1820 -      if (ata_cmd_data_in(device,ATA_CMD_IDENTIFY_DEVICE_PACKET, 1, 0, 0, 0, 0L, get_SS(),buffer) != 0)
  9.1821 +      if (ata_cmd_data_in(device,ATA_CMD_IDENTIFY_DEVICE_PACKET, 1, 0, 0, 0, 0L, 0L, get_SS(),buffer) != 0)
  9.1822          BX_PANIC("ata-detect: Failed to detect ATAPI device\n");
  9.1823  
  9.1824        type      = read_byte(get_SS(),buffer+1) & 0x1f;
  9.1825        removable = (read_byte(get_SS(),buffer+0) & 0x80) ? 1 : 0;
  9.1826 -#ifndef	NO_PIO32
  9.1827 +#ifndef        NO_PIO32
  9.1828        mode      = read_byte(get_SS(),buffer+96) ? ATA_MODE_PIO32 : ATA_MODE_PIO16;
  9.1829  #endif
  9.1830        blksize   = 2048;
  9.1831 @@ -2786,24 +2828,24 @@ void ata_detect( )
  9.1832        write_byte(ebda_seg,&EbdaData->ata.devices[device].mode, mode);
  9.1833        write_word(ebda_seg,&EbdaData->ata.devices[device].blksize, blksize);
  9.1834  
  9.1835 -      // fill cdidmap 
  9.1836 +      // fill cdidmap
  9.1837        write_byte(ebda_seg,&EbdaData->ata.cdidmap[cdcount], device);
  9.1838        cdcount++;
  9.1839        }
  9.1840 -  
  9.1841 +
  9.1842        {
  9.1843        Bit32u sizeinmb;
  9.1844        Bit16u ataversion;
  9.1845        Bit8u  c, i, version, model[41];
  9.1846 -      
  9.1847 +
  9.1848        switch (type) {
  9.1849          case ATA_TYPE_ATA:
  9.1850 -          sizeinmb = read_dword(ebda_seg,&EbdaData->ata.devices[device].sectors);
  9.1851 -          sizeinmb >>= 11;
  9.1852 +          sizeinmb = (read_dword(ebda_seg,&EbdaData->ata.devices[device].sectors_high) << 21)
  9.1853 +            | (read_dword(ebda_seg,&EbdaData->ata.devices[device].sectors_low) >> 11);
  9.1854          case ATA_TYPE_ATAPI:
  9.1855            // Read ATA/ATAPI version
  9.1856            ataversion=((Bit16u)(read_byte(get_SS(),buffer+161))<<8)|read_byte(get_SS(),buffer+160);
  9.1857 -          for(version=15;version>0;version--) { 
  9.1858 +          for(version=15;version>0;version--) {
  9.1859              if((ataversion&(1<<version))!=0)
  9.1860              break;
  9.1861              }
  9.1862 @@ -2812,7 +2854,7 @@ void ata_detect( )
  9.1863            for(i=0;i<20;i++){
  9.1864              write_byte(get_SS(),model+(i*2),read_byte(get_SS(),buffer+(i*2)+54+1));
  9.1865              write_byte(get_SS(),model+(i*2)+1,read_byte(get_SS(),buffer+(i*2)+54));
  9.1866 -            }
  9.1867 +          }
  9.1868  
  9.1869            // Reformat
  9.1870            write_byte(get_SS(),model+40,0x00);
  9.1871 @@ -2820,7 +2862,13 @@ void ata_detect( )
  9.1872              if(read_byte(get_SS(),model+i)==0x20)
  9.1873                write_byte(get_SS(),model+i,0x00);
  9.1874              else break;
  9.1875 +          }
  9.1876 +          if (i>36) {
  9.1877 +            write_byte(get_SS(),model+36,0x00);
  9.1878 +            for(i=35;i>32;i--){
  9.1879 +              write_byte(get_SS(),model+i,0x2E);
  9.1880              }
  9.1881 +          }
  9.1882            break;
  9.1883          }
  9.1884  
  9.1885 @@ -2828,10 +2876,10 @@ void ata_detect( )
  9.1886          case ATA_TYPE_ATA:
  9.1887            printf("ata%d %s: ",channel,slave?" slave":"master");
  9.1888            i=0; while(c=read_byte(get_SS(),model+i++)) printf("%c",c);
  9.1889 -          if (sizeinmb < 1UL<<16)
  9.1890 -            printf(" ATA-%d Hard-Disk (%04u MBytes)\n",version,(Bit16u)sizeinmb);
  9.1891 -          else
  9.1892 -            printf(" ATA-%d Hard-Disk (%04u GBytes)\n",version,(Bit16u)(sizeinmb>>10));
  9.1893 +	  if (sizeinmb < (1UL<<16))
  9.1894 +            printf(" ATA-%d Hard-Disk (%4u MBytes)\n", version, (Bit16u)sizeinmb);
  9.1895 +	  else
  9.1896 +            printf(" ATA-%d Hard-Disk (%4u GBytes)\n", version, (Bit16u)(sizeinmb>>10));
  9.1897            break;
  9.1898          case ATA_TYPE_ATAPI:
  9.1899            printf("ata%d %s: ",channel,slave?" slave":"master");
  9.1900 @@ -2852,17 +2900,17 @@ void ata_detect( )
  9.1901    write_byte(ebda_seg,&EbdaData->ata.hdcount, hdcount);
  9.1902    write_byte(ebda_seg,&EbdaData->ata.cdcount, cdcount);
  9.1903    write_byte(0x40,0x75, hdcount);
  9.1904 - 
  9.1905 +
  9.1906    printf("\n");
  9.1907  
  9.1908    // FIXME : should use bios=cmos|auto|disable bits
  9.1909    // FIXME : should know about translation bits
  9.1910 -  // FIXME : move hard_drive_post here 
  9.1911 -  
  9.1912 +  // FIXME : move hard_drive_post here
  9.1913 +
  9.1914  }
  9.1915  
  9.1916  // ---------------------------------------------------------------------------
  9.1917 -// ATA/ATAPI driver : software reset 
  9.1918 +// ATA/ATAPI driver : software reset
  9.1919  // ---------------------------------------------------------------------------
  9.1920  // ATA-3
  9.1921  // 8.2.1 Software reset - Device 0
  9.1922 @@ -2872,7 +2920,8 @@ Bit16u device;
  9.1923  {
  9.1924    Bit16u ebda_seg=read_word(0x0040,0x000E);
  9.1925    Bit16u iobase1, iobase2;
  9.1926 -  Bit8u  channel, slave, sn, sc; 
  9.1927 +  Bit8u  channel, slave, sn, sc;
  9.1928 +  Bit8u  type;
  9.1929    Bit16u max;
  9.1930  
  9.1931    channel = device / 2;
  9.1932 @@ -2887,16 +2936,13 @@ Bit16u device;
  9.1933    outb(iobase2+ATA_CB_DC, ATA_CB_DC_HD15 | ATA_CB_DC_NIEN | ATA_CB_DC_SRST);
  9.1934  
  9.1935  // 8.2.1 (b) -- wait for BSY
  9.1936 -  max=0xff;
  9.1937 -  while(--max>0) {
  9.1938 -    Bit8u status = inb(iobase1+ATA_CB_STAT);
  9.1939 -    if ((status & ATA_CB_STAT_BSY) != 0) break;
  9.1940 -  }
  9.1941 +  await_ide(BSY, iobase1, 20);
  9.1942  
  9.1943  // 8.2.1 (f) -- clear SRST
  9.1944    outb(iobase2+ATA_CB_DC, ATA_CB_DC_HD15 | ATA_CB_DC_NIEN);
  9.1945  
  9.1946 -  if (read_byte(ebda_seg,&EbdaData->ata.devices[device].type) != ATA_TYPE_NONE) {
  9.1947 +  type=read_byte(ebda_seg,&EbdaData->ata.devices[device].type);
  9.1948 +  if (type != ATA_TYPE_NONE) {
  9.1949  
  9.1950  // 8.2.1 (g) -- check for sc==sn==0x01
  9.1951      // select device
  9.1952 @@ -2905,21 +2951,14 @@ Bit16u device;
  9.1953      sn = inb(iobase1+ATA_CB_SN);
  9.1954  
  9.1955      if ( (sc==0x01) && (sn==0x01) ) {
  9.1956 +      if (type == ATA_TYPE_ATA) //ATA
  9.1957 +        await_ide(NOT_BSY_RDY, iobase1, IDE_TIMEOUT);
  9.1958 +      else //ATAPI
  9.1959 +        await_ide(NOT_BSY, iobase1, IDE_TIMEOUT);
  9.1960 +    }
  9.1961  
  9.1962  // 8.2.1 (h) -- wait for not BSY
  9.1963 -      max=0xff;
  9.1964 -      while(--max>0) {
  9.1965 -        Bit8u status = inb(iobase1+ATA_CB_STAT);
  9.1966 -        if ((status & ATA_CB_STAT_BSY) == 0) break;
  9.1967 -        }
  9.1968 -      }
  9.1969 -    }
  9.1970 -
  9.1971 -// 8.2.1 (i) -- wait for DRDY
  9.1972 -  max=0xfff;
  9.1973 -  while(--max>0) {
  9.1974 -    Bit8u status = inb(iobase1+ATA_CB_STAT);
  9.1975 -      if ((status & ATA_CB_STAT_RDY) != 0) break;
  9.1976 +    await_ide(NOT_BSY, iobase1, IDE_TIMEOUT);
  9.1977    }
  9.1978  
  9.1979    // Enable interrupts
  9.1980 @@ -2927,7 +2966,7 @@ Bit16u device;
  9.1981  }
  9.1982  
  9.1983  // ---------------------------------------------------------------------------
  9.1984 -// ATA/ATAPI driver : execute a non data command 
  9.1985 +// ATA/ATAPI driver : execute a non data command
  9.1986  // ---------------------------------------------------------------------------
  9.1987  
  9.1988  Bit16u ata_cmd_non_data()
  9.1989 @@ -2945,9 +2984,9 @@ Bit16u ata_cmd_non_data()
  9.1990        // 5 : more sectors to read/verify
  9.1991        // 6 : no sectors left to write
  9.1992        // 7 : more sectors to write
  9.1993 -Bit16u ata_cmd_data_in(device, command, count, cylinder, head, sector, lba, segment, offset)
  9.1994 +Bit16u ata_cmd_data_in(device, command, count, cylinder, head, sector, lba_low, lba_high, segment, offset)
  9.1995  Bit16u device, command, count, cylinder, head, sector, segment, offset;
  9.1996 -Bit32u lba;
  9.1997 +Bit32u lba_low, lba_high;
  9.1998  {
  9.1999    Bit16u ebda_seg=read_word(0x0040,0x000E);
  9.2000    Bit16u iobase1, iobase2, blksize;
  9.2001 @@ -2976,22 +3015,20 @@ Bit32u lba;
  9.2002  
  9.2003    // sector will be 0 only on lba access. Convert to lba-chs
  9.2004    if (sector == 0) {
  9.2005 -    if ((count >= 1 << 8) || (lba + count >= 1UL << 28)) {
  9.2006 +    if ((count >= 1 << 8) || lba_high || (lba_low + count >= 1UL << 28)) {
  9.2007        outb(iobase1 + ATA_CB_FR, 0x00);
  9.2008        outb(iobase1 + ATA_CB_SC, (count >> 8) & 0xff);
  9.2009 -      outb(iobase1 + ATA_CB_SN, lba >> 24);
  9.2010 -      outb(iobase1 + ATA_CB_CL, 0);
  9.2011 -      outb(iobase1 + ATA_CB_CH, 0);
  9.2012 +      outb(iobase1 + ATA_CB_SN, lba_low >> 24);
  9.2013 +      outb(iobase1 + ATA_CB_CL, lba_high & 0xff);
  9.2014 +      outb(iobase1 + ATA_CB_CH, lba_high >> 8);
  9.2015        command |= 0x04;
  9.2016        count &= (1UL << 8) - 1;
  9.2017 -      lba &= (1UL << 24) - 1;
  9.2018 +      lba_low &= (1UL << 24) - 1;
  9.2019        }
  9.2020 -    sector = (Bit16u) (lba & 0x000000ffL);
  9.2021 -    lba >>= 8;
  9.2022 -    cylinder = (Bit16u) (lba & 0x0000ffffL);
  9.2023 -    lba >>= 16;
  9.2024 -    head = ((Bit16u) (lba & 0x0000000fL)) | 0x40;
  9.2025 -    }
  9.2026 +    sector = (Bit16u) (lba_low & 0x000000ffL);
  9.2027 +    cylinder = (Bit16u) ((lba_low>>8) & 0x0000ffffL);
  9.2028 +    head = ((Bit16u) ((lba_low>>24) & 0x0000000fL)) | ATA_CB_DH_LBA;
  9.2029 +  }
  9.2030  
  9.2031    outb(iobase1 + ATA_CB_FR, 0x00);
  9.2032    outb(iobase1 + ATA_CB_SC, count);
  9.2033 @@ -3001,10 +3038,8 @@ Bit32u lba;
  9.2034    outb(iobase1 + ATA_CB_DH, (slave ? ATA_CB_DH_DEV1 : ATA_CB_DH_DEV0) | (Bit8u) head );
  9.2035    outb(iobase1 + ATA_CB_CMD, command);
  9.2036  
  9.2037 -  while (1) {
  9.2038 -    status = inb(iobase1 + ATA_CB_STAT);
  9.2039 -    if ( !(status & ATA_CB_STAT_BSY) ) break;
  9.2040 -    }
  9.2041 +  await_ide(NOT_BSY_DRQ, iobase1, IDE_TIMEOUT);
  9.2042 +  status = inb(iobase1 + ATA_CB_STAT);
  9.2043  
  9.2044    if (status & ATA_CB_STAT_ERR) {
  9.2045      BX_DEBUG_ATA("ata_cmd_data_in : read error\n");
  9.2046 @@ -3025,12 +3060,12 @@ ASM_END
  9.2047  ASM_START
  9.2048          push bp
  9.2049          mov  bp, sp
  9.2050 -        mov  di, _ata_cmd_data_in.offset + 2[bp]  
  9.2051 -        mov  ax, _ata_cmd_data_in.segment + 2[bp] 
  9.2052 -        mov  cx, _ata_cmd_data_in.blksize + 2[bp] 
  9.2053 +        mov  di, _ata_cmd_data_in.offset + 2[bp]
  9.2054 +        mov  ax, _ata_cmd_data_in.segment + 2[bp]
  9.2055 +        mov  cx, _ata_cmd_data_in.blksize + 2[bp]
  9.2056  
  9.2057          ;; adjust if there will be an overrun. 2K max sector size
  9.2058 -        cmp   di, #0xf800 ;; 
  9.2059 +        cmp   di, #0xf800 ;;
  9.2060          jbe   ata_in_no_adjust
  9.2061  
  9.2062  ata_in_adjust:
  9.2063 @@ -3042,7 +3077,7 @@ ata_in_no_adjust:
  9.2064  
  9.2065          mov   dx, _ata_cmd_data_in.iobase1 + 2[bp] ;; ATA data read port
  9.2066  
  9.2067 -        mov  ah, _ata_cmd_data_in.mode + 2[bp] 
  9.2068 +        mov  ah, _ata_cmd_data_in.mode + 2[bp]
  9.2069          cmp  ah, #ATA_MODE_PIO32
  9.2070          je   ata_in_32
  9.2071  
  9.2072 @@ -3064,9 +3099,10 @@ ASM_END
  9.2073      current++;
  9.2074      write_word(ebda_seg, &EbdaData->ata.trsfsectors,current);
  9.2075      count--;
  9.2076 +    await_ide(NOT_BSY, iobase1, IDE_TIMEOUT);
  9.2077      status = inb(iobase1 + ATA_CB_STAT);
  9.2078      if (count == 0) {
  9.2079 -      if ( (status & (ATA_CB_STAT_BSY | ATA_CB_STAT_RDY | ATA_CB_STAT_DRQ | ATA_CB_STAT_ERR) ) 
  9.2080 +      if ( (status & (ATA_CB_STAT_BSY | ATA_CB_STAT_RDY | ATA_CB_STAT_DRQ | ATA_CB_STAT_ERR) )
  9.2081            != ATA_CB_STAT_RDY ) {
  9.2082          BX_DEBUG_ATA("ata_cmd_data_in : no sectors left (status %02x)\n", (unsigned) status);
  9.2083          return 4;
  9.2084 @@ -3074,7 +3110,7 @@ ASM_END
  9.2085        break;
  9.2086        }
  9.2087      else {
  9.2088 -      if ( (status & (ATA_CB_STAT_BSY | ATA_CB_STAT_RDY | ATA_CB_STAT_DRQ | ATA_CB_STAT_ERR) ) 
  9.2089 +      if ( (status & (ATA_CB_STAT_BSY | ATA_CB_STAT_RDY | ATA_CB_STAT_DRQ | ATA_CB_STAT_ERR) )
  9.2090            != (ATA_CB_STAT_RDY | ATA_CB_STAT_DRQ) ) {
  9.2091          BX_DEBUG_ATA("ata_cmd_data_in : more sectors left (status %02x)\n", (unsigned) status);
  9.2092          return 5;
  9.2093 @@ -3099,9 +3135,9 @@ ASM_END
  9.2094        // 5 : more sectors to read/verify
  9.2095        // 6 : no sectors left to write
  9.2096        // 7 : more sectors to write
  9.2097 -Bit16u ata_cmd_data_out(device, command, count, cylinder, head, sector, lba, segment, offset)
  9.2098 +Bit16u ata_cmd_data_out(device, command, count, cylinder, head, sector, lba_low, lba_high, segment, offset)
  9.2099  Bit16u device, command, count, cylinder, head, sector, segment, offset;
  9.2100 -Bit32u lba;
  9.2101 +Bit32u lba_low, lba_high;
  9.2102  {
  9.2103    Bit16u ebda_seg=read_word(0x0040,0x000E);
  9.2104    Bit16u iobase1, iobase2, blksize;
  9.2105 @@ -3130,22 +3166,20 @@ Bit32u lba;
  9.2106  
  9.2107    // sector will be 0 only on lba access. Convert to lba-chs
  9.2108    if (sector == 0) {
  9.2109 -    if ((count >= 1 << 8) || (lba + count >= 1UL << 28)) {
  9.2110 +    if ((count >= 1 << 8) || lba_high || (lba_low + count >= 1UL << 28)) {
  9.2111        outb(iobase1 + ATA_CB_FR, 0x00);
  9.2112        outb(iobase1 + ATA_CB_SC, (count >> 8) & 0xff);
  9.2113 -      outb(iobase1 + ATA_CB_SN, lba >> 24);
  9.2114 -      outb(iobase1 + ATA_CB_CL, 0);
  9.2115 -      outb(iobase1 + ATA_CB_CH, 0);
  9.2116 +      outb(iobase1 + ATA_CB_SN, lba_low >> 24);
  9.2117 +      outb(iobase1 + ATA_CB_CL, lba_high & 0xff);
  9.2118 +      outb(iobase1 + ATA_CB_CH, lba_high >> 8);
  9.2119        command |= 0x04;
  9.2120        count &= (1UL << 8) - 1;
  9.2121 -      lba &= (1UL << 24) - 1;
  9.2122 +      lba_low &= (1UL << 24) - 1;
  9.2123        }
  9.2124 -    sector = (Bit16u) (lba & 0x000000ffL);
  9.2125 -    lba >>= 8;
  9.2126 -    cylinder = (Bit16u) (lba & 0x0000ffffL);
  9.2127 -    lba >>= 16;
  9.2128 -    head = ((Bit16u) (lba & 0x0000000fL)) | 0x40;
  9.2129 -    }
  9.2130 +    sector = (Bit16u) (lba_low & 0x000000ffL);
  9.2131 +    cylinder = (Bit16u) ((lba_low>>8) & 0x0000ffffL);
  9.2132 +    head = ((Bit16u) ((lba_low>>24) & 0x0000000fL)) | ATA_CB_DH_LBA;
  9.2133 +  }
  9.2134  
  9.2135    outb(iobase1 + ATA_CB_FR, 0x00);
  9.2136    outb(iobase1 + ATA_CB_SC, count);
  9.2137 @@ -3155,10 +3189,8 @@ Bit32u lba;
  9.2138    outb(iobase1 + ATA_CB_DH, (slave ? ATA_CB_DH_DEV1 : ATA_CB_DH_DEV0) | (Bit8u) head );
  9.2139    outb(iobase1 + ATA_CB_CMD, command);
  9.2140  
  9.2141 -  while (1) {
  9.2142 -    status = inb(iobase1 + ATA_CB_STAT);
  9.2143 -    if ( !(status & ATA_CB_STAT_BSY) ) break;
  9.2144 -    }
  9.2145 +  await_ide(NOT_BSY_DRQ, iobase1, IDE_TIMEOUT);
  9.2146 +  status = inb(iobase1 + ATA_CB_STAT);
  9.2147  
  9.2148    if (status & ATA_CB_STAT_ERR) {
  9.2149      BX_DEBUG_ATA("ata_cmd_data_out : read error\n");
  9.2150 @@ -3179,12 +3211,12 @@ ASM_END
  9.2151  ASM_START
  9.2152          push bp
  9.2153          mov  bp, sp
  9.2154 -        mov  si, _ata_cmd_data_out.offset + 2[bp]  
  9.2155 -        mov  ax, _ata_cmd_data_out.segment + 2[bp] 
  9.2156 -        mov  cx, _ata_cmd_data_out.blksize + 2[bp] 
  9.2157 +        mov  si, _ata_cmd_data_out.offset + 2[bp]
  9.2158 +        mov  ax, _ata_cmd_data_out.segment + 2[bp]
  9.2159 +        mov  cx, _ata_cmd_data_out.blksize + 2[bp]
  9.2160  
  9.2161          ;; adjust if there will be an overrun. 2K max sector size
  9.2162 -        cmp   si, #0xf800 ;; 
  9.2163 +        cmp   si, #0xf800 ;;
  9.2164          jbe   ata_out_no_adjust
  9.2165  
  9.2166  ata_out_adjust:
  9.2167 @@ -3196,7 +3228,7 @@ ata_out_no_adjust:
  9.2168  
  9.2169          mov   dx, _ata_cmd_data_out.iobase1 + 2[bp] ;; ATA data write port
  9.2170  
  9.2171 -        mov  ah, _ata_cmd_data_out.mode + 2[bp] 
  9.2172 +        mov  ah, _ata_cmd_data_out.mode + 2[bp]
  9.2173          cmp  ah, #ATA_MODE_PIO32
  9.2174          je   ata_out_32
  9.2175  
  9.2176 @@ -3222,7 +3254,7 @@ ASM_END
  9.2177      count--;
  9.2178      status = inb(iobase1 + ATA_CB_STAT);
  9.2179      if (count == 0) {
  9.2180 -      if ( (status & (ATA_CB_STAT_BSY | ATA_CB_STAT_RDY | ATA_CB_STAT_DF | ATA_CB_STAT_DRQ | ATA_CB_STAT_ERR) ) 
  9.2181 +      if ( (status & (ATA_CB_STAT_BSY | ATA_CB_STAT_RDY | ATA_CB_STAT_DF | ATA_CB_STAT_DRQ | ATA_CB_STAT_ERR) )
  9.2182            != ATA_CB_STAT_RDY ) {
  9.2183          BX_DEBUG_ATA("ata_cmd_data_out : no sectors left (status %02x)\n", (unsigned) status);
  9.2184          return 6;
  9.2185 @@ -3230,7 +3262,7 @@ ASM_END
  9.2186        break;
  9.2187        }
  9.2188      else {
  9.2189 -      if ( (status & (ATA_CB_STAT_BSY | ATA_CB_STAT_RDY | ATA_CB_STAT_DRQ | ATA_CB_STAT_ERR) ) 
  9.2190 +      if ( (status & (ATA_CB_STAT_BSY | ATA_CB_STAT_RDY | ATA_CB_STAT_DRQ | ATA_CB_STAT_ERR) )
  9.2191            != (ATA_CB_STAT_RDY | ATA_CB_STAT_DRQ) ) {
  9.2192          BX_DEBUG_ATA("ata_cmd_data_out : more sectors left (status %02x)\n", (unsigned) status);
  9.2193          return 7;
  9.2194 @@ -3297,19 +3329,17 @@ Bit32u length;
  9.2195    if (status & ATA_CB_STAT_BSY) return 2;
  9.2196  
  9.2197    outb(iobase2 + ATA_CB_DC, ATA_CB_DC_HD15 | ATA_CB_DC_NIEN);
  9.2198 -  // outb(iobase1 + ATA_CB_FR, 0x00);
  9.2199 -  // outb(iobase1 + ATA_CB_SC, 0x00);
  9.2200 -  // outb(iobase1 + ATA_CB_SN, 0x00);
  9.2201 +  outb(iobase1 + ATA_CB_FR, 0x00);
  9.2202 +  outb(iobase1 + ATA_CB_SC, 0x00);
  9.2203 +  outb(iobase1 + ATA_CB_SN, 0x00);
  9.2204    outb(iobase1 + ATA_CB_CL, 0xfff0 & 0x00ff);
  9.2205    outb(iobase1 + ATA_CB_CH, 0xfff0 >> 8);
  9.2206    outb(iobase1 + ATA_CB_DH, slave ? ATA_CB_DH_DEV1 : ATA_CB_DH_DEV0);
  9.2207    outb(iobase1 + ATA_CB_CMD, ATA_CMD_PACKET);
  9.2208  
  9.2209    // Device should ok to receive command
  9.2210 -  while (1) {
  9.2211 -    status = inb(iobase1 + ATA_CB_STAT);
  9.2212 -    if ( !(status & ATA_CB_STAT_BSY) ) break;
  9.2213 -    }
  9.2214 +  await_ide(NOT_BSY_DRQ, iobase1, IDE_TIMEOUT);
  9.2215 +  status = inb(iobase1 + ATA_CB_STAT);
  9.2216  
  9.2217    if (status & ATA_CB_STAT_ERR) {
  9.2218      BX_DEBUG_ATA("ata_cmd_packet : error, status is %02x\n",status);
  9.2219 @@ -3326,13 +3356,13 @@ Bit32u length;
  9.2220    // Send command to device
  9.2221  ASM_START
  9.2222        sti  ;; enable higher priority interrupts
  9.2223 - 
  9.2224 +
  9.2225        push bp
  9.2226        mov  bp, sp
  9.2227 -    
  9.2228 -      mov  si, _ata_cmd_packet.cmdoff + 2[bp]  
  9.2229 -      mov  ax, _ata_cmd_packet.cmdseg + 2[bp] 
  9.2230 -      mov  cx, _ata_cmd_packet.cmdlen + 2[bp] 
  9.2231 +
  9.2232 +      mov  si, _ata_cmd_packet.cmdoff + 2[bp]
  9.2233 +      mov  ax, _ata_cmd_packet.cmdseg + 2[bp]
  9.2234 +      mov  cx, _ata_cmd_packet.cmdlen + 2[bp]
  9.2235        mov  es, ax      ;; segment in es
  9.2236  
  9.2237        mov  dx, _ata_cmd_packet.iobase1 + 2[bp] ;; ATA data write port
  9.2238 @@ -3345,32 +3375,38 @@ ASM_START
  9.2239  ASM_END
  9.2240  
  9.2241    if (inout == ATA_DATA_NO) {
  9.2242 +    await_ide(NOT_BSY, iobase1, IDE_TIMEOUT);
  9.2243      status = inb(iobase1 + ATA_CB_STAT);
  9.2244      }
  9.2245    else {
  9.2246 +        Bit16u loops = 0;
  9.2247 +        Bit8u sc;
  9.2248    while (1) {
  9.2249  
  9.2250 +      if (loops == 0) {//first time through
  9.2251 +        status = inb(iobase2 + ATA_CB_ASTAT);
  9.2252 +        await_ide(NOT_BSY_DRQ, iobase1, IDE_TIMEOUT);
  9.2253 +      }
  9.2254 +      else
  9.2255 +        await_ide(NOT_BSY, iobase1, IDE_TIMEOUT);
  9.2256 +      loops++;
  9.2257 +
  9.2258        status = inb(iobase1 + ATA_CB_STAT);
  9.2259 +      sc = inb(iobase1 + ATA_CB_SC);
  9.2260  
  9.2261        // Check if command completed
  9.2262 -      if ( (status & (ATA_CB_STAT_BSY | ATA_CB_STAT_DRQ) ) ==0 ) break;
  9.2263 +      if(((inb(iobase1 + ATA_CB_SC)&0x7)==0x3) &&
  9.2264 +         ((status & (ATA_CB_STAT_RDY | ATA_CB_STAT_ERR)) == ATA_CB_STAT_RDY)) break;
  9.2265  
  9.2266        if (status & ATA_CB_STAT_ERR) {
  9.2267          BX_DEBUG_ATA("ata_cmd_packet : error (status %02x)\n",status);
  9.2268          return 3;
  9.2269        }
  9.2270  
  9.2271 -      // Device must be ready to send data
  9.2272 -      if ( (status & (ATA_CB_STAT_BSY | ATA_CB_STAT_RDY | ATA_CB_STAT_DRQ | ATA_CB_STAT_ERR) ) 
  9.2273 -            != (ATA_CB_STAT_RDY | ATA_CB_STAT_DRQ) ) {
  9.2274 -        BX_DEBUG_ATA("ata_cmd_packet : not ready (status %02x)\n", status);
  9.2275 -        return 4;
  9.2276 -        }
  9.2277 -
  9.2278        // Normalize address
  9.2279        bufseg += (bufoff / 16);
  9.2280        bufoff %= 16;
  9.2281 -    
  9.2282 +
  9.2283        // Get the byte count
  9.2284        lcount =  ((Bit16u)(inb(iobase1 + ATA_CB_CH))<<8)+inb(iobase1 + ATA_CB_CL);
  9.2285  
  9.2286 @@ -3431,10 +3467,10 @@ ASM_START
  9.2287  
  9.2288          mov  dx, _ata_cmd_packet.iobase1 + 2[bp] ;; ATA data read port
  9.2289  
  9.2290 -        mov  cx, _ata_cmd_packet.lbefore + 2[bp] 
  9.2291 +        mov  cx, _ata_cmd_packet.lbefore + 2[bp]
  9.2292          jcxz ata_packet_no_before
  9.2293  
  9.2294 -        mov  ah, _ata_cmd_packet.lmode + 2[bp] 
  9.2295 +        mov  ah, _ata_cmd_packet.lmode + 2[bp]
  9.2296          cmp  ah, #ATA_MODE_PIO32
  9.2297          je   ata_packet_in_before_32
  9.2298  
  9.2299 @@ -3451,14 +3487,14 @@ ata_packet_in_before_32_loop:
  9.2300          pop  eax
  9.2301  
  9.2302  ata_packet_no_before:
  9.2303 -        mov  cx, _ata_cmd_packet.lcount + 2[bp] 
  9.2304 +        mov  cx, _ata_cmd_packet.lcount + 2[bp]
  9.2305          jcxz ata_packet_after
  9.2306  
  9.2307 -        mov  di, _ata_cmd_packet.bufoff + 2[bp]  
  9.2308 -        mov  ax, _ata_cmd_packet.bufseg + 2[bp] 
  9.2309 +        mov  di, _ata_cmd_packet.bufoff + 2[bp]
  9.2310 +        mov  ax, _ata_cmd_packet.bufseg + 2[bp]
  9.2311          mov  es, ax
  9.2312  
  9.2313 -        mov  ah, _ata_cmd_packet.lmode + 2[bp] 
  9.2314 +        mov  ah, _ata_cmd_packet.lmode + 2[bp]
  9.2315          cmp  ah, #ATA_MODE_PIO32
  9.2316          je   ata_packet_in_32
  9.2317  
  9.2318 @@ -3472,10 +3508,10 @@ ata_packet_in_32:
  9.2319            insd ;; CX dwords transfered to port(DX) to ES:[DI]
  9.2320  
  9.2321  ata_packet_after:
  9.2322 -        mov  cx, _ata_cmd_packet.lafter + 2[bp] 
  9.2323 +        mov  cx, _ata_cmd_packet.lafter + 2[bp]
  9.2324          jcxz ata_packet_done
  9.2325  
  9.2326 -        mov  ah, _ata_cmd_packet.lmode + 2[bp] 
  9.2327 +        mov  ah, _ata_cmd_packet.lmode + 2[bp]
  9.2328          cmp  ah, #ATA_MODE_PIO32
  9.2329          je   ata_packet_in_after_32
  9.2330  
  9.2331 @@ -3505,7 +3541,7 @@ ASM_END
  9.2332      }
  9.2333  
  9.2334    // Final check, device must be ready
  9.2335 -  if ( (status & (ATA_CB_STAT_BSY | ATA_CB_STAT_RDY | ATA_CB_STAT_DF | ATA_CB_STAT_DRQ | ATA_CB_STAT_ERR) ) 
  9.2336 +  if ( (status & (ATA_CB_STAT_BSY | ATA_CB_STAT_RDY | ATA_CB_STAT_DF | ATA_CB_STAT_DRQ | ATA_CB_STAT_ERR) )
  9.2337           != ATA_CB_STAT_RDY ) {
  9.2338      BX_DEBUG_ATA("ata_cmd_packet : not ready (status %02x)\n", (unsigned) status);
  9.2339      return 4;
  9.2340 @@ -3524,55 +3560,108 @@ ASM_END
  9.2341  // Start of ATA/ATAPI generic functions
  9.2342  // ---------------------------------------------------------------------------
  9.2343  
  9.2344 -  Bit16u 
  9.2345 -atapi_get_sense(device)
  9.2346 +  Bit16u
  9.2347 +atapi_get_sense(device, seg, asc, ascq)
  9.2348    Bit16u device;
  9.2349  {
  9.2350    Bit8u  atacmd[12];
  9.2351 -  Bit8u  buffer[16];
  9.2352 +  Bit8u  buffer[18];
  9.2353    Bit8u i;
  9.2354  
  9.2355    memsetb(get_SS(),atacmd,0,12);
  9.2356  
  9.2357 -  // Request SENSE 
  9.2358 -  atacmd[0]=0x03;    
  9.2359 -  atacmd[4]=0x20;    
  9.2360 -  if (ata_cmd_packet(device, 12, get_SS(), atacmd, 0, 16L, ATA_DATA_IN, get_SS(), buffer) != 0)
  9.2361 +  // Request SENSE
  9.2362 +  atacmd[0]=ATA_CMD_REQUEST_SENSE;
  9.2363 +  atacmd[4]=sizeof(buffer);
  9.2364 +  if (ata_cmd_packet(device, 12, get_SS(), atacmd, 0, 18L, ATA_DATA_IN, get_SS(), buffer) != 0)
  9.2365      return 0x0002;
  9.2366  
  9.2367 -  if ((buffer[0] & 0x7e) == 0x70) {
  9.2368 -    return (((Bit16u)buffer[2]&0x0f)*0x100)+buffer[12];
  9.2369 -    }
  9.2370 +  write_byte(seg,asc,buffer[12]);
  9.2371 +  write_byte(seg,ascq,buffer[13]);
  9.2372  
  9.2373    return 0;
  9.2374  }
  9.2375  
  9.2376 -  Bit16u 
  9.2377 +  Bit16u
  9.2378  atapi_is_ready(device)
  9.2379    Bit16u device;
  9.2380  {
  9.2381 -  Bit8u  atacmd[12];
  9.2382 -  Bit8u  buffer[];
  9.2383 -
  9.2384 -  memsetb(get_SS(),atacmd,0,12);
  9.2385 - 
  9.2386 -  // Test Unit Ready
  9.2387 -  if (ata_cmd_packet(device, 12, get_SS(), atacmd, 0, 0L, ATA_DATA_NO, get_SS(), buffer) != 0)
  9.2388 -    return 0x000f;
  9.2389 -
  9.2390 -  if (atapi_get_sense(device) !=0 ) {
  9.2391 -    memsetb(get_SS(),atacmd,0,12);
  9.2392 -
  9.2393 -    // try to send Test Unit Ready again
  9.2394 -    if (ata_cmd_packet(device, 12, get_SS(), atacmd, 0, 0L, ATA_DATA_NO, get_SS(), buffer) != 0)
  9.2395 -      return 0x000f;
  9.2396 -
  9.2397 -    return atapi_get_sense(device);
  9.2398 +  Bit8u packet[12];
  9.2399 +  Bit8u buf[8];
  9.2400 +  Bit32u block_len;
  9.2401 +  Bit32u sectors;
  9.2402 +  Bit32u timeout; //measured in ms
  9.2403 +  Bit32u time;
  9.2404 +  Bit8u asc, ascq;
  9.2405 +  Bit8u in_progress;
  9.2406 +  Bit16u ebda_seg = read_word(0x0040,0x000E);
  9.2407 +  if (read_byte(ebda_seg,&EbdaData->ata.devices[device].type) != ATA_TYPE_ATAPI) {
  9.2408 +    printf("not implemented for non-ATAPI device\n");
  9.2409 +    return -1;
  9.2410 +  }
  9.2411 +
  9.2412 +  BX_DEBUG_ATA("ata_detect_medium: begin\n");
  9.2413 +  memsetb(get_SS(),packet, 0, sizeof packet);
  9.2414 +  packet[0] = 0x25; /* READ CAPACITY */
  9.2415 +
  9.2416 +  /* Retry READ CAPACITY up to 50 times unless MEDIUM NOT PRESENT
  9.2417 +   * is reported by the device. If the device reports "IN PROGRESS",
  9.2418 +   * the timeout is raised to 30 seconds. */
  9.2419 +  timeout = 5000;
  9.2420 +  time = 0;
  9.2421 +  in_progress = 0;
  9.2422 +  while (time < timeout) {
  9.2423 +    if (ata_cmd_packet(device, sizeof(packet), get_SS(), packet, 0, 8L, ATA_DATA_IN, get_SS(), buf) == 0)
  9.2424 +      goto ok;
  9.2425 +
  9.2426 +    if (atapi_get_sense(device, get_SS(), &asc, &ascq) == 0) {
  9.2427 +      if (asc == 0x3a) { /* MEDIUM NOT PRESENT */
  9.2428 +        BX_DEBUG_ATA("Device reports MEDIUM NOT PRESENT\n");
  9.2429 +        return -1;
  9.2430 +      }
  9.2431 +
  9.2432 +      if (asc == 0x04 && ascq == 0x01 && !in_progress) {
  9.2433 +        /* IN PROGRESS OF BECOMING READY */
  9.2434 +        printf("Waiting for device to detect medium... ");
  9.2435 +        /* Allow 30 seconds more */
  9.2436 +        timeout = 30000;
  9.2437 +        in_progress = 1;
  9.2438 +      }
  9.2439      }
  9.2440 +    time += 100;
  9.2441 +  }
  9.2442 +  BX_DEBUG_ATA("read capacity failed\n");
  9.2443 +  return -1;
  9.2444 +ok:
  9.2445 +
  9.2446 +  block_len = (Bit32u) buf[4] << 24
  9.2447 +    | (Bit32u) buf[5] << 16
  9.2448 +    | (Bit32u) buf[6] << 8
  9.2449 +    | (Bit32u) buf[7] << 0;
  9.2450 +  BX_DEBUG_ATA("block_len=%u\n", block_len);
  9.2451 +
  9.2452 +  if (block_len!= 2048 && block_len!= 512)
  9.2453 +  {
  9.2454 +    printf("Unsupported sector size %u\n", block_len);
  9.2455 +    return -1;
  9.2456 +  }
  9.2457 +  write_dword(ebda_seg,&EbdaData->ata.devices[device].blksize, block_len);
  9.2458 +
  9.2459 +  sectors = (Bit32u) buf[0] << 24
  9.2460 +    | (Bit32u) buf[1] << 16
  9.2461 +    | (Bit32u) buf[2] << 8
  9.2462 +    | (Bit32u) buf[3] << 0;
  9.2463 +
  9.2464 +  BX_DEBUG_ATA("sectors=%u\n", sectors);
  9.2465 +  if (block_len == 2048)
  9.2466 +    sectors <<= 2; /* # of sectors in 512-byte "soft" sector */
  9.2467 +  if (sectors != read_dword(ebda_seg,&EbdaData->ata.devices[device].sectors_low))
  9.2468 +    printf("%dMB medium detected\n", sectors>>(20-9));
  9.2469 +  write_dword(ebda_seg,&EbdaData->ata.devices[device].sectors_low, sectors);
  9.2470    return 0;
  9.2471  }
  9.2472  
  9.2473 -  Bit16u 
  9.2474 +  Bit16u
  9.2475  atapi_is_cdrom(device)
  9.2476    Bit8u device;
  9.2477  {
  9.2478 @@ -3632,7 +3721,7 @@ static char eltorito[24]="EL TORITO SPEC
  9.2479  //
  9.2480  // Returns ah: emulated drive, al: error code
  9.2481  //
  9.2482 -  Bit16u 
  9.2483 +  Bit16u
  9.2484  cdrom_boot()
  9.2485  {
  9.2486    Bit16u ebda_seg=read_word(0x0040,0x000E);
  9.2487 @@ -3645,10 +3734,13 @@ cdrom_boot()
  9.2488    for (device=0; device<BX_MAX_ATA_DEVICES;device++) {
  9.2489      if (atapi_is_cdrom(device)) break;
  9.2490      }
  9.2491 -  
  9.2492 +
  9.2493    // if not found
  9.2494    if(device >= BX_MAX_ATA_DEVICES) return 2;
  9.2495  
  9.2496 +  if(error = atapi_is_ready(device) != 0)
  9.2497 +    BX_INFO("ata_is_ready returned %d\n",error);
  9.2498 +
  9.2499    // Read the Boot Record Volume Descriptor
  9.2500    memsetb(get_SS(),atacmd,0,12);
  9.2501    atacmd[0]=0x28;                      // READ command
  9.2502 @@ -3668,7 +3760,7 @@ cdrom_boot()
  9.2503     }
  9.2504    for(i=0;i<23;i++)
  9.2505      if(buffer[7+i]!=read_byte(0xf000,&eltorito[i]))return 6;
  9.2506 -  
  9.2507 +
  9.2508    // ok, now we calculate the Boot catalog address
  9.2509    lba=buffer[0x4A]*0x1000000+buffer[0x49]*0x10000+buffer[0x48]*0x100+buffer[0x47];
  9.2510  
  9.2511 @@ -3683,7 +3775,7 @@ cdrom_boot()
  9.2512    atacmd[5]=(lba & 0x000000ff);
  9.2513    if((error = ata_cmd_packet(device, 12, get_SS(), atacmd, 0, 2048L, ATA_DATA_IN, get_SS(), buffer)) != 0)
  9.2514      return 7;
  9.2515 - 
  9.2516 +
  9.2517    // Validation entry
  9.2518    if(buffer[0x00]!=0x01)return 8;   // Header
  9.2519    if(buffer[0x01]!=0x00)return 9;   // Platform
  9.2520 @@ -3702,10 +3794,10 @@ cdrom_boot()
  9.2521  
  9.2522    write_byte(ebda_seg,&EbdaData->cdemu.media,buffer[0x21]);
  9.2523    if(buffer[0x21]==0){
  9.2524 -    // FIXME ElTorito Hardcoded. cdrom is hardcoded as device 0xE0. 
  9.2525 +    // FIXME ElTorito Hardcoded. cdrom is hardcoded as device 0xE0.
  9.2526      // Win2000 cd boot needs to know it booted from cd
  9.2527      write_byte(ebda_seg,&EbdaData->cdemu.emulated_drive,0xE0);
  9.2528 -    } 
  9.2529 +    }
  9.2530    else if(buffer[0x21]<4)
  9.2531      write_byte(ebda_seg,&EbdaData->cdemu.emulated_drive,0x00);
  9.2532    else
  9.2533 @@ -3719,7 +3811,7 @@ cdrom_boot()
  9.2534  
  9.2535    write_word(ebda_seg,&EbdaData->cdemu.load_segment,boot_segment);
  9.2536    write_word(ebda_seg,&EbdaData->cdemu.buffer_segment,0x0000);
  9.2537 -  
  9.2538 +
  9.2539    nbsectors=buffer[0x27]*0x100+buffer[0x26];
  9.2540    write_word(ebda_seg,&EbdaData->cdemu.sector_count,nbsectors);
  9.2541  
  9.2542 @@ -3744,7 +3836,6 @@ cdrom_boot()
  9.2543    tcpa_ipl((Bit32u)1L,(Bit32u)boot_segment,(Bit32u)0L,(Bit32u)512L);
  9.2544  #endif
  9.2545  
  9.2546 -
  9.2547    // Remember the media type
  9.2548    switch(read_byte(ebda_seg,&EbdaData->cdemu.media)) {
  9.2549      case 0x01:  // 1.2M floppy
  9.2550 @@ -3765,7 +3856,7 @@ cdrom_boot()
  9.2551      case 0x04:  // Harddrive
  9.2552        write_word(ebda_seg,&EbdaData->cdemu.vdevice.spt,read_byte(boot_segment,446+6)&0x3f);
  9.2553        write_word(ebda_seg,&EbdaData->cdemu.vdevice.cylinders,
  9.2554 -	      (read_byte(boot_segment,446+6)<<2) + read_byte(boot_segment,446+7) + 1);
  9.2555 +              (read_byte(boot_segment,446+6)<<2) + read_byte(boot_segment,446+7) + 1);
  9.2556        write_word(ebda_seg,&EbdaData->cdemu.vdevice.heads,read_byte(boot_segment,446+5) + 1);
  9.2557        break;
  9.2558     }
  9.2559 @@ -3778,7 +3869,7 @@ cdrom_boot()
  9.2560        write_byte(ebda_seg, &EbdaData->ata.hdcount, read_byte(ebda_seg, &EbdaData->ata.hdcount) + 1);
  9.2561     }
  9.2562  
  9.2563 -  
  9.2564 +
  9.2565    // everything is ok, so from now on, the emulation is active
  9.2566    if(read_byte(ebda_seg,&EbdaData->cdemu.media)!=0)
  9.2567      write_byte(ebda_seg,&EbdaData->cdemu.active,0x01);
  9.2568 @@ -4124,9 +4215,10 @@ ASM_END
  9.2569        regs.u.r8.al = inb_cmos(0x30);
  9.2570        regs.u.r8.ah = inb_cmos(0x31);
  9.2571  
  9.2572 -      // limit to 15M
  9.2573 -      if(regs.u.r16.ax > 0x3c00)
  9.2574 -        regs.u.r16.ax = 0x3c00;
  9.2575 +      // According to Ralf Brown's interrupt list the limit should be 15M,
  9.2576 +      // but real machines mostly return max. 63M.
  9.2577 +      if(regs.u.r16.ax > 0xffc0)
  9.2578 +        regs.u.r16.ax = 0xffc0;
  9.2579  
  9.2580        CLEAR_CF();
  9.2581  #endif
  9.2582 @@ -4344,13 +4436,35 @@ BX_DEBUG_INT15("case 2:\n");
  9.2583  
  9.2584          case 3: // Set Resolution
  9.2585  BX_DEBUG_INT15("case 3:\n");
  9.2586 -          // BX:
  9.2587 +          // BH:
  9.2588            //      0 =  25 dpi, 1 count  per millimeter
  9.2589            //      1 =  50 dpi, 2 counts per millimeter
  9.2590            //      2 = 100 dpi, 4 counts per millimeter
  9.2591            //      3 = 200 dpi, 8 counts per millimeter
  9.2592 -          CLEAR_CF();
  9.2593 -          regs.u.r8.ah = 0;
  9.2594 +          comm_byte = inhibit_mouse_int_and_events(); // disable IRQ12 and packets
  9.2595 +          if (regs.u.r8.bh < 4) {
  9.2596 +            ret = send_to_mouse_ctrl(0xE8); // set resolution command
  9.2597 +            if (ret == 0) {
  9.2598 +              ret = get_mouse_data(&mouse_data1);
  9.2599 +              if (mouse_data1 != 0xfa)
  9.2600 +                BX_PANIC("Mouse status returned %02x (should be ack)\n", (unsigned)mouse_data1);
  9.2601 +              ret = send_to_mouse_ctrl(regs.u.r8.bh);
  9.2602 +              ret = get_mouse_data(&mouse_data1);
  9.2603 +              if (mouse_data1 != 0xfa)
  9.2604 +                BX_PANIC("Mouse status returned %02x (should be ack)\n", (unsigned)mouse_data1);
  9.2605 +              CLEAR_CF();
  9.2606 +              regs.u.r8.ah = 0;
  9.2607 +            } else {
  9.2608 +              // error
  9.2609 +              SET_CF();
  9.2610 +              regs.u.r8.ah = UNSUPPORTED_FUNCTION;
  9.2611 +            }
  9.2612 +          } else {
  9.2613 +            // error
  9.2614 +            SET_CF();
  9.2615 +            regs.u.r8.ah = UNSUPPORTED_FUNCTION;
  9.2616 +          }
  9.2617 +          set_kbd_command_byte(comm_byte); // restore IRQ12 and serial enable
  9.2618            break;
  9.2619  
  9.2620          case 4: // Get Device ID
  9.2621 @@ -4472,7 +4586,30 @@ BX_DEBUG_INT15("case default:\n");
  9.2622        break;
  9.2623      }
  9.2624  }
  9.2625 -#endif
  9.2626 +#endif // BX_USE_PS2_MOUSE
  9.2627 +
  9.2628 +
  9.2629 +void set_e820_range(ES, DI, start, end, type)  // writes one 20-byte memory-range entry at ES:DI
  9.2630 +     Bit16u ES;     // segment of the destination buffer
  9.2631 +     Bit16u DI;     // offset of the destination buffer within ES
  9.2632 +     Bit32u start;  // 32-bit start address of the range
  9.2633 +     Bit32u end;    // 32-bit end address (exclusive); only end - start is stored
  9.2634 +     Bit16u type;   // range type code (callers in this file pass 1, 2 or 3)
  9.2635 +{
  9.2636 +    write_word(ES, DI, start);          // bytes 0-1: low 16 bits of start
  9.2637 +    write_word(ES, DI+2, start >> 16);  // bytes 2-3: high 16 bits of start
  9.2638 +    write_word(ES, DI+4, 0x00);         // bytes 4-7: upper half of the 64-bit base, always zero here
  9.2639 +    write_word(ES, DI+6, 0x00);
  9.2640 +
  9.2641 +    end -= start;                       // end now holds the length; wraps modulo 2^32 (e.g. end == 0 means "up to 4 GB")
  9.2642 +    write_word(ES, DI+8, end);          // bytes 8-9: low 16 bits of the length
  9.2643 +    write_word(ES, DI+10, end >> 16);   // bytes 10-11: high 16 bits of the length
  9.2644 +    write_word(ES, DI+12, 0x0000);      // bytes 12-15: upper half of the 64-bit length, always zero here
  9.2645 +    write_word(ES, DI+14, 0x0000);
  9.2646 +
  9.2647 +    write_word(ES, DI+16, type);        // bytes 16-17: low word of the type field
  9.2648 +    write_word(ES, DI+18, 0x0);         // bytes 18-19: high word of the type field, always zero
  9.2649 +}
  9.2650  
  9.2651    void
  9.2652  int15_function32(regs, ES, DS, FLAGS)
  9.2653 @@ -4486,17 +4623,20 @@ BX_DEBUG_INT15("int15 AX=%04x\n",regs.u.
  9.2654  
  9.2655    switch (regs.u.r8.ah) {
  9.2656      case 0x86:
  9.2657 -      // Wait for CX:DX microseconds. currently using the 
  9.2658 -      // refresh request port 0x61 bit4, toggling every 15usec 
  9.2659 +      // Wait for CX:DX microseconds. currently using the
  9.2660 +      // refresh request port 0x61 bit4, toggling every 15usec
  9.2661  
  9.2662        CX = regs.u.r16.cx;
  9.2663        DX = regs.u.r16.dx;
  9.2664  
  9.2665  ASM_START
  9.2666 +      sti
  9.2667 +
  9.2668        ;; Get the count in eax
  9.2669 -      mov  ax, .int15_function32.CX [bp]
  9.2670 +      mov  bx, sp
  9.2671 +      mov  ax, _int15_function32.CX [bx]
  9.2672        shl  eax, #16
  9.2673 -      mov  ax, .int15_function32.DX [bp]
  9.2674 +      mov  ax, _int15_function32.DX [bx]
  9.2675  
  9.2676        ;; convert to numbers of 15usec ticks
  9.2677        mov ebx, #15
  9.2678 @@ -4527,7 +4667,8 @@ ASM_END
  9.2679      case 0xe8:
  9.2680          switch(regs.u.r8.al)
  9.2681          {
  9.2682 -        case 0x20: {
  9.2683 +#ifdef HVMASSIST
  9.2684 +       case 0x20: {
  9.2685              Bit16u e820_table_size = read_word(0xe000, 0x8) * 0x14;
  9.2686  
  9.2687              if (regs.u.r32.edx != 0x534D4150) /* SMAP */
  9.2688 @@ -4575,7 +4716,7 @@ ASM_END
  9.2689              // Get the amount of extended memory (above 1M)
  9.2690              regs.u.r8.cl = inb_cmos(0x30);
  9.2691              regs.u.r8.ch = inb_cmos(0x31);
  9.2692 -          
  9.2693 +
  9.2694              // limit to 15M
  9.2695              if (regs.u.r16.cx > (15*1024))
  9.2696                  regs.u.r16.cx = 15*1024;
  9.2697 @@ -4603,7 +4744,7 @@ ASM_END
  9.2698              regs.u.r16.bx = regs.u.r16.dx;
  9.2699              break;
  9.2700          }
  9.2701 -	default:  /* AH=0xE8?? but not implemented */
  9.2702 +        default:  /* AH=0xE8?? but not implemented */
  9.2703              goto int15_unimplemented;
  9.2704          }
  9.2705          break;
  9.2706 @@ -4616,17 +4757,179 @@ ASM_END
  9.2707        regs.u.r8.ah = UNSUPPORTED_FUNCTION;
  9.2708        break;
  9.2709      }
  9.2710 +#else
  9.2711 +         case 0x20: // coded by osmaker aka K.J.
  9.2712 +            if(regs.u.r32.edx == 0x534D4150)
  9.2713 +            {
  9.2714 +                extended_memory_size = inb_cmos(0x35);
  9.2715 +                extended_memory_size <<= 8;
  9.2716 +                extended_memory_size |= inb_cmos(0x34);
  9.2717 +                extended_memory_size *= 64;
  9.2718 +                // greater than EFF00000???
  9.2719 +                if(extended_memory_size > 0x3bc000) {
  9.2720 +                    extended_memory_size = 0x3bc000; // everything after this is reserved memory until we get to 0x100000000
  9.2721 +                }
  9.2722 +                extended_memory_size *= 1024;
  9.2723 +                extended_memory_size += (16L * 1024 * 1024);
  9.2724 +
  9.2725 +                if(extended_memory_size <= (16L * 1024 * 1024)) {
  9.2726 +                    extended_memory_size = inb_cmos(0x31);
  9.2727 +                    extended_memory_size <<= 8;
  9.2728 +                    extended_memory_size |= inb_cmos(0x30);
  9.2729 +                    extended_memory_size *= 1024;
  9.2730 +                    extended_memory_size += (1L * 1024 * 1024);
  9.2731 +                }
  9.2732 +
  9.2733 +                switch(regs.u.r16.bx)
  9.2734 +                {
  9.2735 +                    case 0:
  9.2736 +                        set_e820_range(ES, regs.u.r16.di,
  9.2737 +                                       0x0000000L, 0x0009f000L, 1);
  9.2738 +                        regs.u.r32.ebx = 1;
  9.2739 +                        regs.u.r32.eax = 0x534D4150;
  9.2740 +                        regs.u.r32.ecx = 0x14;
  9.2741 +                        CLEAR_CF();
  9.2742 +                        return;
  9.2743 +                        break;
  9.2744 +                    case 1:
  9.2745 +                        set_e820_range(ES, regs.u.r16.di,
  9.2746 +                                       0x0009f000L, 0x000a0000L, 2);
  9.2747 +                        regs.u.r32.ebx = 2;
  9.2748 +                        regs.u.r32.eax = 0x534D4150;
  9.2749 +                        regs.u.r32.ecx = 0x14;
  9.2750 +                        CLEAR_CF();
  9.2751 +                        return;
  9.2752 +                        break;
  9.2753 +                    case 2:
  9.2754 +                        set_e820_range(ES, regs.u.r16.di,
  9.2755 +                                       0x000e8000L, 0x00100000L, 2);
  9.2756 +                        regs.u.r32.ebx = 3;
  9.2757 +                        regs.u.r32.eax = 0x534D4150;
  9.2758 +                        regs.u.r32.ecx = 0x14;
  9.2759 +                        CLEAR_CF();
  9.2760 +                        return;
  9.2761 +                        break;
  9.2762 +                    case 3:
  9.2763 +#if BX_ROMBIOS32
  9.2764 +                        set_e820_range(ES, regs.u.r16.di,
  9.2765 +                                       0x00100000L,
  9.2766 +                                       extended_memory_size - ACPI_DATA_SIZE, 1);
  9.2767 +                        regs.u.r32.ebx = 4;
  9.2768 +#else
  9.2769 +                        set_e820_range(ES, regs.u.r16.di,
  9.2770 +                                       0x00100000L,
  9.2771 +                                       extended_memory_size, 1);
  9.2772 +                        regs.u.r32.ebx = 5;
  9.2773 +#endif
  9.2774 +                        regs.u.r32.eax = 0x534D4150;
  9.2775 +                        regs.u.r32.ecx = 0x14;
  9.2776 +                        CLEAR_CF();
  9.2777 +                        return;
  9.2778 +                        break;
  9.2779 +                    case 4:
  9.2780 +                        set_e820_range(ES, regs.u.r16.di,
  9.2781 +                                       extended_memory_size - ACPI_DATA_SIZE,
  9.2782 +                                       extended_memory_size, 3); // ACPI RAM
  9.2783 +                        regs.u.r32.ebx = 5;
  9.2784 +                        regs.u.r32.eax = 0x534D4150;
  9.2785 +                        regs.u.r32.ecx = 0x14;
  9.2786 +                        CLEAR_CF();
  9.2787 +                        return;
  9.2788 +                        break;
  9.2789 +                    case 5:
  9.2790 +                        /* 256KB BIOS area at the end of 4 GB */
  9.2791 +                        set_e820_range(ES, regs.u.r16.di,
  9.2792 +                                       0xfffc0000L, 0x00000000L, 2);
  9.2793 +                        regs.u.r32.ebx = 0;
  9.2794 +                        regs.u.r32.eax = 0x534D4150;
  9.2795 +                        regs.u.r32.ecx = 0x14;
  9.2796 +                        CLEAR_CF();
  9.2797 +                        return;
  9.2798 +                    default:  /* AX=E820, DX=534D4150, BX unrecognized */
  9.2799 +                        goto int15_unimplemented;
  9.2800 +                        break;
  9.2801 +                }
  9.2802 +            } else {
  9.2803 +              // EDX does not hold the 'SMAP' signature (0x534D4150)
  9.2804 +              goto int15_unimplemented;
  9.2805 +            }
  9.2806 +            break;
  9.2807 +
  9.2808 +        case 0x01:
  9.2809 +          // do we have any reason to fail here ?
  9.2810 +          CLEAR_CF();
  9.2811 +
  9.2812 +          // my real system sets ax and bx to 0
  9.2813 +          // this is confirmed by Ralf Brown's list
  9.2814 +          // but syslinux v1.48 is known to behave
  9.2815 +          // strangely if ax is set to 0
  9.2816 +          // regs.u.r16.ax = 0;
  9.2817 +          // regs.u.r16.bx = 0;
  9.2818 +
  9.2819 +          // Get the amount of extended memory (above 1M)
  9.2820 +          regs.u.r8.cl = inb_cmos(0x30);
  9.2821 +          regs.u.r8.ch = inb_cmos(0x31);
  9.2822 +
  9.2823 +          // limit to 15M
  9.2824 +          if(regs.u.r16.cx > 0x3c00)
  9.2825 +          {
  9.2826 +            regs.u.r16.cx = 0x3c00;
  9.2827 +          }
  9.2828 +
  9.2829 +          // Get the amount of extended memory above 16M in 64k blocks
  9.2830 +          regs.u.r8.dl = inb_cmos(0x34);
  9.2831 +          regs.u.r8.dh = inb_cmos(0x35);
  9.2832 +
  9.2833 +          // Set configured memory equal to extended memory
  9.2834 +          regs.u.r16.ax = regs.u.r16.cx;
  9.2835 +          regs.u.r16.bx = regs.u.r16.dx;
  9.2836 +          break;
  9.2837 +        default:  /* AH=0xE8?? but not implemented */
  9.2838 +          goto int15_unimplemented;
  9.2839 +       }
  9.2840 +       break;
  9.2841 +    int15_unimplemented:
  9.2842 +       // fall into the default
  9.2843 +    default:
  9.2844 +      BX_INFO("*** int 15h function AX=%04x, BX=%04x not yet supported!\n",
  9.2845 +        (unsigned) regs.u.r16.ax, (unsigned) regs.u.r16.bx);
  9.2846 +      SET_CF();
  9.2847 +      regs.u.r8.ah = UNSUPPORTED_FUNCTION;
  9.2848 +      break;
  9.2849 +    }
  9.2850 +#endif /* HVMASSIST */
  9.2851  }
  9.2852  
  9.2853    void
  9.2854  int16_function(DI, SI, BP, SP, BX, DX, CX, AX, FLAGS)
  9.2855    Bit16u DI, SI, BP, SP, BX, DX, CX, AX, FLAGS;
  9.2856  {
  9.2857 -  Bit8u scan_code, ascii_code, shift_flags, count;
  9.2858 +  Bit8u scan_code, ascii_code, shift_flags, led_flags, count;
  9.2859    Bit16u kbd_code, max;
  9.2860  
  9.2861    BX_DEBUG_INT16("int16: AX=%04x BX=%04x CX=%04x DX=%04x \n", AX, BX, CX, DX);
  9.2862  
  9.2863 +  shift_flags = read_byte(0x0040, 0x17);
  9.2864 +  led_flags = read_byte(0x0040, 0x97);
  9.2865 +  if ((((shift_flags >> 4) & 0x07) ^ (led_flags & 0x07)) != 0) {
  9.2866 +ASM_START
  9.2867 +    cli
  9.2868 +ASM_END
  9.2869 +    outb(0x60, 0xed);
  9.2870 +    while ((inb(0x64) & 0x01) == 0) outb(0x80, 0x21);
  9.2871 +    if ((inb(0x60) == 0xfa)) {
  9.2872 +      led_flags &= 0xf8;
  9.2873 +      led_flags |= ((shift_flags >> 4) & 0x07);
  9.2874 +      outb(0x60, led_flags & 0x07);
  9.2875 +      while ((inb(0x64) & 0x01) == 0) outb(0x80, 0x21);
  9.2876 +      inb(0x60);
  9.2877 +      write_byte(0x0040, 0x97, led_flags);
  9.2878 +    }
  9.2879 +ASM_START
  9.2880 +    sti
  9.2881 +ASM_END
  9.2882 +  }
  9.2883 +
  9.2884    switch (GET_AH()) {
  9.2885      case 0x00: /* read keyboard input */
  9.2886  
  9.2887 @@ -4664,7 +4967,7 @@ int16_function(DI, SI, BP, SP, BX, DX, C
  9.2888        break;
  9.2889  
  9.2890      case 0x09: /* GET KEYBOARD FUNCTIONALITY */
  9.2891 -      // bit Bochs Description     
  9.2892 +      // bit Bochs Description
  9.2893        //  7    0   reserved
  9.2894        //  6    0   INT 16/AH=20h-22h supported (122-key keyboard support)
  9.2895        //  5    1   INT 16/AH=10h-12h supported (enhanced keyboard support)
  9.2896 @@ -4694,7 +4997,7 @@ int16_function(DI, SI, BP, SP, BX, DX, C
  9.2897                kbd_code |= (inb(0x60) << 8);
  9.2898              }
  9.2899            } while (--count>0);
  9.2900 -	}
  9.2901 +        }
  9.2902        }
  9.2903        BX=kbd_code;
  9.2904        break;
  9.2905 @@ -4721,7 +5024,8 @@ int16_function(DI, SI, BP, SP, BX, DX, C
  9.2906      case 0x12: /* get extended keyboard status */
  9.2907        shift_flags = read_byte(0x0040, 0x17);
  9.2908        SET_AL(shift_flags);
  9.2909 -      shift_flags = read_byte(0x0040, 0x18);
  9.2910 +      shift_flags = read_byte(0x0040, 0x18) & 0x73;
  9.2911 +      shift_flags |= read_byte(0x0040, 0x96) & 0x0c;
  9.2912        SET_AH(shift_flags);
  9.2913        BX_DEBUG_INT16("int16: func 12 sending %04x\n",AX);
  9.2914        break;
  9.2915 @@ -4736,7 +5040,7 @@ int16_function(DI, SI, BP, SP, BX, DX, C
  9.2916  
  9.2917      case 0x6F:
  9.2918        if (GET_AL() == 0x08)
  9.2919 -	SET_AH(0x02); // unsupported, aka normal keyboard
  9.2920 +        SET_AH(0x02); // unsupported, aka normal keyboard
  9.2921  
  9.2922      default:
  9.2923        BX_INFO("KBD: unsupported int 16h function %02x\n", GET_AH());
  9.2924 @@ -4877,7 +5181,7 @@ int09_function(DI, SI, BP, SP, BX, DX, C
  9.2925    Bit16u DI, SI, BP, SP, BX, DX, CX, AX;
  9.2926  {
  9.2927    Bit8u scancode, asciicode, shift_flags;
  9.2928 -  Bit8u mf2_flags, mf2_state, led_flags;
  9.2929 +  Bit8u mf2_flags, mf2_state;
  9.2930  
  9.2931    //
  9.2932    // DS has been set to F000 before call
  9.2933 @@ -4895,7 +5199,6 @@ int09_function(DI, SI, BP, SP, BX, DX, C
  9.2934    shift_flags = read_byte(0x0040, 0x17);
  9.2935    mf2_flags = read_byte(0x0040, 0x18);
  9.2936    mf2_state = read_byte(0x0040, 0x96);
  9.2937 -  led_flags = read_byte(0x0040, 0x97);
  9.2938    asciicode = 0;
  9.2939  
  9.2940    switch (scancode) {
  9.2941 @@ -4904,8 +5207,6 @@ int09_function(DI, SI, BP, SP, BX, DX, C
  9.2942        write_byte(0x0040, 0x17, shift_flags);
  9.2943        mf2_flags |= 0x40;
  9.2944        write_byte(0x0040, 0x18, mf2_flags);
  9.2945 -      led_flags ^= 0x04;
  9.2946 -      write_byte(0x0040, 0x97, led_flags);
  9.2947        break;
  9.2948      case 0xba: /* Caps Lock release */
  9.2949        mf2_flags &= ~0x40;
  9.2950 @@ -4913,11 +5214,8 @@ int09_function(DI, SI, BP, SP, BX, DX, C
  9.2951        break;
  9.2952  
  9.2953      case 0x2a: /* L Shift press */
  9.2954 -      /*shift_flags &= ~0x40;*/
  9.2955        shift_flags |= 0x02;
  9.2956        write_byte(0x0040, 0x17, shift_flags);
  9.2957 -      led_flags &= ~0x04;
  9.2958 -      write_byte(0x0040, 0x97, led_flags);
  9.2959        break;
  9.2960      case 0xaa: /* L Shift release */
  9.2961        shift_flags &= ~0x02;
  9.2962 @@ -4925,11 +5223,8 @@ int09_function(DI, SI, BP, SP, BX, DX, C
  9.2963        break;
  9.2964  
  9.2965      case 0x36: /* R Shift press */
  9.2966 -      /*shift_flags &= ~0x40;*/
  9.2967        shift_flags |= 0x01;
  9.2968        write_byte(0x0040, 0x17, shift_flags);
  9.2969 -      led_flags &= ~0x04;
  9.2970 -      write_byte(0x0040, 0x97, led_flags);
  9.2971        break;
  9.2972      case 0xb6: /* R Shift release */
  9.2973        shift_flags &= ~0x01;
  9.2974 @@ -4937,71 +5232,75 @@ int09_function(DI, SI, BP, SP, BX, DX, C
  9.2975        break;
  9.2976  
  9.2977      case 0x1d: /* Ctrl press */
  9.2978 -      shift_flags |= 0x04;
  9.2979 -      write_byte(0x0040, 0x17, shift_flags);
  9.2980 -      if (mf2_state & 0x01) {
  9.2981 -        mf2_flags |= 0x04;
  9.2982 -      } else {
  9.2983 -        mf2_flags |= 0x01;
  9.2984 -        }
  9.2985 -      write_byte(0x0040, 0x18, mf2_flags);
  9.2986 +      if ((mf2_state & 0x01) == 0) {
  9.2987 +        shift_flags |= 0x04;
  9.2988 +        write_byte(0x0040, 0x17, shift_flags);
  9.2989 +        if (mf2_state & 0x02) {
  9.2990 +          mf2_state |= 0x04;
  9.2991 +          write_byte(0x0040, 0x96, mf2_state);
  9.2992 +        } else {
  9.2993 +          mf2_flags |= 0x01;
  9.2994 +          write_byte(0x0040, 0x18, mf2_flags);
  9.2995 +        }
  9.2996 +      }
  9.2997        break;
  9.2998      case 0x9d: /* Ctrl release */
  9.2999 -      shift_flags &= ~0x04;
  9.3000 -      write_byte(0x0040, 0x17, shift_flags);
  9.3001 -      if (mf2_state & 0x01) {
  9.3002 -        mf2_flags &= ~0x04;
  9.3003 -      } else {
  9.3004 -        mf2_flags &= ~0x01;
  9.3005 -        }
  9.3006 -      write_byte(0x0040, 0x18, mf2_flags);
  9.3007 +      if ((mf2_state & 0x01) == 0) {
  9.3008 +        shift_flags &= ~0x04;
  9.3009 +        write_byte(0x0040, 0x17, shift_flags);
  9.3010 +        if (mf2_state & 0x02) {
  9.3011 +          mf2_state &= ~0x04;
  9.3012 +          write_byte(0x0040, 0x96, mf2_state);
  9.3013 +        } else {
  9.3014 +          mf2_flags &= ~0x01;
  9.3015 +          write_byte(0x0040, 0x18, mf2_flags);
  9.3016 +        }
  9.3017 +      }
  9.3018        break;
  9.3019  
  9.3020      case 0x38: /* Alt press */
  9.3021        shift_flags |= 0x08;
  9.3022        write_byte(0x0040, 0x17, shift_flags);
  9.3023 -      if (mf2_state & 0x01) {
  9.3024 -        mf2_flags |= 0x08;
  9.3025 +      if (mf2_state & 0x02) {
  9.3026 +        mf2_state |= 0x08;
  9.3027 +        write_byte(0x0040, 0x96, mf2_state);
  9.3028        } else {
  9.3029          mf2_flags |= 0x02;
  9.3030 -        }
  9.3031 -      write_byte(0x0040, 0x18, mf2_flags);
  9.3032 +        write_byte(0x0040, 0x18, mf2_flags);
  9.3033 +      }
  9.3034        break;
  9.3035      case 0xb8: /* Alt release */
  9.3036        shift_flags &= ~0x08;
  9.3037        write_byte(0x0040, 0x17, shift_flags);
  9.3038 -      if (mf2_state & 0x01) {
  9.3039 -        mf2_flags &= ~0x08;
  9.3040 +      if (mf2_state & 0x02) {
  9.3041 +        mf2_state &= ~0x08;
  9.3042 +        write_byte(0x0040, 0x96, mf2_state);
  9.3043        } else {
  9.3044          mf2_flags &= ~0x02;
  9.3045 -        }
  9.3046 -      write_byte(0x0040, 0x18, mf2_flags);
  9.3047 +        write_byte(0x0040, 0x18, mf2_flags);
  9.3048 +      }
  9.3049        break;
  9.3050  
  9.3051      case 0x45: /* Num Lock press */
  9.3052 -      if ((mf2_state & 0x01) == 0) {
  9.3053 +      if ((mf2_state & 0x03) == 0) {
  9.3054          mf2_flags |= 0x20;
  9.3055          write_byte(0x0040, 0x18, mf2_flags);
  9.3056          shift_flags ^= 0x20;
  9.3057 -        led_flags ^= 0x02;
  9.3058          write_byte(0x0040, 0x17, shift_flags);
  9.3059 -        write_byte(0x0040, 0x97, led_flags);
  9.3060 -        }
  9.3061 +      }
  9.3062        break;
  9.3063      case 0xc5: /* Num Lock release */
  9.3064 -      if ((mf2_state & 0x01) == 0) {
  9.3065 +      if ((mf2_state & 0x03) == 0) {
  9.3066          mf2_flags &= ~0x20;
  9.3067          write_byte(0x0040, 0x18, mf2_flags);
  9.3068 -        }
  9.3069 +      }
  9.3070        break;
  9.3071  
  9.3072      case 0x46: /* Scroll Lock press */
  9.3073        mf2_flags |= 0x10;
  9.3074        write_byte(0x0040, 0x18, mf2_flags);
  9.3075        shift_flags ^= 0x10;
  9.3076 -      led_flags ^= 0x01;
  9.3077        write_byte(0x0040, 0x17, shift_flags);
  9.3078 -      write_byte(0x0040, 0x97, led_flags);
  9.3079        break;
  9.3080  
  9.3081      case 0xc6: /* Scroll Lock release */
  9.3082 @@ -5014,50 +5313,55 @@ int09_function(DI, SI, BP, SP, BX, DX, C
  9.3083              machine_reset();
  9.3084          /* Fall through */
  9.3085      default:
  9.3086 -      if (scancode & 0x80) return; /* toss key releases ... */
  9.3087 +      if (scancode & 0x80) {
  9.3088 +        break; /* toss key releases ... */
  9.3089 +      }
  9.3090        if (scancode > MAX_SCAN_CODE) {
  9.3091 -        BX_INFO("KBD: int09h_handler(): unknown scancode (%x) read!\n", scancode);
  9.3092 +        BX_INFO("KBD: int09h_handler(): unknown scancode read: 0x%02x!\n", scancode);
  9.3093          return;
  9.3094 -        }
  9.3095 +      }
  9.3096        if (shift_flags & 0x08) { /* ALT */
  9.3097          asciicode = scan_to_scanascii[scancode].alt;
  9.3098          scancode = scan_to_scanascii[scancode].alt >> 8;
  9.3099 -        }
  9.3100 -      else if (shift_flags & 0x04) { /* CONTROL */
  9.3101 +      } else if (shift_flags & 0x04) { /* CONTROL */
  9.3102          asciicode = scan_to_scanascii[scancode].control;
  9.3103          scancode = scan_to_scanascii[scancode].control >> 8;
  9.3104 -        }
  9.3105 -      else if (shift_flags & 0x03) { /* LSHIFT + RSHIFT */
  9.3106 -        /* check if lock state should be ignored 
  9.3107 +      } else if (((mf2_state & 0x02) > 0) && ((scancode >= 0x47) && (scancode <= 0x53))) {
  9.3108 +        /* extended keys handling */
  9.3109 +        asciicode = 0xe0;
  9.3110 +        scancode = scan_to_scanascii[scancode].normal >> 8;
  9.3111 +      } else if (shift_flags & 0x03) { /* LSHIFT + RSHIFT */
  9.3112 +        /* check if lock state should be ignored
  9.3113           * because a SHIFT key are pressed */
  9.3114 -         
  9.3115 +
  9.3116          if (shift_flags & scan_to_scanascii[scancode].lock_flags) {
  9.3117            asciicode = scan_to_scanascii[scancode].normal;
  9.3118            scancode = scan_to_scanascii[scancode].normal >> 8;
  9.3119 -          }
  9.3120 -        else {
  9.3121 +        } else {
  9.3122            asciicode = scan_to_scanascii[scancode].shift;
  9.3123            scancode = scan_to_scanascii[scancode].shift >> 8;
  9.3124 -          }
  9.3125 -        }
  9.3126 -      else {
  9.3127 +        }
  9.3128 +      } else {
  9.3129          /* check if lock is on */
  9.3130          if (shift_flags & scan_to_scanascii[scancode].lock_flags) {
  9.3131            asciicode = scan_to_scanascii[scancode].shift;
  9.3132            scancode = scan_to_scanascii[scancode].shift >> 8;
  9.3133 -          }
  9.3134 -        else {
  9.3135 +        } else {
  9.3136            asciicode = scan_to_scanascii[scancode].normal;
  9.3137            scancode = scan_to_scanascii[scancode].normal >> 8;
  9.3138 -          }
  9.3139 -        }
  9.3140 +        }
  9.3141 +      }
  9.3142        if (scancode==0 && asciicode==0) {
  9.3143          BX_INFO("KBD: int09h_handler(): scancode & asciicode are zero?\n");
  9.3144 -        }
  9.3145 +      }
  9.3146        enqueue_key(scancode, asciicode);
  9.3147        break;
  9.3148 -    }
  9.3149 -  mf2_state &= ~0x01;
  9.3150 +  }
  9.3151 +  if ((scancode & 0x7f) != 0x1d) {
  9.3152 +    mf2_state &= ~0x01;
  9.3153 +  }
  9.3154 +  mf2_state &= ~0x02;
  9.3155 +  write_byte(0x0040, 0x96, mf2_state);
  9.3156  }
  9.3157  
  9.3158    unsigned int
  9.3159 @@ -5066,9 +5370,6 @@ enqueue_key(scan_code, ascii_code)
  9.3160  {
  9.3161    Bit16u buffer_start, buffer_end, buffer_head, buffer_tail, temp_tail;
  9.3162  
  9.3163 -  //BX_INFO("KBD:   enqueue_key() called scan:%02x, ascii:%02x\n",
  9.3164 -  //    scan_code, ascii_code);
  9.3165 -
  9.3166  #if BX_CPU < 2
  9.3167    buffer_start = 0x001E;
  9.3168    buffer_end   = 0x003E;
  9.3169 @@ -5118,9 +5419,8 @@ BX_DEBUG_INT74("int74: read byte %02x\n"
  9.3170    mouse_flags_2 = read_byte(ebda_seg, 0x0027);
  9.3171  
  9.3172    if ( (mouse_flags_2 & 0x80) != 0x80 ) {
  9.3173 -      //    BX_PANIC("int74_function:\n");
  9.3174        return;
  9.3175 -    }
  9.3176 +  }
  9.3177  
  9.3178    package_count = mouse_flags_2 & 0x07;
  9.3179    index = mouse_flags_1 & 0x07;
  9.3180 @@ -5148,10 +5448,10 @@ BX_DEBUG_INT74("int74_function: make_far
  9.3181  #if BX_USE_ATADRV
  9.3182  
  9.3183    void
  9.3184 -int13_harddisk(DS, ES, DI, SI, BP, ELDX, BX, DX, CX, AX, IP, CS, FLAGS)
  9.3185 -  Bit16u DS, ES, DI, SI, BP, ELDX, BX, DX, CX, AX, IP, CS, FLAGS;
  9.3186 -{
  9.3187 -  Bit32u lba;
  9.3188 +int13_harddisk(EHAX, DS, ES, DI, SI, BP, ELDX, BX, DX, CX, AX, IP, CS, FLAGS)
  9.3189 +  Bit16u EHAX, DS, ES, DI, SI, BP, ELDX, BX, DX, CX, AX, IP, CS, FLAGS;
  9.3190 +{
  9.3191 +  Bit32u lba_low, lba_high;
  9.3192    Bit16u ebda_seg=read_word(0x0040,0x000E);
  9.3193    Bit16u cylinder, head, sector;
  9.3194    Bit16u segment, offset;
  9.3195 @@ -5172,12 +5472,12 @@ int13_harddisk(DS, ES, DI, SI, BP, ELDX,
  9.3196    // Get the ata channel
  9.3197    device=read_byte(ebda_seg,&EbdaData->ata.hdidmap[GET_ELDL()-0x80]);
  9.3198  
  9.3199 -  // basic check : device has to be valid 
  9.3200 +  // basic check : device has to be valid
  9.3201    if (device >= BX_MAX_ATA_DEVICES) {
  9.3202      BX_INFO("int13_harddisk: function %02x, unmapped device for ELDL=%02x\n", GET_AH(), GET_ELDL());
  9.3203      goto int13_fail;
  9.3204      }
  9.3205 -  
  9.3206 +
  9.3207    switch (GET_AH()) {
  9.3208  
  9.3209      case 0x00: /* disk controller reset */
  9.3210 @@ -5195,7 +5495,7 @@ int13_harddisk(DS, ES, DI, SI, BP, ELDX,
  9.3211        break;
  9.3212  
  9.3213      case 0x02: // read disk sectors
  9.3214 -    case 0x03: // write disk sectors 
  9.3215 +    case 0x03: // write disk sectors
  9.3216      case 0x04: // verify disk sectors
  9.3217  
  9.3218        count       = GET_AL();
  9.3219 @@ -5207,10 +5507,10 @@ int13_harddisk(DS, ES, DI, SI, BP, ELDX,
  9.3220        segment = ES;
  9.3221        offset  = BX;
  9.3222  
  9.3223 -      if ( (count > 128) || (count == 0) ) {
  9.3224 -        BX_INFO("int13_harddisk: function %02x, count out of range!\n",GET_AH());
  9.3225 +      if ((count > 128) || (count == 0) || (sector == 0)) {
  9.3226 +        BX_INFO("int13_harddisk: function %02x, parameter out of range!\n",GET_AH());
  9.3227          goto int13_fail;
  9.3228 -        }
  9.3229 +      }
  9.3230  
  9.3231        nlc   = read_word(ebda_seg, &EbdaData->ata.devices[device].lchs.cylinders);
  9.3232        nlh   = read_word(ebda_seg, &EbdaData->ata.devices[device].lchs.heads);
  9.3233 @@ -5221,7 +5521,7 @@ int13_harddisk(DS, ES, DI, SI, BP, ELDX,
  9.3234          BX_INFO("int13_harddisk: function %02x, parameters out of range %04x/%04x/%04x!\n", GET_AH(), cylinder, head, sector);
  9.3235          goto int13_fail;
  9.3236          }
  9.3237 -      
  9.3238 +
  9.3239        // FIXME verify
  9.3240        if ( GET_AH() == 0x04 ) goto int13_success;
  9.3241  
  9.3242 @@ -5230,14 +5530,15 @@ int13_harddisk(DS, ES, DI, SI, BP, ELDX,
  9.3243  
  9.3244        // if needed, translate lchs to lba, and execute command
  9.3245        if ( (nph != nlh) || (npspt != nlspt)) {
  9.3246 -        lba = ((((Bit32u)cylinder * (Bit32u)nlh) + (Bit32u)head) * (Bit32u)nlspt) + (Bit32u)sector - 1;
  9.3247 +        lba_low = ((((Bit32u)cylinder * (Bit32u)nlh) + (Bit32u)head) * (Bit32u)nlspt) + (Bit32u)sector - 1;
  9.3248 +        lba_high = 0;
  9.3249          sector = 0; // this forces the command to be lba
  9.3250          }
  9.3251  
  9.3252        if ( GET_AH() == 0x02 )
  9.3253 -        status=ata_cmd_data_in(device, ATA_CMD_READ_SECTORS, count, cylinder, head, sector, lba, segment, offset);
  9.3254 +        status=ata_cmd_data_in(device, ATA_CMD_READ_SECTORS, count, cylinder, head, sector, lba_low, lba_high, segment, offset);
  9.3255        else
  9.3256 -        status=ata_cmd_data_out(device, ATA_CMD_WRITE_SECTORS, count, cylinder, head, sector, lba, segment, offset);
  9.3257 +        status=ata_cmd_data_out(device, ATA_CMD_WRITE_SECTORS, count, cylinder, head, sector, lba_low, lba_high, segment, offset);
  9.3258  
  9.3259        // Set nb of sector transferred
  9.3260        SET_AL(read_word(ebda_seg, &EbdaData->ata.trsfsectors));
  9.3261 @@ -5258,7 +5559,7 @@ int13_harddisk(DS, ES, DI, SI, BP, ELDX,
  9.3262        break;
  9.3263  
  9.3264      case 0x08: /* read disk drive parameters */
  9.3265 -      
  9.3266 +
  9.3267        // Get logical geometry from table
  9.3268        nlc   = read_word(ebda_seg, &EbdaData->ata.devices[device].lchs.cylinders);
  9.3269        nlh   = read_word(ebda_seg, &EbdaData->ata.devices[device].lchs.heads);
  9.3270 @@ -5273,13 +5574,13 @@ int13_harddisk(DS, ES, DI, SI, BP, ELDX,
  9.3271        SET_DL(count); /* FIXME returns 0, 1, or n hard drives */
  9.3272  
  9.3273        // FIXME should set ES & DI
  9.3274 -      
  9.3275 +
  9.3276        goto int13_success;
  9.3277        break;
  9.3278  
  9.3279      case 0x10: /* check drive ready */
  9.3280        // should look at 40:8E also???
  9.3281 -      
  9.3282 +
  9.3283        // Read the status from controller
  9.3284        status = inb(read_word(ebda_seg, &EbdaData->ata.channels[device/2].iobase1) + ATA_CB_STAT);
  9.3285        if ( (status & ( ATA_CB_STAT_BSY | ATA_CB_STAT_RDY )) == ATA_CB_STAT_RDY ) {
  9.3286 @@ -5293,15 +5594,15 @@ int13_harddisk(DS, ES, DI, SI, BP, ELDX,
  9.3287  
  9.3288      case 0x15: /* read disk drive size */
  9.3289  
  9.3290 -      // Get physical geometry from table
  9.3291 -      npc   = read_word(ebda_seg, &EbdaData->ata.devices[device].pchs.cylinders);
  9.3292 -      nph   = read_word(ebda_seg, &EbdaData->ata.devices[device].pchs.heads);
  9.3293 -      npspt = read_word(ebda_seg, &EbdaData->ata.devices[device].pchs.spt);
  9.3294 +      // Get logical geometry from table
  9.3295 +      nlc   = read_word(ebda_seg, &EbdaData->ata.devices[device].lchs.cylinders);
  9.3296 +      nlh   = read_word(ebda_seg, &EbdaData->ata.devices[device].lchs.heads);
  9.3297 +      nlspt = read_word(ebda_seg, &EbdaData->ata.devices[device].lchs.spt);
  9.3298  
  9.3299        // Compute sector count seen by int13
  9.3300 -      lba = (Bit32u)(npc - 1) * (Bit32u)nph * (Bit32u)npspt;
  9.3301 -      CX = lba >> 16;
  9.3302 -      DX = lba & 0xffff;
  9.3303 +      lba_low = (Bit32u)(nlc - 1) * (Bit32u)nlh * (Bit32u)nlspt;
  9.3304 +      CX = lba_low >> 16;
  9.3305 +      DX = lba_low & 0xffff;
  9.3306  
  9.3307        SET_AH(3);  // hard disk accessible
  9.3308        goto int13_success_noah;
  9.3309 @@ -5322,17 +5623,18 @@ int13_harddisk(DS, ES, DI, SI, BP, ELDX,
  9.3310        count=read_word(DS, SI+(Bit16u)&Int13Ext->count);
  9.3311        segment=read_word(DS, SI+(Bit16u)&Int13Ext->segment);
  9.3312        offset=read_word(DS, SI+(Bit16u)&Int13Ext->offset);
  9.3313 - 
  9.3314 -      // Can't use 64 bits lba
  9.3315 -      lba=read_dword(DS, SI+(Bit16u)&Int13Ext->lba2);
  9.3316 -      if (lba != 0L) {
  9.3317 -        BX_PANIC("int13_harddisk: function %02x. Can't use 64bits lba\n",GET_AH());
  9.3318 +
  9.3319 +      // Get 32 msb lba and check
  9.3320 +      lba_high=read_dword(DS, SI+(Bit16u)&Int13Ext->lba2);
  9.3321 +      if (lba_high > read_dword(ebda_seg, &EbdaData->ata.devices[device].sectors_high) ) {
  9.3322 +        BX_INFO("int13_harddisk: function %02x. LBA out of range\n",GET_AH());
  9.3323          goto int13_fail;
  9.3324          }
  9.3325  
  9.3326 -      // Get 32 bits lba and check
  9.3327 -      lba=read_dword(DS, SI+(Bit16u)&Int13Ext->lba1);
  9.3328 -      if (lba >= read_dword(ebda_seg, &EbdaData->ata.devices[device].sectors) ) {
  9.3329 +      // Get 32 lsb lba and check
  9.3330 +      lba_low=read_dword(DS, SI+(Bit16u)&Int13Ext->lba1);
  9.3331 +      if (lba_high == read_dword(ebda_seg, &EbdaData->ata.devices[device].sectors_high)
  9.3332 +          && lba_low >= read_dword(ebda_seg, &EbdaData->ata.devices[device].sectors_low) ) {
  9.3333          BX_INFO("int13_harddisk: function %02x. LBA out of range\n",GET_AH());
  9.3334          goto int13_fail;
  9.3335          }
  9.3336 @@ -5340,12 +5642,12 @@ int13_harddisk(DS, ES, DI, SI, BP, ELDX,
  9.3337        // If verify or seek
  9.3338        if (( GET_AH() == 0x44 ) || ( GET_AH() == 0x47 ))
  9.3339          goto int13_success;
  9.3340 -      
  9.3341 +
  9.3342        // Execute the command
  9.3343        if ( GET_AH() == 0x42 )
  9.3344 -        status=ata_cmd_data_in(device, ATA_CMD_READ_SECTORS, count, 0, 0, 0, lba, segment, offset);
  9.3345 +        status=ata_cmd_data_in(device, ATA_CMD_READ_SECTORS, count, 0, 0, 0, lba_low, lba_high, segment, offset);
  9.3346        else
  9.3347 -        status=ata_cmd_data_out(device, ATA_CMD_WRITE_SECTORS, count, 0, 0, 0, lba, segment, offset);
  9.3348 +        status=ata_cmd_data_out(device, ATA_CMD_WRITE_SECTORS, count, 0, 0, 0, lba_low, lba_high, segment, offset);
  9.3349  
  9.3350        count=read_word(ebda_seg, &EbdaData->ata.trsfsectors);
  9.3351        write_word(DS, SI+(Bit16u)&Int13Ext->count, count);
  9.3352 @@ -5363,7 +5665,7 @@ int13_harddisk(DS, ES, DI, SI, BP, ELDX,
  9.3353      case 0x49: // IBM/MS extended media change
  9.3354        goto int13_success;    // Always success for HD
  9.3355        break;
  9.3356 -      
  9.3357 +
  9.3358      case 0x46: // IBM/MS eject media
  9.3359        SET_AH(0xb2);          // Volume Not Removable
  9.3360        goto int13_fail_noah;  // Always fail for HD
  9.3361 @@ -5373,7 +5675,7 @@ int13_harddisk(DS, ES, DI, SI, BP, ELDX,
  9.3362        size=read_word(DS,SI+(Bit16u)&Int13DPT->size);
  9.3363  
  9.3364        // Buffer is too small
  9.3365 -      if(size < 0x1a) 
  9.3366 +      if(size < 0x1a)
  9.3367          goto int13_fail;
  9.3368  
  9.3369        // EDD 1.x
  9.3370 @@ -5383,17 +5685,26 @@ int13_harddisk(DS, ES, DI, SI, BP, ELDX,
  9.3371          npc     = read_word(ebda_seg, &EbdaData->ata.devices[device].pchs.cylinders);
  9.3372          nph     = read_word(ebda_seg, &EbdaData->ata.devices[device].pchs.heads);
  9.3373          npspt   = read_word(ebda_seg, &EbdaData->ata.devices[device].pchs.spt);
  9.3374 -        lba     = read_dword(ebda_seg, &EbdaData->ata.devices[device].sectors);
  9.3375 +        lba_low = read_dword(ebda_seg, &EbdaData->ata.devices[device].sectors_low);
  9.3376 +        lba_high = read_dword(ebda_seg, &EbdaData->ata.devices[device].sectors_high);
  9.3377          blksize = read_word(ebda_seg, &EbdaData->ata.devices[device].blksize);
  9.3378  
  9.3379          write_word(DS, SI+(Bit16u)&Int13DPT->size, 0x1a);
  9.3380 -        write_word(DS, SI+(Bit16u)&Int13DPT->infos, 0x02); // geometry is valid
  9.3381 -        write_dword(DS, SI+(Bit16u)&Int13DPT->cylinders, (Bit32u)npc);
  9.3382 +        if (lba_high || (lba_low/npspt)/nph > 0x3fff)
  9.3383 +        {
  9.3384 +          write_word(DS, SI+(Bit16u)&Int13DPT->infos, 0x00); // geometry is invalid
  9.3385 +          write_dword(DS, SI+(Bit16u)&Int13DPT->cylinders, 0x3fff);
  9.3386 +        }
  9.3387 +        else
  9.3388 +        {
  9.3389 +          write_word(DS, SI+(Bit16u)&Int13DPT->infos, 0x02); // geometry is valid
  9.3390 +          write_dword(DS, SI+(Bit16u)&Int13DPT->cylinders, (Bit32u)npc);
  9.3391 +        }
  9.3392          write_dword(DS, SI+(Bit16u)&Int13DPT->heads, (Bit32u)nph);
  9.3393          write_dword(DS, SI+(Bit16u)&Int13DPT->spt, (Bit32u)npspt);
  9.3394 -        write_dword(DS, SI+(Bit16u)&Int13DPT->sector_count1, lba);  // FIXME should be Bit64
  9.3395 -        write_dword(DS, SI+(Bit16u)&Int13DPT->sector_count2, 0L);  
  9.3396 -        write_word(DS, SI+(Bit16u)&Int13DPT->blksize, blksize);  
  9.3397 +        write_dword(DS, SI+(Bit16u)&Int13DPT->sector_count1, lba_low);
  9.3398 +        write_dword(DS, SI+(Bit16u)&Int13DPT->sector_count2, lba_high);
  9.3399 +        write_word(DS, SI+(Bit16u)&Int13DPT->blksize, blksize);
  9.3400          }
  9.3401  
  9.3402        // EDD 2.x
  9.3403 @@ -5403,8 +5714,8 @@ int13_harddisk(DS, ES, DI, SI, BP, ELDX,
  9.3404  
  9.3405          write_word(DS, SI+(Bit16u)&Int13DPT->size, 0x1e);
  9.3406  
  9.3407 -        write_word(DS, SI+(Bit16u)&Int13DPT->dpte_segment, ebda_seg);  
  9.3408 -        write_word(DS, SI+(Bit16u)&Int13DPT->dpte_offset, &EbdaData->ata.dpte);  
  9.3409 +        write_word(DS, SI+(Bit16u)&Int13DPT->dpte_segment, ebda_seg);
  9.3410 +        write_word(DS, SI+(Bit16u)&Int13DPT->dpte_offset, &EbdaData->ata.dpte);
  9.3411  
  9.3412          // Fill in dpte
  9.3413          channel = device / 2;
  9.3414 @@ -5414,14 +5725,14 @@ int13_harddisk(DS, ES, DI, SI, BP, ELDX,
  9.3415          mode = read_byte(ebda_seg, &EbdaData->ata.devices[device].mode);
  9.3416          translation = read_byte(ebda_seg, &EbdaData->ata.devices[device].translation);
  9.3417  
  9.3418 -        options  = (translation==ATA_TRANSLATION_NONE?0:1<<3); // chs translation
  9.3419 +        options  = (translation==ATA_TRANSLATION_NONE?0:1)<<3; // chs translation
  9.3420          options |= (1<<4); // lba translation
  9.3421 -        options |= (mode==ATA_MODE_PIO32?1:0<<7);
  9.3422 -        options |= (translation==ATA_TRANSLATION_LBA?1:0<<9); 
  9.3423 -        options |= (translation==ATA_TRANSLATION_RECHS?3:0<<9); 
  9.3424 +        options |= (mode==ATA_MODE_PIO32?1:0)<<7;
  9.3425 +        options |= (translation==ATA_TRANSLATION_LBA?1:0)<<9;
  9.3426 +        options |= (translation==ATA_TRANSLATION_RECHS?3:0)<<9;
  9.3427  
  9.3428          write_word(ebda_seg, &EbdaData->ata.dpte.iobase1, iobase1);
  9.3429 -        write_word(ebda_seg, &EbdaData->ata.dpte.iobase2, iobase2);
  9.3430 +        write_word(ebda_seg, &EbdaData->ata.dpte.iobase2, iobase2 + ATA_CB_DC);
  9.3431          write_byte(ebda_seg, &EbdaData->ata.dpte.prefix, (0xe | (device % 2))<<4 );
  9.3432          write_byte(ebda_seg, &EbdaData->ata.dpte.unused, 0xcb );
  9.3433          write_byte(ebda_seg, &EbdaData->ata.dpte.irq, irq );
  9.3434 @@ -5430,10 +5741,13 @@ int13_harddisk(DS, ES, DI, SI, BP, ELDX,
  9.3435          write_byte(ebda_seg, &EbdaData->ata.dpte.pio, 0 );
  9.3436          write_word(ebda_seg, &EbdaData->ata.dpte.options, options);
  9.3437          write_word(ebda_seg, &EbdaData->ata.dpte.reserved, 0);
  9.3438 -        write_byte(ebda_seg, &EbdaData->ata.dpte.revision, 0x11);
  9.3439 - 
  9.3440 +        if (size >=0x42)
  9.3441 +          write_byte(ebda_seg, &EbdaData->ata.dpte.revision, 0x11);
  9.3442 +        else
  9.3443 +          write_byte(ebda_seg, &EbdaData->ata.dpte.revision, 0x10);
  9.3444 +
  9.3445          checksum=0;
  9.3446 -        for (i=0; i<15; i++) checksum+=read_byte(ebda_seg, (&EbdaData->ata.dpte) + i);
  9.3447 +        for (i=0; i<15; i++) checksum+=read_byte(ebda_seg, ((Bit8u*)(&EbdaData->ata.dpte)) + i);
  9.3448          checksum = ~checksum;
  9.3449          write_byte(ebda_seg, &EbdaData->ata.dpte.checksum, checksum);
  9.3450          }
  9.3451 @@ -5459,7 +5773,7 @@ int13_harddisk(DS, ES, DI, SI, BP, ELDX,
  9.3452            write_byte(DS, SI+(Bit16u)&Int13DPT->host_bus[2], 'A');
  9.3453            write_byte(DS, SI+(Bit16u)&Int13DPT->host_bus[3], 0);
  9.3454            }
  9.3455 -        else { 
  9.3456 +        else {
  9.3457            // FIXME PCI
  9.3458            }
  9.3459          write_byte(DS, SI+(Bit16u)&Int13DPT->iface_type[0], 'A');
  9.3460 @@ -5472,7 +5786,7 @@ int13_harddisk(DS, ES, DI, SI, BP, ELDX,
  9.3461            write_word(DS, SI+(Bit16u)&Int13DPT->iface_path[2], 0);
  9.3462            write_dword(DS, SI+(Bit16u)&Int13DPT->iface_path[4], 0L);
  9.3463            }
  9.3464 -        else { 
  9.3465 +        else {
  9.3466            // FIXME PCI
  9.3467            }
  9.3468          write_byte(DS, SI+(Bit16u)&Int13DPT->device_path[0], device%2);
  9.3469 @@ -5508,7 +5822,7 @@ int13_harddisk(DS, ES, DI, SI, BP, ELDX,
  9.3470      case 0x0d: /* alternate disk reset */
  9.3471      case 0x11: /* recalibrate */
  9.3472      case 0x14: /* controller internal diagnostic */
  9.3473 -      BX_INFO("int13h_harddisk function %02xh unimplemented, returns success\n", GET_AH());
  9.3474 +      BX_INFO("int13_harddisk: function %02xh unimplemented, returns success\n", GET_AH());
  9.3475        goto int13_success;
  9.3476        break;
  9.3477  
  9.3478 @@ -5517,7 +5831,7 @@ int13_harddisk(DS, ES, DI, SI, BP, ELDX,
  9.3479      case 0x18: // set media type for format
  9.3480      case 0x50: // IBM/MS send packet command
  9.3481      default:
  9.3482 -      BX_INFO("int13_harddisk function %02xh unsupported, returns fail\n", GET_AH());
  9.3483 +      BX_INFO("int13_harddisk: function %02xh unsupported, returns fail\n", GET_AH());
  9.3484        goto int13_fail;
  9.3485        break;
  9.3486      }
  9.3487 @@ -5553,8 +5867,7 @@ int13_cdrom(EHBX, DS, ES, DI, SI, BP, EL
  9.3488    Bit16u count, segment, offset, i, size;
  9.3489  
  9.3490    BX_DEBUG_INT13_CD("int13_cdrom: AX=%04x BX=%04x CX=%04x DX=%04x ES=%04x\n", AX, BX, CX, DX, ES);
  9.3491 -  // BX_DEBUG_INT13_CD("int13_cdrom: SS=%04x DS=%04x ES=%04x DI=%04x SI=%04x\n",get_SS(), DS, ES, DI, SI);
  9.3492 -  
  9.3493 +
  9.3494    SET_DISK_RET_STATUS(0x00);
  9.3495  
  9.3496    /* basic check : device should be 0xE0+ */
  9.3497 @@ -5571,16 +5884,16 @@ int13_cdrom(EHBX, DS, ES, DI, SI, BP, EL
  9.3498      BX_INFO("int13_cdrom: function %02x, unmapped device for ELDL=%02x\n", GET_AH(), GET_ELDL());
  9.3499      goto int13_fail;
  9.3500      }
  9.3501 -  
  9.3502 +
  9.3503    switch (GET_AH()) {
  9.3504  
  9.3505      // all those functions return SUCCESS
  9.3506      case 0x00: /* disk controller reset */
  9.3507      case 0x09: /* initialize drive parameters */
  9.3508      case 0x0c: /* seek to specified cylinder */
  9.3509 -    case 0x0d: /* alternate disk reset */  
  9.3510 -    case 0x10: /* check drive ready */    
  9.3511 -    case 0x11: /* recalibrate */      
  9.3512 +    case 0x0d: /* alternate disk reset */
  9.3513 +    case 0x10: /* check drive ready */
  9.3514 +    case 0x11: /* recalibrate */
  9.3515      case 0x14: /* controller internal diagnostic */
  9.3516      case 0x16: /* detect disk change */
  9.3517        goto int13_success;
  9.3518 @@ -5602,7 +5915,7 @@ int13_cdrom(EHBX, DS, ES, DI, SI, BP, EL
  9.3519        /* set CF if error status read */
  9.3520        if (status) goto int13_fail_nostatus;
  9.3521        else        goto int13_success_noah;
  9.3522 -      break;      
  9.3523 +      break;
  9.3524  
  9.3525      case 0x15: /* read disk drive size */
  9.3526        SET_AH(0x02);
  9.3527 @@ -5619,11 +5932,11 @@ int13_cdrom(EHBX, DS, ES, DI, SI, BP, EL
  9.3528      case 0x42: // IBM/MS extended read
  9.3529      case 0x44: // IBM/MS verify sectors
  9.3530      case 0x47: // IBM/MS extended seek
  9.3531 -       
  9.3532 +
  9.3533        count=read_word(DS, SI+(Bit16u)&Int13Ext->count);
  9.3534        segment=read_word(DS, SI+(Bit16u)&Int13Ext->segment);
  9.3535        offset=read_word(DS, SI+(Bit16u)&Int13Ext->offset);
  9.3536 - 
  9.3537 +
  9.3538        // Can't use 64 bits lba
  9.3539        lba=read_dword(DS, SI+(Bit16u)&Int13Ext->lba2);
  9.3540        if (lba != 0L) {
  9.3541 @@ -5631,13 +5944,13 @@ int13_cdrom(EHBX, DS, ES, DI, SI, BP, EL
  9.3542          goto int13_fail;
  9.3543          }
  9.3544  
  9.3545 -      // Get 32 bits lba 
  9.3546 +      // Get 32 bits lba
  9.3547        lba=read_dword(DS, SI+(Bit16u)&Int13Ext->lba1);
  9.3548  
  9.3549        // If verify or seek
  9.3550        if (( GET_AH() == 0x44 ) || ( GET_AH() == 0x47 ))
  9.3551          goto int13_success;
  9.3552 -      
  9.3553 +
  9.3554        memsetb(get_SS(),atacmd,0,12);
  9.3555        atacmd[0]=0x28;                      // READ command
  9.3556        atacmd[7]=(count & 0xff00) >> 8;     // Sectors
  9.3557 @@ -5646,7 +5959,7 @@ int13_cdrom(EHBX, DS, ES, DI, SI, BP, EL
  9.3558        atacmd[3]=(lba & 0x00ff0000) >> 16;
  9.3559        atacmd[4]=(lba & 0x0000ff00) >> 8;
  9.3560        atacmd[5]=(lba & 0x000000ff);
  9.3561 -      status = ata_cmd_packet(device, 12, get_SS(), atacmd, 0, count*2048L, ATA_DATA_IN, segment,offset); 
  9.3562 +      status = ata_cmd_packet(device, 12, get_SS(), atacmd, 0, count*2048L, ATA_DATA_IN, segment,offset);
  9.3563  
  9.3564        count = (Bit16u)(read_dword(ebda_seg, &EbdaData->ata.trsfbytes) >> 11);
  9.3565        write_word(DS, SI+(Bit16u)&Int13Ext->count, count);
  9.3566 @@ -5693,21 +6006,21 @@ int13_cdrom(EHBX, DS, ES, DI, SI, BP, EL
  9.3567  
  9.3568      case 0x46: // IBM/MS eject media
  9.3569        locks = read_byte(ebda_seg, &EbdaData->ata.devices[device].lock);
  9.3570 -      
  9.3571 +
  9.3572        if (locks != 0) {
  9.3573          SET_AH(0xb1); // media locked
  9.3574          goto int13_fail_noah;
  9.3575          }
  9.3576        // FIXME should handle 0x31 no media in device
  9.3577        // FIXME should handle 0xb5 valid request failed
  9.3578 -    
  9.3579 +
  9.3580        // Call removable media eject
  9.3581        ASM_START
  9.3582          push bp
  9.3583          mov  bp, sp
  9.3584  
  9.3585          mov ah, #0x52
  9.3586 -        int 15
  9.3587 +        int #0x15
  9.3588          mov _int13_cdrom.status + 2[bp], ah
  9.3589          jnc int13_cdrom_rme_end
  9.3590          mov _int13_cdrom.status, #1
  9.3591 @@ -5727,7 +6040,7 @@ int13_cdrom_rme_end:
  9.3592        size = read_word(DS,SI+(Bit16u)&Int13Ext->size);
  9.3593  
  9.3594        // Buffer is too small
  9.3595 -      if(size < 0x1a) 
  9.3596 +      if(size < 0x1a)
  9.3597          goto int13_fail;
  9.3598  
  9.3599        // EDD 1.x
  9.3600 @@ -5742,8 +6055,8 @@ int13_cdrom_rme_end:
  9.3601          write_dword(DS, SI+(Bit16u)&Int13DPT->heads, 0xffffffff);
  9.3602          write_dword(DS, SI+(Bit16u)&Int13DPT->spt, 0xffffffff);
  9.3603          write_dword(DS, SI+(Bit16u)&Int13DPT->sector_count1, 0xffffffff);  // FIXME should be Bit64
  9.3604 -        write_dword(DS, SI+(Bit16u)&Int13DPT->sector_count2, 0xffffffff);  
  9.3605 -        write_word(DS, SI+(Bit16u)&Int13DPT->blksize, blksize);  
  9.3606 +        write_dword(DS, SI+(Bit16u)&Int13DPT->sector_count2, 0xffffffff);
  9.3607 +        write_word(DS, SI+(Bit16u)&Int13DPT->blksize, blksize);
  9.3608          }
  9.3609  
  9.3610        // EDD 2.x
  9.3611 @@ -5753,8 +6066,8 @@ int13_cdrom_rme_end:
  9.3612  
  9.3613          write_word(DS, SI+(Bit16u)&Int13DPT->size, 0x1e);
  9.3614  
  9.3615 -        write_word(DS, SI+(Bit16u)&Int13DPT->dpte_segment, ebda_seg);  
  9.3616 -        write_word(DS, SI+(Bit16u)&Int13DPT->dpte_offset, &EbdaData->ata.dpte);  
  9.3617 +        write_word(DS, SI+(Bit16u)&Int13DPT->dpte_segment, ebda_seg);
  9.3618 +        write_word(DS, SI+(Bit16u)&Int13DPT->dpte_offset, &EbdaData->ata.dpte);
  9.3619  
  9.3620          // Fill in dpte
  9.3621          channel = device / 2;
  9.3622 @@ -5770,7 +6083,7 @@ int13_cdrom_rme_end:
  9.3623          options |= (mode==ATA_MODE_PIO32?1:0<<7);
  9.3624  
  9.3625          write_word(ebda_seg, &EbdaData->ata.dpte.iobase1, iobase1);
  9.3626 -        write_word(ebda_seg, &EbdaData->ata.dpte.iobase2, iobase2);
  9.3627 +        write_word(ebda_seg, &EbdaData->ata.dpte.iobase2, iobase2 + ATA_CB_DC);
  9.3628          write_byte(ebda_seg, &EbdaData->ata.dpte.prefix, (0xe | (device % 2))<<4 );
  9.3629          write_byte(ebda_seg, &EbdaData->ata.dpte.unused, 0xcb );
  9.3630          write_byte(ebda_seg, &EbdaData->ata.dpte.irq, irq );
  9.3631 @@ -5782,7 +6095,7 @@ int13_cdrom_rme_end:
  9.3632          write_byte(ebda_seg, &EbdaData->ata.dpte.revision, 0x11);
  9.3633  
  9.3634          checksum=0;
  9.3635 -        for (i=0; i<15; i++) checksum+=read_byte(ebda_seg, (&EbdaData->ata.dpte) + i);
  9.3636 +        for (i=0; i<15; i++) checksum+=read_byte(ebda_seg, ((Bit8u*)(&EbdaData->ata.dpte)) + i);
  9.3637          checksum = ~checksum;
  9.3638          write_byte(ebda_seg, &EbdaData->ata.dpte.checksum, checksum);
  9.3639          }
  9.3640 @@ -5808,7 +6121,7 @@ int13_cdrom_rme_end:
  9.3641            write_byte(DS, SI+(Bit16u)&Int13DPT->host_bus[2], 'A');
  9.3642            write_byte(DS, SI+(Bit16u)&Int13DPT->host_bus[3], 0);
  9.3643            }
  9.3644 -        else { 
  9.3645 +        else {
  9.3646            // FIXME PCI
  9.3647            }
  9.3648          write_byte(DS, SI+(Bit16u)&Int13DPT->iface_type[0], 'A');
  9.3649 @@ -5821,7 +6134,7 @@ int13_cdrom_rme_end:
  9.3650            write_word(DS, SI+(Bit16u)&Int13DPT->iface_path[2], 0);
  9.3651            write_dword(DS, SI+(Bit16u)&Int13DPT->iface_path[4], 0L);
  9.3652            }
  9.3653 -        else { 
  9.3654 +        else {
  9.3655            // FIXME PCI
  9.3656            }
  9.3657          write_byte(DS, SI+(Bit16u)&Int13DPT->device_path[0], device%2);
  9.3658 @@ -5843,7 +6156,7 @@ int13_cdrom_rme_end:
  9.3659        SET_AH(06);
  9.3660        goto int13_fail_nostatus;
  9.3661        break;
  9.3662 -      
  9.3663 +
  9.3664      case 0x4e: // // IBM/MS set hardware configuration
  9.3665        // DMA, prefetch, PIO maximum not supported
  9.3666        switch (GET_AL()) {
  9.3667 @@ -5905,7 +6218,7 @@ int13_eltorito(DS, ES, DI, SI, BP, SP, B
  9.3668  
  9.3669    BX_DEBUG_INT13_ET("int13_eltorito: AX=%04x BX=%04x CX=%04x DX=%04x ES=%04x\n", AX, BX, CX, DX, ES);
  9.3670    // BX_DEBUG_INT13_ET("int13_eltorito: SS=%04x DS=%04x ES=%04x DI=%04x SI=%04x\n",get_SS(), DS, ES, DI, SI);
  9.3671 -  
  9.3672 +
  9.3673    switch (GET_AH()) {
  9.3674  
  9.3675      // FIXME ElTorito Various. Should be implemented
  9.3676 @@ -5980,11 +6293,10 @@ int13_cdemu(DS, ES, DI, SI, BP, SP, BX, 
  9.3677    Bit8u  atacmd[12];
  9.3678  
  9.3679    BX_DEBUG_INT13_ET("int13_cdemu: AX=%04x BX=%04x CX=%04x DX=%04x ES=%04x\n", AX, BX, CX, DX, ES);
  9.3680 -  //BX_DEBUG_INT13_ET("int13_cdemu: SS=%04x ES=%04x DI=%04x SI=%04x\n", get_SS(), ES, DI, SI);
  9.3681 -  
  9.3682 +
  9.3683    /* at this point, we are emulating a floppy/harddisk */
  9.3684 -  
  9.3685 -  // Recompute the device number 
  9.3686 +
  9.3687 +  // Recompute the device number
  9.3688    device  = read_byte(ebda_seg,&EbdaData->cdemu.controller_index) * 2;
  9.3689    device += read_byte(ebda_seg,&EbdaData->cdemu.device_spec);
  9.3690  
  9.3691 @@ -5997,7 +6309,6 @@ int13_cdemu(DS, ES, DI, SI, BP, SP, BX, 
  9.3692      goto int13_fail;
  9.3693      }
  9.3694  
  9.3695 -  
  9.3696    switch (GET_AH()) {
  9.3697  
  9.3698      // all those functions return SUCCESS
  9.3699 @@ -6006,7 +6317,7 @@ int13_cdemu(DS, ES, DI, SI, BP, SP, BX, 
  9.3700      case 0x0c: /* seek to specified cylinder */
  9.3701      case 0x0d: /* alternate disk reset */  // FIXME ElTorito Various. should really reset ?
  9.3702      case 0x10: /* check drive ready */     // FIXME ElTorito Various. should check if ready ?
  9.3703 -    case 0x11: /* recalibrate */      
  9.3704 +    case 0x11: /* recalibrate */
  9.3705      case 0x14: /* controller internal diagnostic */
  9.3706      case 0x16: /* detect disk change */
  9.3707        goto int13_success;
  9.3708 @@ -6031,9 +6342,9 @@ int13_cdemu(DS, ES, DI, SI, BP, SP, BX, 
  9.3709  
  9.3710      case 0x02: // read disk sectors
  9.3711      case 0x04: // verify disk sectors
  9.3712 -      vspt       = read_word(ebda_seg,&EbdaData->cdemu.vdevice.spt); 
  9.3713 -      vcylinders = read_word(ebda_seg,&EbdaData->cdemu.vdevice.cylinders); 
  9.3714 -      vheads     = read_word(ebda_seg,&EbdaData->cdemu.vdevice.heads); 
  9.3715 +      vspt       = read_word(ebda_seg,&EbdaData->cdemu.vdevice.spt);
  9.3716 +      vcylinders = read_word(ebda_seg,&EbdaData->cdemu.vdevice.cylinders);
  9.3717 +      vheads     = read_word(ebda_seg,&EbdaData->cdemu.vdevice.heads);
  9.3718  
  9.3719        ilba       = read_dword(ebda_seg,&EbdaData->cdemu.ilba);
  9.3720  
  9.3721 @@ -6062,17 +6373,17 @@ int13_cdemu(DS, ES, DI, SI, BP, SP, BX, 
  9.3722  
  9.3723        // calculate the virtual lba inside the image
  9.3724        vlba=((((Bit32u)cylinder*(Bit32u)vheads)+(Bit32u)head)*(Bit32u)vspt)+((Bit32u)(sector-1));
  9.3725 - 
  9.3726 +
  9.3727        // In advance so we don't loose the count
  9.3728        SET_AL(nbsectors);
  9.3729  
  9.3730        // start lba on cd
  9.3731 -      slba  = (Bit32u)vlba/4; 
  9.3732 +      slba  = (Bit32u)vlba/4;
  9.3733        before= (Bit16u)vlba%4;
  9.3734  
  9.3735        // end lba on cd
  9.3736        elba = (Bit32u)(vlba+nbsectors-1)/4;
  9.3737 -      
  9.3738 +
  9.3739        memsetb(get_SS(),atacmd,0,12);
  9.3740        atacmd[0]=0x28;                      // READ command
  9.3741        atacmd[7]=((Bit16u)(elba-slba+1) & 0xff00) >> 8; // Sectors
  9.3742 @@ -6092,10 +6403,10 @@ int13_cdemu(DS, ES, DI, SI, BP, SP, BX, 
  9.3743        break;
  9.3744  
  9.3745      case 0x08: /* read disk drive parameters */
  9.3746 -      vspt=read_word(ebda_seg,&EbdaData->cdemu.vdevice.spt); 
  9.3747 -      vcylinders=read_word(ebda_seg,&EbdaData->cdemu.vdevice.cylinders) - 1; 
  9.3748 -      vheads=read_word(ebda_seg,&EbdaData->cdemu.vdevice.heads) - 1; 
  9.3749 - 
  9.3750 +      vspt=read_word(ebda_seg,&EbdaData->cdemu.vdevice.spt);
  9.3751 +      vcylinders=read_word(ebda_seg,&EbdaData->cdemu.vdevice.cylinders) - 1;
  9.3752 +      vheads=read_word(ebda_seg,&EbdaData->cdemu.vdevice.heads) - 1;
  9.3753 +
  9.3754        SET_AL( 0x00 );
  9.3755        SET_BL( 0x00 );
  9.3756        SET_CH( vcylinders & 0xff );
  9.3757 @@ -6103,7 +6414,7 @@ int13_cdemu(DS, ES, DI, SI, BP, SP, BX, 
  9.3758        SET_DH( vheads );
  9.3759        SET_DL( 0x02 );   // FIXME ElTorito Various. should send the real count of drives 1 or 2
  9.3760                          // FIXME ElTorito Harddisk. should send the HD count
  9.3761 - 
  9.3762 +
  9.3763        switch(read_byte(ebda_seg,&EbdaData->cdemu.media)) {
  9.3764          case 0x01: SET_BL( 0x02 ); break;
  9.3765          case 0x02: SET_BL( 0x04 ); break;
  9.3766 @@ -6139,7 +6450,7 @@ ASM_END
  9.3767      case 0x45: // IBM/MS lock/unlock drive
  9.3768      case 0x46: // IBM/MS eject media
  9.3769      case 0x47: // IBM/MS extended seek
  9.3770 -    case 0x48: // IBM/MS get drive parameters 
  9.3771 +    case 0x48: // IBM/MS get drive parameters
  9.3772      case 0x49: // IBM/MS extended media change
  9.3773      case 0x4e: // ? - set hardware configuration
  9.3774      case 0x50: // ? - send packet command
  9.3775 @@ -6227,8 +6538,8 @@ ASM_END
  9.3776  }
  9.3777  
  9.3778    void
  9.3779 -int13_harddisk(DS, ES, DI, SI, BP, ELDX, BX, DX, CX, AX, IP, CS, FLAGS)
  9.3780 -  Bit16u DS, ES, DI, SI, BP, ELDX, BX, DX, CX, AX, IP, CS, FLAGS;
  9.3781 +int13_harddisk(EHAX, DS, ES, DI, SI, BP, ELDX, BX, DX, CX, AX, IP, CS, FLAGS)
  9.3782 +  Bit16u EHAX, DS, ES, DI, SI, BP, ELDX, BX, DX, CX, AX, IP, CS, FLAGS;
  9.3783  {
  9.3784    Bit8u    drive, num_sectors, sector, head, status, mod;
  9.3785    Bit8u    drive_map;
  9.3786 @@ -6334,7 +6645,7 @@ BX_DEBUG_INT13_HD("int13_f01\n");
  9.3787          }
  9.3788  
  9.3789        if ( (num_sectors > 128) || (num_sectors == 0) )
  9.3790 -        BX_PANIC("int13_harddisk(): num_sectors out of range!\n");
  9.3791 +        BX_PANIC("int13_harddisk: num_sectors out of range!\n");
  9.3792  
  9.3793        if (head > 15)
  9.3794          BX_PANIC("hard drive BIOS:(read/verify) head > 15\n");
  9.3795 @@ -6480,7 +6791,7 @@ BX_DEBUG_INT13_HD("int13_f03\n");
  9.3796          }
  9.3797  
  9.3798        if ( (num_sectors > 128) || (num_sectors == 0) )
  9.3799 -        BX_PANIC("int13_harddisk(): num_sectors out of range!\n");
  9.3800 +        BX_PANIC("int13_harddisk: num_sectors out of range!\n");
  9.3801  
  9.3802        if (head > 15)
  9.3803          BX_PANIC("hard drive BIOS:(read) head > 15\n");
  9.3804 @@ -6590,7 +6901,7 @@ BX_DEBUG_INT13_HD("int13_f05\n");
  9.3805  
  9.3806      case 0x08: /* read disk drive parameters */
  9.3807  BX_DEBUG_INT13_HD("int13_f08\n");
  9.3808 -      
  9.3809 +
  9.3810        drive = GET_ELDL ();
  9.3811        get_hd_geometry(drive, &hd_cylinders, &hd_heads, &hd_sectors);
  9.3812  
  9.3813 @@ -6730,10 +7041,10 @@ ASM_END
  9.3814        break;
  9.3815  
  9.3816      case 0x18: // set media type for format
  9.3817 -    case 0x41: // IBM/MS 
  9.3818 -    case 0x42: // IBM/MS 
  9.3819 -    case 0x43: // IBM/MS 
  9.3820 -    case 0x44: // IBM/MS 
  9.3821 +    case 0x41: // IBM/MS
  9.3822 +    case 0x42: // IBM/MS
  9.3823 +    case 0x43: // IBM/MS
  9.3824 +    case 0x44: // IBM/MS
  9.3825      case 0x45: // IBM/MS lock/unlock drive
  9.3826      case 0x46: // IBM/MS eject media
  9.3827      case 0x47: // IBM/MS extended seek
  9.3828 @@ -6778,7 +7089,7 @@ get_hd_geometry(drive, hd_cylinders, hd_
  9.3829      hd_type = inb_cmos(0x12) & 0x0f;
  9.3830      if (hd_type != 0x0f)
  9.3831        BX_INFO(panic_msg_reg12h,1);
  9.3832 -    hd_type = inb_cmos(0x1a); // HD0: extended type
  9.3833 +    hd_type = inb_cmos(0x1a); // HD1: extended type
  9.3834      if (hd_type != 47)
  9.3835        BX_INFO(panic_msg_reg19h,0,0x1a);
  9.3836      iobase = 0x24;
  9.3837 @@ -6797,11 +7108,72 @@ get_hd_geometry(drive, hd_cylinders, hd_
  9.3838  
  9.3839  #endif //else BX_USE_ATADRV
  9.3840  
  9.3841 +#if BX_SUPPORT_FLOPPY
  9.3842  
  9.3843  //////////////////////
  9.3844  // FLOPPY functions //
  9.3845  //////////////////////
  9.3846  
  9.3847 +void floppy_reset_controller()
  9.3848 +{
  9.3849 +  Bit8u val8;
  9.3850 +
  9.3851 +  // Reset controller
  9.3852 +  val8 = inb(0x03f2);
  9.3853 +  outb(0x03f2, val8 & ~0x04);
  9.3854 +  outb(0x03f2, val8 | 0x04);
  9.3855 +
  9.3856 +  // Wait for controller to come out of reset
  9.3857 +  do {
  9.3858 +    val8 = inb(0x3f4);
  9.3859 +  } while ( (val8 & 0xc0) != 0x80 );
  9.3860 +}
  9.3861 +
  9.3862 +void floppy_prepare_controller(drive)
  9.3863 +  Bit16u drive;
  9.3864 +{
  9.3865 +  Bit8u  val8, dor, prev_reset;
  9.3866 +
  9.3867 +  // set 40:3e bit 7 to 0
  9.3868 +  val8 = read_byte(0x0040, 0x003e);
  9.3869 +  val8 &= 0x7f;
  9.3870 +  write_byte(0x0040, 0x003e, val8);
  9.3871 +
  9.3872 +  // turn on motor of selected drive, DMA & int enabled, normal operation
  9.3873 +  prev_reset = inb(0x03f2) & 0x04;
  9.3874 +  if (drive)
  9.3875 +    dor = 0x20;
  9.3876 +  else
  9.3877 +    dor = 0x10;
  9.3878 +  dor |= 0x0c;
  9.3879 +  dor |= drive;
  9.3880 +  outb(0x03f2, dor);
  9.3881 +
  9.3882 +  // reset the disk motor timeout value of INT 08
  9.3883 +  write_byte(0x40,0x40, BX_FLOPPY_ON_CNT);
  9.3884 +
  9.3885 +  // wait for drive readiness
  9.3886 +  do {
  9.3887 +    val8 = inb(0x3f4);
  9.3888 +  } while ( (val8 & 0xc0) != 0x80 );
  9.3889 +
  9.3890 +  if (prev_reset == 0) {
  9.3891 +    // turn on interrupts
  9.3892 +ASM_START
  9.3893 +    sti
  9.3894 +ASM_END
  9.3895 +    // wait on 40:3e bit 7 to become 1
  9.3896 +    do {
  9.3897 +      val8 = read_byte(0x0040, 0x003e);
  9.3898 +    } while ( (val8 & 0x80) == 0 );
  9.3899 +    val8 &= 0x7f;
  9.3900 +ASM_START
  9.3901 +    cli
  9.3902 +ASM_END
  9.3903 +    write_byte(0x0040, 0x003e, val8);
  9.3904 +  }
  9.3905 +}
  9.3906 +
  9.3907    bx_bool
  9.3908  floppy_media_known(drive)
  9.3909    Bit16u drive;
  9.3910 @@ -6908,7 +7280,7 @@ floppy_media_sense(drive)
  9.3911      retval = 1;
  9.3912      }
  9.3913    //
  9.3914 -  // Extended floppy size uses special cmos setting 
  9.3915 +  // Extended floppy size uses special cmos setting
  9.3916    else if ( drive_type == 6 ) {
  9.3917      // 160k 5.25" drive
  9.3918      config_data = 0x00; // 0000 0000
  9.3919 @@ -6949,63 +7321,41 @@ floppy_media_sense(drive)
  9.3920  floppy_drive_recal(drive)
  9.3921    Bit16u drive;
  9.3922  {
  9.3923 -  Bit8u  val8, dor;
  9.3924 +  Bit8u  val8;
  9.3925    Bit16u curr_cyl_offset;
  9.3926  
  9.3927 -  // set 40:3e bit 7 to 0
  9.3928 -  val8 = read_byte(0x0000, 0x043e);
  9.3929 -  val8 &= 0x7f;
  9.3930 -  write_byte(0x0000, 0x043e, val8);
  9.3931 -
  9.3932 -  // turn on motor of selected drive, DMA & int enabled, normal operation
  9.3933 -  if (drive)
  9.3934 -    dor = 0x20;
  9.3935 -  else
  9.3936 -    dor = 0x10;
  9.3937 -  dor |= 0x0c;
  9.3938 -  dor |= drive;
  9.3939 -  outb(0x03f2, dor);
  9.3940 -
  9.3941 -  // reset the disk motor timeout value of INT 08
  9.3942 -  write_byte(0x40,0x40, BX_FLOPPY_ON_CNT);
  9.3943 -
  9.3944 -  // check port 3f4 for drive readiness
  9.3945 -  val8 = inb(0x3f4);
  9.3946 -  if ( (val8 & 0xf0) != 0x80 )
  9.3947 -    BX_PANIC("floppy recal:f07: ctrl not ready\n");
  9.3948 +  floppy_prepare_controller(drive);
  9.3949  
  9.3950    // send Recalibrate command (2 bytes) to controller
  9.3951    outb(0x03f5, 0x07);  // 07: Recalibrate
  9.3952    outb(0x03f5, drive); // 0=drive0, 1=drive1
  9.3953  
  9.3954 - // turn on interrupts
  9.3955 +  // turn on interrupts
  9.3956  ASM_START
  9.3957    sti
  9.3958  ASM_END
  9.3959  
  9.3960    // wait on 40:3e bit 7 to become 1
  9.3961 -  val8 = (read_byte(0x0000, 0x043e) & 0x80);
  9.3962 -  while ( val8 == 0 ) {
  9.3963 -    val8 = (read_byte(0x0000, 0x043e) & 0x80);
  9.3964 -    }
  9.3965 -
  9.3966 - val8 = 0; // separate asm from while() loop
  9.3967 - // turn off interrupts
  9.3968 +  do {
  9.3969 +    val8 = (read_byte(0x0040, 0x003e) & 0x80);
  9.3970 +  } while ( val8 == 0 );
  9.3971 +
  9.3972 +  val8 = 0; // separate asm from while() loop
  9.3973 +  // turn off interrupts
  9.3974  ASM_START
  9.3975    cli
  9.3976  ASM_END
  9.3977  
  9.3978    // set 40:3e bit 7 to 0, and calibrated bit
  9.3979 -  val8 = read_byte(0x0000, 0x043e);
  9.3980 +  val8 = read_byte(0x0040, 0x003e);
  9.3981    val8 &= 0x7f;
  9.3982    if (drive) {
  9.3983      val8 |= 0x02; // Drive 1 calibrated
  9.3984      curr_cyl_offset = 0x0095;
  9.3985 -    }
  9.3986 -  else {
  9.3987 +  } else {
  9.3988      val8 |= 0x01; // Drive 0 calibrated
  9.3989      curr_cyl_offset = 0x0094;
  9.3990 -    }
  9.3991 +  }
  9.3992    write_byte(0x0040, 0x003e, val8);
  9.3993    write_byte(0x0040, curr_cyl_offset, 0); // current cylinder is 0
  9.3994  
  9.3995 @@ -7032,7 +7382,6 @@ floppy_drive_exists(drive)
  9.3996      return(1);
  9.3997  }
  9.3998  
  9.3999 -#if BX_SUPPORT_FLOPPY
  9.4000    void
  9.4001  int13_diskette_function(DS, ES, DI, SI, BP, ELDX, BX, DX, CX, AX, IP, CS, FLAGS)
  9.4002    Bit16u DS, ES, DI, SI, BP, ELDX, BX, DX, CX, AX, IP, CS, FLAGS;
  9.4003 @@ -7045,7 +7394,6 @@ int13_diskette_function(DS, ES, DI, SI, 
  9.4004    Bit16u es, last_addr;
  9.4005  
  9.4006    BX_DEBUG_INT13_FL("int13_diskette: AX=%04x BX=%04x CX=%04x DX=%04x ES=%04x\n", AX, BX, CX, DX, ES);
  9.4007 -  // BX_DEBUG_INT13_FL("int13_diskette: SS=%04x DS=%04x ES=%04x DI=%04x SI=%04x\n",get_SS(), get_DS(), ES, DI, SI);
  9.4008  
  9.4009    ah = GET_AH();
  9.4010  
  9.4011 @@ -7058,7 +7406,7 @@ BX_DEBUG_INT13_FL("floppy f00\n");
  9.4012          set_diskette_ret_status(1);
  9.4013          SET_CF();
  9.4014          return;
  9.4015 -        }
  9.4016 +      }
  9.4017        drive_type = inb_cmos(0x10);
  9.4018  
  9.4019        if (drive == 0)
  9.4020 @@ -7070,7 +7418,7 @@ BX_DEBUG_INT13_FL("floppy f00\n");
  9.4021          set_diskette_ret_status(0x80);
  9.4022          SET_CF();
  9.4023          return;
  9.4024 -        }
  9.4025 +      }
  9.4026        SET_AH(0);
  9.4027        set_diskette_ret_status(0);
  9.4028        CLEAR_CF(); // successful
  9.4029 @@ -7083,7 +7431,7 @@ BX_DEBUG_INT13_FL("floppy f00\n");
  9.4030        SET_AH(val8);
  9.4031        if (val8) {
  9.4032          SET_CF();
  9.4033 -        }
  9.4034 +      }
  9.4035        return;
  9.4036  
  9.4037      case 0x02: // Read Diskette Sectors
  9.4038 @@ -7095,15 +7443,15 @@ BX_DEBUG_INT13_FL("floppy f00\n");
  9.4039        head        = GET_DH();
  9.4040        drive       = GET_ELDL();
  9.4041  
  9.4042 -      if ( (drive > 1) || (head > 1) ||
  9.4043 -           (num_sectors == 0) || (num_sectors > 72) ) {
  9.4044 -BX_INFO("floppy: drive>1 || head>1 ...\n");
  9.4045 +      if ((drive > 1) || (head > 1) || (sector == 0) ||
  9.4046 +          (num_sectors == 0) || (num_sectors > 72)) {
  9.4047 +        BX_INFO("int13_diskette: read/write/verify: parameter out of range\n");
  9.4048          SET_AH(1);
  9.4049          set_diskette_ret_status(1);
  9.4050          SET_AL(0); // no sectors read
  9.4051          SET_CF(); // error occurred
  9.4052          return;
  9.4053 -        }
  9.4054 +      }
  9.4055  
  9.4056        // see if drive exists
  9.4057        if (floppy_drive_exists(drive) == 0) {
  9.4058 @@ -7112,7 +7460,7 @@ BX_INFO("floppy: drive>1 || head>1 ...\n
  9.4059          SET_AL(0); // no sectors read
  9.4060          SET_CF(); // error occurred
  9.4061          return;
  9.4062 -        }
  9.4063 +      }
  9.4064  
  9.4065        // see if media in drive, and type is known
  9.4066        if (floppy_media_known(drive) == 0) {
  9.4067 @@ -7122,8 +7470,8 @@ BX_INFO("floppy: drive>1 || head>1 ...\n
  9.4068            SET_AL(0); // no sectors read
  9.4069            SET_CF(); // error occurred
  9.4070            return;
  9.4071 -          }
  9.4072 -        }
  9.4073 +        }
  9.4074 +      }
  9.4075  
  9.4076        if (ah == 0x02) {
  9.4077          // Read Diskette Sectors
  9.4078 @@ -7142,7 +7490,7 @@ BX_INFO("floppy: drive>1 || head>1 ...\n
  9.4079          if ( base_address < base_es ) {
  9.4080            // in case of carry, adjust page by 1
  9.4081            page++;
  9.4082 -          }
  9.4083 +        }
  9.4084          base_count = (num_sectors * 512) - 1;
  9.4085  
  9.4086          // check for 64K boundary overrun
  9.4087 @@ -7153,7 +7501,7 @@ BX_INFO("floppy: drive>1 || head>1 ...\n
  9.4088            SET_AL(0); // no sectors read
  9.4089            SET_CF(); // error occurred
  9.4090            return;
  9.4091 -          }
  9.4092 +        }
  9.4093  
  9.4094          BX_DEBUG_INT13_FL("masking DMA-1 c2\n");
  9.4095          outb(0x000a, 0x06);
  9.4096 @@ -7186,28 +7534,7 @@ BX_INFO("floppy: drive>1 || head>1 ...\n
  9.4097          //--------------------------------------
  9.4098          // set up floppy controller for transfer
  9.4099          //--------------------------------------
  9.4100 -
  9.4101 -        // set 40:3e bit 7 to 0
  9.4102 -        val8 = read_byte(0x0000, 0x043e);
  9.4103 -        val8 &= 0x7f;
  9.4104 -        write_byte(0x0000, 0x043e, val8);
  9.4105 -
  9.4106 -        // turn on motor of selected drive, DMA & int enabled, normal operation
  9.4107 -        if (drive)
  9.4108 -          dor = 0x20;
  9.4109 -        else
  9.4110 -          dor = 0x10;
  9.4111 -        dor |= 0x0c;
  9.4112 -        dor |= drive;
  9.4113 -        outb(0x03f2, dor);
  9.4114 -
  9.4115 -        // reset the disk motor timeout value of INT 08
  9.4116 -        write_byte(0x40,0x40, BX_FLOPPY_ON_CNT);
  9.4117 -
  9.4118 -        // check port 3f4 for drive readiness
  9.4119 -        val8 = inb(0x3f4);
  9.4120 -        if ( (val8 & 0xf0) != 0x80 )
  9.4121 -          BX_PANIC("int13_diskette:f02: ctrl not ready\n");
  9.4122 +        floppy_prepare_controller(drive);
  9.4123  
  9.4124          // send read-normal-data command (9 bytes) to controller
  9.4125          outb(0x03f5, 0xe6); // e6: read normal data
  9.4126 @@ -7220,27 +7547,35 @@ BX_INFO("floppy: drive>1 || head>1 ...\n
  9.4127          outb(0x03f5, 0); // Gap length
  9.4128          outb(0x03f5, 0xff); // Gap length
  9.4129  
  9.4130 -       // turn on interrupts
  9.4131 +        // turn on interrupts
  9.4132    ASM_START
  9.4133          sti
  9.4134    ASM_END
  9.4135  
  9.4136          // wait on 40:3e bit 7 to become 1
  9.4137 -        val8 = (read_byte(0x0000, 0x043e) & 0x80);
  9.4138 -        while ( val8 == 0 ) {
  9.4139 -          val8 = (read_byte(0x0000, 0x043e) & 0x80);
  9.4140 +        do {
  9.4141 +          val8 = read_byte(0x0040, 0x0040);
  9.4142 +          if (val8 == 0) {
  9.4143 +            floppy_reset_controller();
  9.4144 +            SET_AH(0x80); // drive not ready (timeout)
  9.4145 +            set_diskette_ret_status(0x80);
  9.4146 +            SET_AL(0); // no sectors read
  9.4147 +            SET_CF(); // error occurred
  9.4148 +            return;
  9.4149            }
  9.4150 -
  9.4151 -       val8 = 0; // separate asm from while() loop
  9.4152 -       // turn off interrupts
  9.4153 +          val8 = (read_byte(0x0040, 0x003e) & 0x80);
  9.4154 +        } while ( val8 == 0 );
  9.4155 +
  9.4156 +        val8 = 0; // separate asm from while() loop
  9.4157 +        // turn off interrupts
  9.4158    ASM_START
  9.4159          cli
  9.4160    ASM_END
  9.4161  
  9.4162          // set 40:3e bit 7 to 0
  9.4163 -        val8 = read_byte(0x0000, 0x043e);
  9.4164 +        val8 = read_byte(0x0040, 0x003e);
  9.4165          val8 &= 0x7f;
  9.4166 -        write_byte(0x0000, 0x043e, val8);
  9.4167 +        write_byte(0x0040, 0x003e, val8);
  9.4168  
  9.4169          // check port 3f4 for accessibility to status bytes
  9.4170          val8 = inb(0x3f4);
  9.4171 @@ -7271,7 +7606,7 @@ BX_INFO("floppy: drive>1 || head>1 ...\n
  9.4172            SET_AL(0); // no sectors read
  9.4173            SET_CF(); // error occurred
  9.4174            return;
  9.4175 -          }
  9.4176 +        }
  9.4177  
  9.4178          // ??? should track be new val from return_status[3] ?
  9.4179          set_diskette_current_cyl(drive, track);
  9.4180 @@ -7279,8 +7614,7 @@ BX_INFO("floppy: drive>1 || head>1 ...\n
  9.4181          SET_AH(0x00); // success
  9.4182          CLEAR_CF();   // success
  9.4183          return;
  9.4184 -        }
  9.4185 -      else if (ah == 0x03) {
  9.4186 +      } else if (ah == 0x03) {
  9.4187          // Write Diskette Sectors
  9.4188  
  9.4189          //-----------------------------------
  9.4190 @@ -7297,7 +7631,7 @@ BX_INFO("floppy: drive>1 || head>1 ...\n
  9.4191          if ( base_address < base_es ) {
  9.4192            // in case of carry, adjust page by 1
  9.4193            page++;
  9.4194 -          }
  9.4195 +        }
  9.4196          base_count = (num_sectors * 512) - 1;
  9.4197  
  9.4198          // check for 64K boundary overrun
  9.4199 @@ -7308,7 +7642,7 @@ BX_INFO("floppy: drive>1 || head>1 ...\n
  9.4200            SET_AL(0); // no sectors read
  9.4201            SET_CF(); // error occurred
  9.4202            return;
  9.4203 -          }
  9.4204 +        }
  9.4205  
  9.4206          BX_DEBUG_INT13_FL("masking DMA-1 c2\n");
  9.4207          outb(0x000a, 0x06);
  9.4208 @@ -7334,30 +7668,9 @@ BX_INFO("floppy: drive>1 || head>1 ...\n
  9.4209          //--------------------------------------
  9.4210          // set up floppy controller for transfer
  9.4211          //--------------------------------------
  9.4212 -
  9.4213 -        // set 40:3e bit 7 to 0
  9.4214 -        val8 = read_byte(0x0000, 0x043e);
  9.4215 -        val8 &= 0x7f;
  9.4216 -        write_byte(0x0000, 0x043e, val8);
  9.4217 -
  9.4218 -        // turn on motor of selected drive, DMA & int enabled, normal operation
  9.4219 -        if (drive)
  9.4220 -          dor = 0x20;
  9.4221 -        else
  9.4222 -          dor = 0x10;
  9.4223 -        dor |= 0x0c;
  9.4224 -        dor |= drive;
  9.4225 -        outb(0x03f2, dor);
  9.4226 -
  9.4227 -        // reset the disk motor timeout value of INT 08
  9.4228 -        write_byte(0x40,0x40, BX_FLOPPY_ON_CNT);
  9.4229 -
  9.4230 -        // check port 3f4 for drive readiness
  9.4231 -        val8 = inb(0x3f4);
  9.4232 -        if ( (val8 & 0xf0) != 0x80 )
  9.4233 -          BX_PANIC("int13_diskette:f03: ctrl not ready\n");
  9.4234 -
  9.4235 -        // send read-normal-data command (9 bytes) to controller
  9.4236 +        floppy_prepare_controller(drive);
  9.4237 +
  9.4238 +        // send write-normal-data command (9 bytes) to controller
  9.4239          outb(0x03f5, 0xc5); // c5: write normal data
  9.4240          outb(0x03f5, (head << 2) | drive); // HD DR1 DR2
  9.4241          outb(0x03f5, track);
  9.4242 @@ -7368,27 +7681,35 @@ BX_INFO("floppy: drive>1 || head>1 ...\n
  9.4243          outb(0x03f5, 0); // Gap length
  9.4244          outb(0x03f5, 0xff); // Gap length
  9.4245  
  9.4246 -       // turn on interrupts
  9.4247 +        // turn on interrupts
  9.4248    ASM_START
  9.4249          sti
  9.4250    ASM_END
  9.4251  
  9.4252          // wait on 40:3e bit 7 to become 1
  9.4253 -        val8 = (read_byte(0x0000, 0x043e) & 0x80);
  9.4254 -        while ( val8 == 0 ) {
  9.4255 -          val8 = (read_byte(0x0000, 0x043e) & 0x80);
  9.4256 +        do {
  9.4257 +          val8 = read_byte(0x0040, 0x0040);
  9.4258 +          if (val8 == 0) {
  9.4259 +            floppy_reset_controller();
  9.4260 +            SET_AH(0x80); // drive not ready (timeout)
  9.4261 +            set_diskette_ret_status(0x80);
  9.4262 +            SET_AL(0); // no sectors written
  9.4263 +            SET_CF(); // error occurred
  9.4264 +            return;
  9.4265            }
  9.4266 -
  9.4267 -       val8 = 0; // separate asm from while() loop
  9.4268 -       // turn off interrupts
  9.4269 +          val8 = (read_byte(0x0040, 0x003e) & 0x80);
  9.4270 +        } while ( val8 == 0 );
  9.4271 +
  9.4272 +        val8 = 0; // separate asm from while() loop
  9.4273 +        // turn off interrupts
  9.4274    ASM_START
  9.4275          cli
  9.4276    ASM_END
  9.4277  
  9.4278          // set 40:3e bit 7 to 0
  9.4279 -        val8 = read_byte(0x0000, 0x043e);
  9.4280 +        val8 = read_byte(0x0040, 0x003e);
  9.4281          val8 &= 0x7f;
  9.4282 -        write_byte(0x0000, 0x043e, val8);
  9.4283 +        write_byte(0x0040, 0x003e, val8);
  9.4284  
  9.4285          // check port 3f4 for accessibility to status bytes
  9.4286          val8 = inb(0x3f4);
  9.4287 @@ -7432,8 +7753,7 @@ BX_INFO("floppy: drive>1 || head>1 ...\n
  9.4288          SET_AH(0x00); // success
  9.4289          CLEAR_CF();   // success
  9.4290          return;
  9.4291 -        }
  9.4292 -      else {  // if (ah == 0x04)
  9.4293 +      } else {  // if (ah == 0x04)
  9.4294          // Verify Diskette Sectors
  9.4295  
  9.4296          // ??? should track be new val from return_status[3] ?
  9.4297 @@ -7442,8 +7762,8 @@ BX_INFO("floppy: drive>1 || head>1 ...\n
  9.4298          CLEAR_CF();   // success
  9.4299          SET_AH(0x00); // success
  9.4300          return;
  9.4301 -        }
  9.4302 -
  9.4303 +      }
  9.4304 +      break;
  9.4305  
  9.4306      case 0x05: // format diskette track
  9.4307  BX_DEBUG_INT13_FL("floppy f05\n");
  9.4308 @@ -7458,7 +7778,7 @@ BX_DEBUG_INT13_FL("floppy f05\n");
  9.4309          SET_AH(1);
  9.4310          set_diskette_ret_status(1);
  9.4311          SET_CF(); // error occurred
  9.4312 -        }
  9.4313 +      }
  9.4314  
  9.4315        // see if drive exists
  9.4316        if (floppy_drive_exists(drive) == 0) {
  9.4317 @@ -7466,7 +7786,7 @@ BX_DEBUG_INT13_FL("floppy f05\n");
  9.4318          set_diskette_ret_status(0x80);
  9.4319          SET_CF(); // error occurred
  9.4320          return;
  9.4321 -        }
  9.4322 +      }
  9.4323  
  9.4324        // see if media in drive, and type is known
  9.4325        if (floppy_media_known(drive) == 0) {
  9.4326 @@ -7476,8 +7796,8 @@ BX_DEBUG_INT13_FL("floppy f05\n");
  9.4327            SET_AL(0); // no sectors read
  9.4328            SET_CF(); // error occurred
  9.4329            return;
  9.4330 -          }
  9.4331 -        }
  9.4332 +        }
  9.4333 +      }
  9.4334  
  9.4335        // set up DMA controller for transfer
  9.4336        page = (ES >> 12);   // upper 4 bits
  9.4337 @@ -7487,7 +7807,7 @@ BX_DEBUG_INT13_FL("floppy f05\n");
  9.4338        if ( base_address < base_es ) {
  9.4339          // in case of carry, adjust page by 1
  9.4340          page++;
  9.4341 -        }
  9.4342 +      }
  9.4343        base_count = (num_sectors * 4) - 1;
  9.4344  
  9.4345        // check for 64K boundary overrun
  9.4346 @@ -7498,7 +7818,7 @@ BX_DEBUG_INT13_FL("floppy f05\n");
  9.4347          SET_AL(0); // no sectors read
  9.4348          SET_CF(); // error occurred
  9.4349          return;
  9.4350 -        }
  9.4351 +      }
  9.4352  
  9.4353        outb(0x000a, 0x06);
  9.4354        outb(0x000c, 0x00); // clear flip-flop
  9.4355 @@ -7515,27 +7835,9 @@ BX_DEBUG_INT13_FL("floppy f05\n");
  9.4356        outb(0x000a, 0x02);
  9.4357  
  9.4358        // set up floppy controller for transfer
  9.4359 -      val8 = read_byte(0x0000, 0x043e);
  9.4360 -      val8 &= 0x7f;
  9.4361 -      write_byte(0x0000, 0x043e, val8);
  9.4362 -      // turn on motor of selected drive, DMA & int enabled, normal operation
  9.4363 -      if (drive)
  9.4364 -        dor = 0x20;
  9.4365 -      else
  9.4366 -        dor = 0x10;
  9.4367 -      dor |= 0x0c;
  9.4368 -      dor |= drive;
  9.4369 -      outb(0x03f2, dor);
  9.4370 -
  9.4371 -      // reset the disk motor timeout value of INT 08
  9.4372 -      write_byte(0x40,0x40, BX_FLOPPY_ON_CNT);
  9.4373 -
  9.4374 -      // check port 3f4 for drive readiness
  9.4375 -      val8 = inb(0x3f4);
  9.4376 -      if ( (val8 & 0xf0) != 0x80 )
  9.4377 -        BX_PANIC("int13_diskette:f05: ctrl not ready\n");
  9.4378 -
  9.4379 -      // send read-normal-data command (6 bytes) to controller
  9.4380 +      floppy_prepare_controller(drive);
  9.4381 +
  9.4382 +      // send format-track command (6 bytes) to controller
  9.4383        outb(0x03f5, 0x4d); // 4d: format track
  9.4384        outb(0x03f5, (head << 2) | drive); // HD DR1 DR2
  9.4385        outb(0x03f5, 2); // 512 byte sector size
  9.4386 @@ -7546,20 +7848,29 @@ BX_DEBUG_INT13_FL("floppy f05\n");
  9.4387    ASM_START
  9.4388        sti
  9.4389    ASM_END
  9.4390 +
  9.4391        // wait on 40:3e bit 7 to become 1
  9.4392 -      val8 = (read_byte(0x0000, 0x043e) & 0x80);
  9.4393 -      while ( val8 == 0 ) {
  9.4394 -        val8 = (read_byte(0x0000, 0x043e) & 0x80);
  9.4395 -        }
  9.4396 -     val8 = 0; // separate asm from while() loop
  9.4397 -     // turn off interrupts
  9.4398 +      do {
  9.4399 +        val8 = read_byte(0x0040, 0x0040);
  9.4400 +        if (val8 == 0) {
  9.4401 +          floppy_reset_controller();
  9.4402 +          SET_AH(0x80); // drive not ready (timeout)
  9.4403 +          set_diskette_ret_status(0x80);
  9.4404 +          SET_CF(); // error occurred
  9.4405 +          return;
  9.4406 +        }
  9.4407 +        val8 = (read_byte(0x0040, 0x003e) & 0x80);
  9.4408 +      } while ( val8 == 0 );
  9.4409 +
  9.4410 +      val8 = 0; // separate asm from while() loop
  9.4411 +      // turn off interrupts
  9.4412    ASM_START
  9.4413        cli
  9.4414    ASM_END
  9.4415        // set 40:3e bit 7 to 0
  9.4416 -      val8 = read_byte(0x0000, 0x043e);
  9.4417 +      val8 = read_byte(0x0040, 0x003e);
  9.4418        val8 &= 0x7f;
  9.4419 -      write_byte(0x0000, 0x043e, val8);
  9.4420 +      write_byte(0x0040, 0x003e, val8);
  9.4421        // check port 3f4 for accessibility to status bytes
  9.4422        val8 = inb(0x3f4);
  9.4423        if ( (val8 & 0xc0) != 0xc0 )
  9.4424 @@ -7911,8 +8222,9 @@ Bit16u seq_nr;
  9.4425    Bit16u bootseg;
  9.4426    Bit16u bootip;
  9.4427    Bit16u status;
  9.4428 -
  9.4429 -  struct ipl_entry e;
  9.4430 +  Bit16u bootfirst;
  9.4431 +
  9.4432 +  ipl_entry_t e;
  9.4433  
  9.4434    // if BX_ELTORITO_BOOT is not defined, old behavior
  9.4435    //   check bit 5 in CMOS reg 0x2d.  load either 0x00 or 0x80 into DL
  9.4436 @@ -7926,7 +8238,7 @@ Bit16u seq_nr;
  9.4437    //     CMOS reg 0x38 & 0xf0 : 3rd boot device
  9.4438    //   boot device codes:
  9.4439    //     0x00 : not defined
  9.4440 -  //     0x01 : first floppy 
  9.4441 +  //     0x01 : first floppy
  9.4442    //     0x02 : first harddrive
  9.4443    //     0x03 : first cdrom
  9.4444    //     0x04 - 0x0f : PnP expansion ROMs (e.g. Etherboot)
  9.4445 @@ -7938,16 +8250,25 @@ Bit16u seq_nr;
  9.4446    bootdev |= ((inb_cmos(0x38) & 0xf0) << 4);
  9.4447    bootdev >>= 4 * seq_nr;
  9.4448    bootdev &= 0xf;
  9.4449 -  if (bootdev == 0) BX_PANIC("No bootable device.\n");
  9.4450 -  
  9.4451 +
  9.4452 +  /* Read user selected device */
  9.4453 +  bootfirst = read_word(ebda_seg, IPL_BOOTFIRST_OFFSET);
  9.4454 +  if (bootfirst != 0xFFFF) {
  9.4455 +    bootdev = bootfirst;
  9.4456 +    /* User selected device not set */
  9.4457 +    write_word(ebda_seg, IPL_BOOTFIRST_OFFSET, 0xFFFF);
  9.4458 +    /* Reset boot sequence */
  9.4459 +    write_word(ebda_seg, IPL_SEQUENCE_OFFSET, 0xFFFF);
  9.4460 +  } else if (bootdev == 0) BX_PANIC("No bootable device.\n");
  9.4461 +
  9.4462    /* Translate from CMOS runes to an IPL table offset by subtracting 1 */
  9.4463    bootdev -= 1;
  9.4464 -#else  
  9.4465 +#else
  9.4466    if (seq_nr ==2) BX_PANIC("No more boot devices.");
  9.4467 -  if (!!(inb_cmos(0x2d) & 0x20) ^ (seq_nr == 1)) 
  9.4468 +  if (!!(inb_cmos(0x2d) & 0x20) ^ (seq_nr == 1))
  9.4469        /* Boot from floppy if the bit is set or it's the second boot */
  9.4470      bootdev = 0x00;
  9.4471 -  else 
  9.4472 +  else
  9.4473      bootdev = 0x01;
  9.4474  #endif
  9.4475  
  9.4476 @@ -7959,13 +8280,13 @@ Bit16u seq_nr;
  9.4477  
  9.4478    /* Do the loading, and set up vector as a far pointer to the boot
  9.4479     * address, and bootdrv as the boot drive */
  9.4480 -  print_boot_device(e.type);
  9.4481 +  print_boot_device(&e);
  9.4482  
  9.4483    switch(e.type) {
  9.4484 -  case 0x01: /* FDD */
  9.4485 -  case 0x02: /* HDD */
  9.4486 -
  9.4487 -    bootdrv = (e.type == 0x02) ? 0x80 : 0x00;
  9.4488 +  case IPL_TYPE_FLOPPY: /* FDD */
  9.4489 +  case IPL_TYPE_HARDDISK: /* HDD */
  9.4490 +
  9.4491 +    bootdrv = (e.type == IPL_TYPE_HARDDISK) ? 0x80 : 0x00;
  9.4492      bootseg = 0x07c0;
  9.4493      status = 0;
  9.4494  
  9.4495 @@ -7980,7 +8301,7 @@ ASM_START
  9.4496      mov  dl, _int18_function.bootdrv + 2[bp]
  9.4497      mov  ax, _int18_function.bootseg + 2[bp]
  9.4498      mov  es, ax         ;; segment
  9.4499 -    mov  bx, #0x0000    ;; offset
  9.4500 +    xor  bx, bx         ;; offset
  9.4501      mov  ah, #0x02      ;; function 2, read diskette sector
  9.4502      mov  al, #0x01      ;; read 1 sector
  9.4503      mov  ch, #0x00      ;; track 0
  9.4504 @@ -7998,7 +8319,7 @@ int19_load_done:
  9.4505      pop  ax
  9.4506      pop  bp
  9.4507  ASM_END
  9.4508 -    
  9.4509 +
  9.4510      if (status != 0) {
  9.4511        print_boot_failure(e.type, 1);
  9.4512        return;
  9.4513 @@ -8006,7 +8327,7 @@ ASM_END
  9.4514  
  9.4515      /* Always check the signature on a HDD boot sector; on FDD, only do
  9.4516       * the check if the CMOS doesn't tell us to skip it */
  9.4517 -    if (e.type != 0x00 || !((inb_cmos(0x38) & 0x01))) {
  9.4518 +    if ((e.type != IPL_TYPE_FLOPPY) || !((inb_cmos(0x38) & 0x01))) {
  9.4519        if (read_word(bootseg,0x1fe) != 0xaa55) {
  9.4520          print_boot_failure(e.type, 0);
  9.4521          return;
  9.4522 @@ -8024,7 +8345,7 @@ ASM_END
  9.4523    break;
  9.4524  
  9.4525  #if BX_ELTORITO_BOOT
  9.4526 -  case 0x03: /* CD-ROM */
  9.4527 +  case IPL_TYPE_CDROM: /* CD-ROM */
  9.4528      status = cdrom_boot();
  9.4529  
  9.4530      // If failure
  9.4531 @@ -8043,7 +8364,7 @@ ASM_END
  9.4532      break;
  9.4533  #endif
  9.4534  
  9.4535 -  case 0x80: /* Expansion ROM with a Bootstrap Entry Vector (a far pointer) */
  9.4536 +  case IPL_TYPE_BEV: /* Expansion ROM with a Bootstrap Entry Vector (a far pointer) */
  9.4537      bootseg = e.vector >> 16;
  9.4538      bootip = e.vector & 0xffff;
  9.4539      break;
  9.4540 @@ -8051,16 +8372,20 @@ ASM_END
  9.4541    default: return;
  9.4542    }
  9.4543  
  9.4544 -  
  9.4545 +  /* Debugging info */
  9.4546 +  BX_INFO("Booting from %x:%x\n", bootseg, bootip);
  9.4547 +
  9.4548    /* Jump to the boot vector */
  9.4549  ASM_START
  9.4550      mov  bp, sp
  9.4551 +//    push cs
  9.4552 +//    push #int18_handler
  9.4553      ;; Build an iret stack frame that will take us to the boot vector.
  9.4554      ;; iret pops ip, then cs, then flags, so push them in the opposite order.
  9.4555      pushf
  9.4556 -    mov  ax, _int18_function.bootseg + 0[bp] 
  9.4557 +    mov  ax, _int18_function.bootseg + 0[bp]
  9.4558      push ax
  9.4559 -    mov  ax, _int18_function.bootip + 0[bp] 
  9.4560 +    mov  ax, _int18_function.bootip + 0[bp]
  9.4561      push ax
  9.4562      ;; Set the magic number in ax and the boot drive in dl.
  9.4563      mov  ax, #0xaa55
  9.4564 @@ -8263,7 +8588,11 @@ int1a_function(regs, ds, iret_addr)
  9.4565        } else if (regs.u.r8.bl == 0x83) {
  9.4566          BX_INFO("bad PCI vendor ID %04x\n", regs.u.r16.dx);
  9.4567        } else if (regs.u.r8.bl == 0x86) {
  9.4568 -        BX_INFO("PCI device %04x:%04x not found\n", regs.u.r16.dx, regs.u.r16.cx);
  9.4569 +        if (regs.u.r8.al == 0x02) {
  9.4570 +          BX_INFO("PCI device %04x:%04x not found at index %d\n", regs.u.r16.dx, regs.u.r16.cx, regs.u.r16.si);
  9.4571 +        } else {
  9.4572 +          BX_INFO("no PCI device with class code 0x%02x%04x found at index %d\n", regs.u.r8.cl, regs.u.r16.dx, regs.u.r16.si);
  9.4573 +        }
  9.4574        }
  9.4575        regs.u.r8.ah = regs.u.r8.bl;
  9.4576        SetCF(iret_addr.flags);
  9.4577 @@ -8309,11 +8638,11 @@ ASM_END
  9.4578            // Done waiting.
  9.4579            Bit16u segment, offset;
  9.4580  
  9.4581 -          offset = read_word( 0x40, 0x98 );
  9.4582 -          segment = read_word( 0x40, 0x9A );
  9.4583 +          segment = read_word( 0x40, 0x98 );
  9.4584 +          offset = read_word( 0x40, 0x9A );
  9.4585            write_byte( 0x40, 0xA0, 0 );  // Turn of status byte.
  9.4586            outb_cmos( 0xB, registerB & 0x37 ); // Clear the Periodic Interrupt.
  9.4587 -          write_byte( segment, offset, 0x80 );  // Write to specified flag byte.
  9.4588 +          write_byte(segment, offset, read_byte(segment, offset) | 0x80 );  // Write to specified flag byte.
  9.4589          } else {
  9.4590            // Continue waiting.
  9.4591            time -= 0x3D1;
  9.4592 @@ -8521,13 +8850,18 @@ int13_notcdrom:
  9.4593  #endif
  9.4594  
  9.4595  int13_disk:
  9.4596 +  ;; int13_harddisk modifies high word of EAX
  9.4597 +  shr   eax, #16
  9.4598 +  push  ax
  9.4599    call  _int13_harddisk
  9.4600 +  pop   ax
  9.4601 +  shl   eax, #16
  9.4602  
  9.4603  int13_out:
  9.4604    pop ds
  9.4605    pop es
  9.4606    popa
  9.4607 -  iret 
  9.4608 +  iret
  9.4609  
  9.4610  ;----------
  9.4611  ;- INT18h -
  9.4612 @@ -8540,18 +8874,19 @@ int18_handler: ;; Boot Failure recovery:
  9.4613    xor  ax, ax
  9.4614    mov  ss, ax
  9.4615  
  9.4616 -  ;; Get the boot sequence number out of the IPL memory
  9.4617    ;; The first time we do this it will have been set to -1 so 
  9.4618    ;; we will start from device 0.
  9.4619 -  mov  bx, #IPL_SEG 
  9.4620 +  mov  ds, ax
  9.4621 +  mov  bx, word ptr [0x40E]       ;; EBDA segment
  9.4622    mov  ds, bx                     ;; Set segment
  9.4623    mov  bx, IPL_SEQUENCE_OFFSET    ;; BX is now the sequence number
  9.4624    inc  bx                         ;; ++
  9.4625    mov  IPL_SEQUENCE_OFFSET, bx    ;; Write it back
  9.4626 -  mov  ds, ax                     ;; and reset the segment to zero. 
  9.4627 +  mov  ds, ax                     ;; and reset the segment to zero.
  9.4628  
  9.4629    ;; Call the C code for the next boot device
  9.4630    push bx
  9.4631 +
  9.4632    call _int18_function
  9.4633  
  9.4634    ;; Boot failed: invoke the boot recovery function...
  9.4635 @@ -8561,6 +8896,7 @@ int18_handler: ;; Boot Failure recovery:
  9.4636  ;- INT19h -
  9.4637  ;----------
  9.4638  int19_relocated: ;; Boot function, relocated
  9.4639 +
  9.4640    ;;
  9.4641    ;; *** Warning: INT 19h resets the whole machine *** 
  9.4642    ;;
  9.4643 @@ -8572,10 +8908,12 @@ int19_relocated: ;; Boot function, reloc
  9.4644    ;; boot sequence will start, which is more or less the required behaviour.
  9.4645    ;; 
  9.4646    ;; Reset SP and SS
  9.4647 +
  9.4648    mov  ax, #0xfffe
  9.4649    mov  sp, ax
  9.4650    xor  ax, ax
  9.4651    mov  ss, ax
  9.4652 +
  9.4653    call _machine_reset
  9.4654  
  9.4655  ;----------
  9.4656 @@ -8589,7 +8927,7 @@ int1c_handler: ;; User Timer Tick
  9.4657  ;- POST: Floppy Drive -
  9.4658  ;----------------------
  9.4659  floppy_drive_post:
  9.4660 -  mov  ax, #0x0000
  9.4661 +  xor  ax, ax
  9.4662    mov  ds, ax
  9.4663  
  9.4664    mov  al, #0x00
  9.4665 @@ -8671,7 +9009,7 @@ hard_drive_post:
  9.4666    mov  dx, #0x03f6
  9.4667    out  dx, al
  9.4668  
  9.4669 -  mov  ax, #0x0000
  9.4670 +  xor  ax, ax
  9.4671    mov  ds, ax
  9.4672    mov  0x0474, al /* hard disk status of last operation */
  9.4673    mov  0x0477, al /* hard disk port offset (XT only ???) */
  9.4674 @@ -8686,8 +9024,8 @@ hard_drive_post:
  9.4675    SET_INT_VECTOR(0x76, #0xF000, #int76_handler)
  9.4676    ;; INT 41h: hard disk 0 configuration pointer
  9.4677    ;; INT 46h: hard disk 1 configuration pointer
  9.4678 -  SET_INT_VECTOR(0x41, #EBDA_SEG, #0x003D)
  9.4679 -  SET_INT_VECTOR(0x46, #EBDA_SEG, #0x004D)
  9.4680 +  SET_INT_VECTOR(0x41, word ptr [0x40E], #0x003D) /* EBDA:003D */
  9.4681 +  SET_INT_VECTOR(0x46, word ptr [0x40E], #0x004D) /* EBDA:004D */
  9.4682  
  9.4683    ;; move disk geometry data from CMOS to EBDA disk parameter table(s)
  9.4684    mov  al, #0x12
  9.4685 @@ -8716,7 +9054,9 @@ post_d0_type47:
  9.4686    ;; 22    landing zone high        D
  9.4687    ;; 23    sectors/track            E
  9.4688  
  9.4689 -  mov  ax, #EBDA_SEG
  9.4690 +  xor  ax, ax
  9.4691 +  mov  ds, ax
  9.4692 +  mov  ax, word ptr [0x40E] ;; EBDA segment
  9.4693    mov  ds, ax
  9.4694  
  9.4695    ;;; Filling EBDA table for hard disk 0.
  9.4696 @@ -8862,7 +9202,9 @@ post_d1_type47:
  9.4697    ;; 0x2b    landing zone high        D
  9.4698    ;; 0x2c    sectors/track            E
  9.4699  ;;; Fill EBDA table for hard disk 1.
  9.4700 -  mov  ax, #EBDA_SEG
  9.4701 +  xor  ax, ax
  9.4702 +  mov  ds, ax
  9.4703 +  mov  ax, word ptr [0x40E] ;; EBDA segment
  9.4704    mov  ds, ax
  9.4705    mov  al, #0x28
  9.4706    out  #0x70, al
  9.4707 @@ -8993,13 +9335,42 @@ ebda_post:
  9.4708  ;--------------------
  9.4709  ; relocated here because the primary POST area isnt big enough.
  9.4710  eoi_jmp_post:
  9.4711 -  call eoi_both_pics
  9.4712 -
  9.4713 +  mov   al, #0x20
  9.4714 +  out   #0xA0, al ;; slave  PIC EOI
  9.4715 +  mov   al, #0x20
  9.4716 +  out   #0x20, al ;; master PIC EOI
  9.4717 +
  9.4718 +jmp_post_0x467:
  9.4719    xor ax, ax
  9.4720    mov ds, ax
  9.4721  
  9.4722    jmp far ptr [0x467]
  9.4723  
  9.4724 +iret_post_0x467:
  9.4725 +  xor ax, ax
  9.4726 +  mov ds, ax
  9.4727 +
  9.4728 +  mov sp, [0x467]
  9.4729 +  mov ss, [0x469]
  9.4730 +  iret
  9.4731 +
  9.4732 +retf_post_0x467:
  9.4733 +  xor ax, ax
  9.4734 +  mov ds, ax
  9.4735 +
  9.4736 +  mov sp, [0x467]
  9.4737 +  mov ss, [0x469]
  9.4738 +  retf
  9.4739 +
  9.4740 +s3_post:
  9.4741 +#if BX_ROMBIOS32
  9.4742 +  call rombios32_init
  9.4743 +#endif
  9.4744 +  call _s3_resume
  9.4745 +  mov bl, #0x00
  9.4746 +  and ax, ax
  9.4747 +  jz normal_post
  9.4748 +  call _s3_resume_panic
  9.4749  
  9.4750  ;--------------------
  9.4751  eoi_both_pics:
  9.4752 @@ -9152,16 +9523,22 @@ bios32_structure:
  9.4753  
  9.4754  .align 16
  9.4755  bios32_entry_point:
  9.4756 -  pushf
  9.4757 -  cmp eax, #0x49435024
  9.4758 +  pushfd
  9.4759 +  cmp eax, #0x49435024 ;; "$PCI"
  9.4760    jne unknown_service
  9.4761    mov eax, #0x80000000
  9.4762    mov dx, #0x0cf8
  9.4763    out dx, eax
  9.4764    mov dx, #0x0cfc
  9.4765    in  eax, dx
  9.4766 -  cmp eax, #0x12378086
  9.4767 +#ifdef PCI_FIXED_HOST_BRIDGE
  9.4768 +  cmp eax, #PCI_FIXED_HOST_BRIDGE
  9.4769    jne unknown_service
  9.4770 +#else
  9.4771 +  ;; say ok if a device is present
  9.4772 +  cmp eax, #0xffffffff
  9.4773 +  je unknown_service
  9.4774 +#endif
  9.4775    mov ebx, #0x000f0000
  9.4776    mov ecx, #0
  9.4777    mov edx, #pcibios_protected
  9.4778 @@ -9170,12 +9547,15 @@ bios32_entry_point:
  9.4779  unknown_service:
  9.4780    mov al, #0x80
  9.4781  bios32_end:
  9.4782 -  popf
  9.4783 +#ifdef BX_QEMU
  9.4784 +  and dword ptr[esp+8],0xfffffffc ;; reset CS.RPL for kqemu
  9.4785 +#endif
  9.4786 +  popfd
  9.4787    retf
  9.4788  
  9.4789  .align 16
  9.4790  pcibios_protected:
  9.4791 -  pushf
  9.4792 +  pushfd
  9.4793    cli
  9.4794    push esi
  9.4795    push edi
  9.4796 @@ -9183,15 +9563,15 @@ pcibios_protected:
  9.4797    jne pci_pro_f02
  9.4798    mov bx, #0x0210
  9.4799    mov cx, #0
  9.4800 -  mov edx, #0x20494350
  9.4801 +  mov edx, #0x20494350 ;; "PCI "
  9.4802    mov al, #0x01
  9.4803    jmp pci_pro_ok
  9.4804  pci_pro_f02: ;; find pci device
  9.4805    cmp al, #0x02
  9.4806 -  jne pci_pro_f08
  9.4807 +  jne pci_pro_f03
  9.4808    shl ecx, #16
  9.4809    mov cx, dx
  9.4810 -  mov bx, #0x0000
  9.4811 +  xor bx, bx
  9.4812    mov di, #0x00
  9.4813  pci_pro_devloop:
  9.4814    call pci_pro_select_reg
  9.4815 @@ -9208,6 +9588,27 @@ pci_pro_nextdev:
  9.4816    jne pci_pro_devloop
  9.4817    mov ah, #0x86
  9.4818    jmp pci_pro_fail
  9.4819 +pci_pro_f03: ;; find class code
  9.4820 +  cmp al, #0x03
  9.4821 +  jne pci_pro_f08
  9.4822 +  xor bx, bx
  9.4823 +  mov di, #0x08
  9.4824 +pci_pro_devloop2:
  9.4825 +  call pci_pro_select_reg
  9.4826 +  mov dx, #0x0cfc
  9.4827 +  in  eax, dx
  9.4828 +  shr eax, #8
  9.4829 +  cmp eax, ecx
  9.4830 +  jne pci_pro_nextdev2
  9.4831 +  cmp si, #0
  9.4832 +  je  pci_pro_ok
  9.4833 +  dec si
  9.4834 +pci_pro_nextdev2:
  9.4835 +  inc bx
  9.4836 +  cmp bx, #0x0100
  9.4837 +  jne pci_pro_devloop2
  9.4838 +  mov ah, #0x86
  9.4839 +  jmp pci_pro_fail
  9.4840  pci_pro_f08: ;; read configuration byte
  9.4841    cmp al, #0x08
  9.4842    jne pci_pro_f09
  9.4843 @@ -9281,16 +9682,20 @@ pci_pro_unknown:
  9.4844  pci_pro_fail:
  9.4845    pop edi
  9.4846    pop esi
  9.4847 -  sti
  9.4848 -  popf
  9.4849 +#ifdef BX_QEMU
  9.4850 +  and dword ptr[esp+8],0xfffffffc ;; reset CS.RPL for kqemu
  9.4851 +#endif
  9.4852 +  popfd
  9.4853    stc
  9.4854    retf
  9.4855  pci_pro_ok:
  9.4856    xor ah, ah
  9.4857    pop edi
  9.4858    pop esi
  9.4859 -  sti
  9.4860 -  popf
  9.4861 +#ifdef BX_QEMU
  9.4862 +  and dword ptr[esp+8],0xfffffffc ;; reset CS.RPL for kqemu
  9.4863 +#endif
  9.4864 +  popfd
  9.4865    clc
  9.4866    retf
  9.4867  
  9.4868 @@ -9317,8 +9722,14 @@ pcibios_real:
  9.4869    out dx, eax
  9.4870    mov dx, #0x0cfc
  9.4871    in  eax, dx
  9.4872 -  cmp eax, #0x12378086
  9.4873 +#ifdef PCI_FIXED_HOST_BRIDGE
  9.4874 +  cmp eax, #PCI_FIXED_HOST_BRIDGE
  9.4875    je  pci_present
  9.4876 +#else
  9.4877 +  ;; say ok if a device is present
  9.4878 +  cmp eax, #0xffffffff
  9.4879 +  jne  pci_present
  9.4880 +#endif
  9.4881    pop dx
  9.4882    pop eax
  9.4883    mov ah, #0xff
  9.4884 @@ -9332,7 +9743,7 @@ pci_present:
  9.4885    mov ax, #0x0001
  9.4886    mov bx, #0x0210
  9.4887    mov cx, #0
  9.4888 -  mov edx, #0x20494350
  9.4889 +  mov edx, #0x20494350 ;; "PCI "
  9.4890    mov edi, #0xf0000
  9.4891    mov di, #pcibios_protected
  9.4892    clc
  9.4893 @@ -9341,10 +9752,10 @@ pci_real_f02: ;; find pci device
  9.4894    push esi
  9.4895    push edi
  9.4896    cmp al, #0x02
  9.4897 -  jne pci_real_f08
  9.4898 +  jne pci_real_f03
  9.4899    shl ecx, #16
  9.4900    mov cx, dx
  9.4901 -  mov bx, #0x0000
  9.4902 +  xor bx, bx
  9.4903    mov di, #0x00
  9.4904  pci_real_devloop:
  9.4905    call pci_real_select_reg
  9.4906 @@ -9361,7 +9772,30 @@ pci_real_nextdev:
  9.4907    jne pci_real_devloop
  9.4908    mov dx, cx
  9.4909    shr ecx, #16
  9.4910 -  mov ah, #0x86
  9.4911 +  mov ax, #0x8602
  9.4912 +  jmp pci_real_fail
  9.4913 +pci_real_f03: ;; find class code
  9.4914 +  cmp al, #0x03
  9.4915 +  jne pci_real_f08
  9.4916 +  xor bx, bx
  9.4917 +  mov di, #0x08
  9.4918 +pci_real_devloop2:
  9.4919 +  call pci_real_select_reg
  9.4920 +  mov dx, #0x0cfc
  9.4921 +  in  eax, dx
  9.4922 +  shr eax, #8
  9.4923 +  cmp eax, ecx
  9.4924 +  jne pci_real_nextdev2
  9.4925 +  cmp si, #0
  9.4926 +  je  pci_real_ok
  9.4927 +  dec si
  9.4928 +pci_real_nextdev2:
  9.4929 +  inc bx
  9.4930 +  cmp bx, #0x0100
  9.4931 +  jne pci_real_devloop2
  9.4932 +  mov dx, cx
  9.4933 +  shr ecx, #16
  9.4934 +  mov ax, #0x8603
  9.4935    jmp pci_real_fail
  9.4936  pci_real_f08: ;; read configuration byte
  9.4937    cmp al, #0x08
  9.4938 @@ -9423,7 +9857,7 @@ pci_real_f0c: ;; write configuration wor
  9.4939    jmp pci_real_ok
  9.4940  pci_real_f0d: ;; write configuration dword
  9.4941    cmp al, #0x0d
  9.4942 -  jne pci_real_unknown
  9.4943 +  jne pci_real_f0e
  9.4944    call pci_real_select_reg
  9.4945    push dx
  9.4946    mov dx, #0x0cfc
  9.4947 @@ -9431,6 +9865,46 @@ pci_real_f0d: ;; write configuration dwo
  9.4948    out dx, eax
  9.4949    pop dx
  9.4950    jmp pci_real_ok
  9.4951 +pci_real_f0e: ;; get irq routing options
  9.4952 +  cmp al, #0x0e
  9.4953 +  jne pci_real_unknown
  9.4954 +  SEG ES
  9.4955 +  cmp word ptr [di], #pci_routing_table_structure_end - pci_routing_table_structure_start
  9.4956 +  jb pci_real_too_small
  9.4957 +  SEG ES
  9.4958 +  mov word ptr [di], #pci_routing_table_structure_end - pci_routing_table_structure_start
  9.4959 +  pushf
  9.4960 +  push ds
  9.4961 +  push es
  9.4962 +  push cx
  9.4963 +  push si
  9.4964 +  push di
  9.4965 +  cld
  9.4966 +  mov si, #pci_routing_table_structure_start
  9.4967 +  push cs
  9.4968 +  pop ds
  9.4969 +  SEG ES
  9.4970 +  mov cx, [di+2]
  9.4971 +  SEG ES
  9.4972 +  mov es, [di+4]
  9.4973 +  mov di, cx
  9.4974 +  mov cx, #pci_routing_table_structure_end - pci_routing_table_structure_start
  9.4975 +  rep
  9.4976 +      movsb
  9.4977 +  pop di
  9.4978 +  pop si
  9.4979 +  pop cx
  9.4980 +  pop es
  9.4981 +  pop ds
  9.4982 +  popf
  9.4983 +  mov bx, #(1 << 9) | (1 << 11)   ;; irq 9 and 11 are used
  9.4984 +  jmp pci_real_ok
  9.4985 +pci_real_too_small:
  9.4986 +  SEG ES
  9.4987 +  mov word ptr [di], #pci_routing_table_structure_end - pci_routing_table_structure_start
  9.4988 +  mov ah, #0x89
  9.4989 +  jmp pci_real_fail
  9.4990 +
  9.4991  pci_real_unknown:
  9.4992    mov ah, #0x81
  9.4993  pci_real_fail:
  9.4994 @@ -9457,7 +9931,7 @@ pci_real_select_reg:
  9.4995    out dx,  eax
  9.4996    pop dx
  9.4997    ret
  9.4998 -  
  9.4999 +
  9.5000  .align 16
  9.5001  pci_routing_table_structure:
  9.5002    db 0x24, 0x50, 0x49, 0x52  ;; "$PIR" signature
  9.5003 @@ -9465,21 +9939,22 @@ pci_routing_table_structure:
  9.5004    dw 32 + (6 * 16) ;; table size
  9.5005    db 0 ;; PCI interrupt router bus
  9.5006    db 0x08 ;; PCI interrupt router DevFunc
  9.5007 -  dw 0x0000 ;; PCI exclusive IRQs 
  9.5008 +  dw 0x0000 ;; PCI exclusive IRQs
  9.5009    dw 0x8086 ;; compatible PCI interrupt router vendor ID
  9.5010 -  dw 0x7000 ;; compatible PCI interrupt router device ID
  9.5011 +  dw 0x122e ;; compatible PCI interrupt router device ID
  9.5012    dw 0,0 ;; Miniport data
  9.5013    db 0,0,0,0,0,0,0,0,0,0,0 ;; reserved
  9.5014 -  db 0x07 ;; checksum
  9.5015 +  db 0x37 ;; checksum
  9.5016 +pci_routing_table_structure_start:
  9.5017    ;; first slot entry PCI-to-ISA (embedded)
  9.5018    db 0 ;; pci bus number
  9.5019    db 0x08 ;; pci device number (bit 7-3)
  9.5020    db 0x61 ;; link value INTA#: pointer into PCI2ISA config space
  9.5021 -  dw 0x0c20 ;; IRQ bitmap INTA# 
  9.5022 +  dw 0x0c20 ;; IRQ bitmap INTA#
  9.5023    db 0x62 ;; link value INTB#
  9.5024 -  dw 0x0c20 ;; IRQ bitmap INTB# 
  9.5025 +  dw 0x0c20 ;; IRQ bitmap INTB#
  9.5026    db 0x63 ;; link value INTC#
  9.5027 -  dw 0x0c20 ;; IRQ bitmap INTC# 
  9.5028 +  dw 0x0c20 ;; IRQ bitmap INTC#
  9.5029    db 0x60 ;; link value INTD#
  9.5030    dw 0x0c20 ;; IRQ bitmap INTD#
  9.5031    db 0 ;; physical slot (0 = embedded)
  9.5032 @@ -9488,11 +9963,11 @@ pci_routing_table_structure:
  9.5033    db 0 ;; pci bus number
  9.5034    db 0x10 ;; pci device number (bit 7-3)
  9.5035    db 0x62 ;; link value INTA#
  9.5036 -  dw 0x0c20 ;; IRQ bitmap INTA# 
  9.5037 +  dw 0x0c20 ;; IRQ bitmap INTA#
  9.5038    db 0x63 ;; link value INTB#
  9.5039 -  dw 0x0c20 ;; IRQ bitmap INTB# 
  9.5040 +  dw 0x0c20 ;; IRQ bitmap INTB#
  9.5041    db 0x60 ;; link value INTC#
  9.5042 -  dw 0x0c20 ;; IRQ bitmap INTC# 
  9.5043 +  dw 0x0c20 ;; IRQ bitmap INTC#
  9.5044    db 0x61 ;; link value INTD#
  9.5045    dw 0x0c20 ;; IRQ bitmap INTD#
  9.5046    db 1 ;; physical slot (0 = embedded)
  9.5047 @@ -9501,11 +9976,11 @@ pci_routing_table_structure:
  9.5048    db 0 ;; pci bus number
  9.5049    db 0x18 ;; pci device number (bit 7-3)
  9.5050    db 0x63 ;; link value INTA#
  9.5051 -  dw 0x0c20 ;; IRQ bitmap INTA# 
  9.5052 +  dw 0x0c20 ;; IRQ bitmap INTA#
  9.5053    db 0x60 ;; link value INTB#
  9.5054 -  dw 0x0c20 ;; IRQ bitmap INTB# 
  9.5055 +  dw 0x0c20 ;; IRQ bitmap INTB#
  9.5056    db 0x61 ;; link value INTC#
  9.5057 -  dw 0x0c20 ;; IRQ bitmap INTC# 
  9.5058 +  dw 0x0c20 ;; IRQ bitmap INTC#
  9.5059    db 0x62 ;; link value INTD#
  9.5060    dw 0x0c20 ;; IRQ bitmap INTD#
  9.5061    db 2 ;; physical slot (0 = embedded)
  9.5062 @@ -9514,11 +9989,11 @@ pci_routing_table_structure:
  9.5063    db 0 ;; pci bus number
  9.5064    db 0x20 ;; pci device number (bit 7-3)
  9.5065    db 0x60 ;; link value INTA#
  9.5066 -  dw 0x0c20 ;; IRQ bitmap INTA# 
  9.5067 +  dw 0x0c20 ;; IRQ bitmap INTA#
  9.5068    db 0x61 ;; link value INTB#
  9.5069 -  dw 0x0c20 ;; IRQ bitmap INTB# 
  9.5070 +  dw 0x0c20 ;; IRQ bitmap INTB#
  9.5071    db 0x62 ;; link value INTC#
  9.5072 -  dw 0x0c20 ;; IRQ bitmap INTC# 
  9.5073 +  dw 0x0c20 ;; IRQ bitmap INTC#
  9.5074    db 0x63 ;; link value INTD#
  9.5075    dw 0x0c20 ;; IRQ bitmap INTD#
  9.5076    db 3 ;; physical slot (0 = embedded)
  9.5077 @@ -9527,11 +10002,11 @@ pci_routing_table_structure:
  9.5078    db 0 ;; pci bus number
  9.5079    db 0x28 ;; pci device number (bit 7-3)
  9.5080    db 0x61 ;; link value INTA#
  9.5081 -  dw 0x0c20 ;; IRQ bitmap INTA# 
  9.5082 +  dw 0x0c20 ;; IRQ bitmap INTA#
  9.5083    db 0x62 ;; link value INTB#
  9.5084 -  dw 0x0c20 ;; IRQ bitmap INTB# 
  9.5085 +  dw 0x0c20 ;; IRQ bitmap INTB#
  9.5086    db 0x63 ;; link value INTC#
  9.5087 -  dw 0x0c20 ;; IRQ bitmap INTC# 
  9.5088 +  dw 0x0c20 ;; IRQ bitmap INTC#
  9.5089    db 0x60 ;; link value INTD#
  9.5090    dw 0x0c20 ;; IRQ bitmap INTD#
  9.5091    db 4 ;; physical slot (0 = embedded)
  9.5092 @@ -9540,17 +10015,352 @@ pci_routing_table_structure:
  9.5093    db 0 ;; pci bus number
  9.5094    db 0x30 ;; pci device number (bit 7-3)
  9.5095    db 0x62 ;; link value INTA#
  9.5096 -  dw 0x0c20 ;; IRQ bitmap INTA# 
  9.5097 +  dw 0x0c20 ;; IRQ bitmap INTA#
  9.5098    db 0x63 ;; link value INTB#
  9.5099 -  dw 0x0c20 ;; IRQ bitmap INTB# 
  9.5100 +  dw 0x0c20 ;; IRQ bitmap INTB#
  9.5101    db 0x60 ;; link value INTC#
  9.5102 -  dw 0x0c20 ;; IRQ bitmap INTC# 
  9.5103 +  dw 0x0c20 ;; IRQ bitmap INTC#
  9.5104    db 0x61 ;; link value INTD#
  9.5105    dw 0x0c20 ;; IRQ bitmap INTD#
  9.5106    db 5 ;; physical slot (0 = embedded)
  9.5107    db 0 ;; reserved
  9.5108 +pci_routing_table_structure_end:
  9.5109 +
  9.5110 +#if !BX_ROMBIOS32
  9.5111 +pci_irq_list:
  9.5112 +  db 11, 10, 9, 5;
  9.5113 +
  9.5114 +pcibios_init_sel_reg:
  9.5115 +  push eax
  9.5116 +  mov eax, #0x800000
  9.5117 +  mov ax,  bx
  9.5118 +  shl eax, #8
  9.5119 +  and dl,  #0xfc
  9.5120 +  or  al,  dl
  9.5121 +  mov dx,  #0x0cf8
  9.5122 +  out dx,  eax
  9.5123 +  pop eax
  9.5124 +  ret
  9.5125 +
  9.5126 +pcibios_init_iomem_bases:
  9.5127 +  push bp
  9.5128 +  mov  bp, sp
  9.5129 +  mov  eax, #0xe0000000 ;; base for memory init
  9.5130 +  push eax
  9.5131 +  mov  ax, #0xc000 ;; base for i/o init
  9.5132 +  push ax
  9.5133 +  mov  ax, #0x0010 ;; start at base address #0
  9.5134 +  push ax
  9.5135 +  mov  bx, #0x0008
  9.5136 +pci_init_io_loop1:
  9.5137 +  mov  dl, #0x00
  9.5138 +  call pcibios_init_sel_reg
  9.5139 +  mov  dx, #0x0cfc
  9.5140 +  in   ax, dx
  9.5141 +  cmp  ax, #0xffff
  9.5142 +  jz   next_pci_dev
  9.5143 +  mov  dl, #0x04 ;; disable i/o and memory space access
  9.5144 +  call pcibios_init_sel_reg
  9.5145 +  mov  dx, #0x0cfc
  9.5146 +  in   al, dx
  9.5147 +  and  al, #0xfc
  9.5148 +  out  dx, al
  9.5149 +pci_init_io_loop2:
  9.5150 +  mov  dl, [bp-8]
  9.5151 +  call pcibios_init_sel_reg
  9.5152 +  mov  dx, #0x0cfc
  9.5153 +  in   eax, dx
  9.5154 +  test al, #0x01
  9.5155 +  jnz  init_io_base
  9.5156 +  mov  ecx, eax
  9.5157 +  mov  eax, #0xffffffff
  9.5158 +  out  dx, eax
  9.5159 +  in   eax, dx
  9.5160 +  cmp  eax, ecx
  9.5161 +  je   next_pci_base
  9.5162 +  xor  eax, #0xffffffff
  9.5163 +  mov  ecx, eax
  9.5164 +  mov  eax, [bp-4]
  9.5165 +  out  dx, eax
  9.5166 +  add  eax, ecx ;; calculate next free mem base
  9.5167 +  add  eax, #0x01000000
  9.5168 +  and  eax, #0xff000000
  9.5169 +  mov  [bp-4], eax
  9.5170 +  jmp  next_pci_base
  9.5171 +init_io_base:
  9.5172 +  mov  cx, ax
  9.5173 +  mov  ax, #0xffff
  9.5174 +  out  dx, ax
  9.5175 +  in   ax, dx
  9.5176 +  cmp  ax, cx
  9.5177 +  je   next_pci_base
  9.5178 +  xor  ax, #0xfffe
  9.5179 +  mov  cx, ax
  9.5180 +  mov  ax, [bp-6]
  9.5181 +  out  dx, ax
  9.5182 +  add  ax, cx ;; calculate next free i/o base
  9.5183 +  add  ax, #0x0100
  9.5184 +  and  ax, #0xff00
  9.5185 +  mov  [bp-6], ax
  9.5186 +next_pci_base:
  9.5187 +  mov  al, [bp-8]
  9.5188 +  add  al, #0x04
  9.5189 +  cmp  al, #0x28
  9.5190 +  je   enable_iomem_space
  9.5191 +  mov  byte ptr[bp-8], al
  9.5192 +  jmp  pci_init_io_loop2
  9.5193 +enable_iomem_space:
  9.5194 +  mov  dl, #0x04 ;; enable i/o and memory space access if available
  9.5195 +  call pcibios_init_sel_reg
  9.5196 +  mov  dx, #0x0cfc
  9.5197 +  in   al, dx
  9.5198 +  or   al, #0x07
  9.5199 +  out  dx, al
  9.5200 +next_pci_dev:
  9.5201 +  mov  byte ptr[bp-8], #0x10
  9.5202 +  inc  bx
  9.5203 +  cmp  bx, #0x0100
  9.5204 +  jne  pci_init_io_loop1
  9.5205 +  mov  sp, bp
  9.5206 +  pop  bp
  9.5207 +  ret
  9.5208 +
  9.5209 +pcibios_init_set_elcr:
  9.5210 +  push ax
  9.5211 +  push cx
  9.5212 +  mov  dx, #0x04d0
  9.5213 +  test al, #0x08
  9.5214 +  jz   is_master_pic
  9.5215 +  inc  dx
  9.5216 +  and  al, #0x07
  9.5217 +is_master_pic:
  9.5218 +  mov  cl, al
  9.5219 +  mov  bl, #0x01
  9.5220 +  shl  bl, cl
  9.5221 +  in   al, dx
  9.5222 +  or   al, bl
  9.5223 +  out  dx, al
  9.5224 +  pop  cx
  9.5225 +  pop  ax
  9.5226 +  ret
  9.5227 +
  9.5228 +pcibios_init_irqs:
  9.5229 +  push ds
  9.5230 +  push bp
  9.5231 +  mov  ax, #0xf000
  9.5232 +  mov  ds, ax
  9.5233 +  mov  dx, #0x04d0 ;; reset ELCR1 + ELCR2
  9.5234 +  mov  al, #0x00
  9.5235 +  out  dx, al
  9.5236 +  inc  dx
  9.5237 +  out  dx, al
  9.5238 +  mov  si, #pci_routing_table_structure
  9.5239 +  mov  bh, [si+8]
  9.5240 +  mov  bl, [si+9]
  9.5241 +  mov  dl, #0x00
  9.5242 +  call pcibios_init_sel_reg
  9.5243 +  mov  dx, #0x0cfc
  9.5244 +  in   eax, dx
  9.5245 +  cmp  eax, [si+12] ;; check irq router
  9.5246 +  jne  pci_init_end
  9.5247 +  mov  dl, [si+34]
  9.5248 +  call pcibios_init_sel_reg
  9.5249 +  push bx ;; save irq router bus + devfunc
  9.5250 +  mov  dx, #0x0cfc
  9.5251 +  mov  ax, #0x8080
  9.5252 +  out  dx, ax ;; reset PIRQ route control
  9.5253 +  add  dx, #2
  9.5254 +  out  dx, ax
  9.5255 +  mov  ax, [si+6]
  9.5256 +  sub  ax, #0x20
  9.5257 +  shr  ax, #4
  9.5258 +  mov  cx, ax
  9.5259 +  add  si, #0x20 ;; set pointer to 1st entry
  9.5260 +  mov  bp, sp
  9.5261 +  mov  ax, #pci_irq_list
  9.5262 +  push ax
  9.5263 +  xor  ax, ax
  9.5264 +  push ax
  9.5265 +pci_init_irq_loop1:
  9.5266 +  mov  bh, [si]
  9.5267 +  mov  bl, [si+1]
  9.5268 +pci_init_irq_loop2:
  9.5269 +  mov  dl, #0x00
  9.5270 +  call pcibios_init_sel_reg
  9.5271 +  mov  dx, #0x0cfc
  9.5272 +  in   ax, dx
  9.5273 +  cmp  ax, #0xffff
  9.5274 +  jnz  pci_test_int_pin
  9.5275 +  test bl, #0x07
  9.5276 +  jz   next_pir_entry
  9.5277 +  jmp  next_pci_func
  9.5278 +pci_test_int_pin:
  9.5279 +  mov  dl, #0x3c
  9.5280 +  call pcibios_init_sel_reg
  9.5281 +  mov  dx, #0x0cfd
  9.5282 +  in   al, dx
  9.5283 +  and  al, #0x07
  9.5284 +  jz   next_pci_func
  9.5285 +  dec  al ;; determine pirq reg
  9.5286 +  mov  dl, #0x03
  9.5287 +  mul  al, dl
  9.5288 +  add  al, #0x02
  9.5289 +  xor  ah, ah
  9.5290 +  mov  bx, ax
  9.5291 +  mov  al, [si+bx]
  9.5292 +  mov  dl, al
  9.5293 +  mov  bx, [bp]
  9.5294 +  call pcibios_init_sel_reg
  9.5295 +  mov  dx, #0x0cfc
  9.5296 +  and  al, #0x03
  9.5297 +  add  dl, al
  9.5298 +  in   al, dx
  9.5299 +  cmp  al, #0x80
  9.5300 +  jb   pirq_found
  9.5301 +  mov  bx, [bp-2] ;; pci irq list pointer
  9.5302 +  mov  al, [bx]
  9.5303 +  out  dx, al
  9.5304 +  inc  bx
  9.5305 +  mov  [bp-2], bx
  9.5306 +  call pcibios_init_set_elcr
  9.5307 +pirq_found:
  9.5308 +  mov  bh, [si]
  9.5309 +  mov  bl, [si+1]
  9.5310 +  add  bl, [bp-3] ;; pci function number
  9.5311 +  mov  dl, #0x3c
  9.5312 +  call pcibios_init_sel_reg
  9.5313 +  mov  dx, #0x0cfc
  9.5314 +  out  dx, al
  9.5315 +next_pci_func:
  9.5316 +  inc  byte ptr[bp-3]
  9.5317 +  inc  bl
  9.5318 +  test bl, #0x07
  9.5319 +  jnz  pci_init_irq_loop2
  9.5320 +next_pir_entry:
  9.5321 +  add  si, #0x10
  9.5322 +  mov  byte ptr[bp-3], #0x00
  9.5323 +  loop pci_init_irq_loop1
  9.5324 +  mov  sp, bp
  9.5325 +  pop  bx
  9.5326 +pci_init_end:
  9.5327 +  pop  bp
  9.5328 +  pop  ds
  9.5329 +  ret
  9.5330 +#endif // !BX_ROMBIOS32
  9.5331  #endif // BX_PCIBIOS
  9.5332  
  9.5333 +#if BX_ROMBIOS32
  9.5334 +rombios32_init:
  9.5335 +  ;; save a20 and enable it
  9.5336 +  in al, 0x92
  9.5337 +  push ax
  9.5338 +  or al, #0x02
  9.5339 +  out 0x92, al
  9.5340 +
  9.5341 +  ;; save SS:SP to the BDA
  9.5342 +  xor ax, ax
  9.5343 +  mov ds, ax
  9.5344 +  mov 0x0469, ss
  9.5345 +  mov 0x0467, sp
  9.5346 +
  9.5347 +  SEG CS
  9.5348 +    lidt [pmode_IDT_info]
  9.5349 +  SEG CS
  9.5350 +    lgdt [rombios32_gdt_48]
  9.5351 +  ;; set PE bit in CR0
  9.5352 +  mov  eax, cr0
  9.5353 +  or   al, #0x01
  9.5354 +  mov  cr0, eax
  9.5355 +  ;; start protected mode code: ljmpl 0x10:rombios32_init1
  9.5356 +  db 0x66, 0xea
  9.5357 +  dw rombios32_05
  9.5358 +  dw 0x000f       ;; high 16 bit address
  9.5359 +  dw 0x0010
  9.5360 +
  9.5361 +use32 386
  9.5362 +rombios32_05:
  9.5363 +  ;; init data segments
  9.5364 +  mov eax, #0x18
  9.5365 +  mov ds, ax
  9.5366 +  mov es, ax
  9.5367 +  mov ss, ax
  9.5368 +  xor eax, eax
  9.5369 +  mov fs, ax
  9.5370 +  mov gs, ax
  9.5371 +  cld
  9.5372 +
  9.5373 +  ;; init the stack pointer to point below EBDA
  9.5374 +  mov ax, [0x040e]
  9.5375 +  shl eax, #4
  9.5376 +  mov esp, #-0x10
  9.5377 +  add esp, eax
  9.5378 +
  9.5379 +  ;; pass pointer to s3_resume_flag and s3_resume_vector to rombios32
  9.5380 +  push #0x04b0
  9.5381 +  push #0x04b2
  9.5382 +
  9.5383 +  ;; call rombios32 code
  9.5384 +  mov eax, #0x000e0000
  9.5385 +  call eax
  9.5386 +
  9.5387 +  ;; return to 16 bit protected mode first
  9.5388 +  db 0xea
  9.5389 +  dd rombios32_10
  9.5390 +  dw 0x20
  9.5391 +
  9.5392 +use16 386
  9.5393 +rombios32_10:
  9.5394 +  ;; restore data segment limits to 0xffff
  9.5395 +  mov ax, #0x28
  9.5396 +  mov ds, ax
  9.5397 +  mov es, ax
  9.5398 +  mov ss, ax
  9.5399 +  mov fs, ax
  9.5400 +  mov gs, ax
  9.5401 +
  9.5402 +  ;; reset PE bit in CR0
  9.5403 +  mov  eax, cr0
  9.5404 +  and  al, #0xFE
  9.5405 +  mov  cr0, eax
  9.5406 +
  9.5407 +  ;; far jump to flush CPU queue after transition to real mode
  9.5408 +  JMP_AP(0xf000, rombios32_real_mode)
  9.5409 +
  9.5410 +rombios32_real_mode:
  9.5411 +  ;; restore IDT to normal real-mode defaults
  9.5412 +  SEG CS
  9.5413 +    lidt [rmode_IDT_info]
  9.5414 +
  9.5415 +  xor ax, ax
  9.5416 +  mov ds, ax
  9.5417 +  mov es, ax
  9.5418 +  mov fs, ax
  9.5419 +  mov gs, ax
  9.5420 +
  9.5421 +  ;; restore SS:SP from the BDA
  9.5422 +  mov ss, 0x0469
  9.5423 +  xor esp, esp
  9.5424 +  mov sp, 0x0467
  9.5425 +  ;; restore a20
  9.5426 +  pop ax
  9.5427 +  out 0x92, al
  9.5428 +  ret
  9.5429 +
  9.5430 +rombios32_gdt_48: ;; 6-byte limit+base record describing rombios32_gdt (presumably the lgdt operand; the load is outside this view)
  9.5431 +  dw 0x30 ;; limit field: 0x30 = 6 descriptors * 8 bytes
  9.5432 +  dw rombios32_gdt ;; base, low 16 bits: offset of the table below
  9.5433 +  dw 0x000f ;; base, high 16 bits
  9.5434 +
  9.5435 +rombios32_gdt: ;; six 8-byte descriptors; the selector of each entry is its byte offset in this table
  9.5436 +  dw 0, 0, 0, 0 ; all-zero descriptor (selector 0x00)
  9.5437 +  dw 0, 0, 0, 0 ; all-zero descriptor (selector 0x08)
  9.5438 +  dw 0xffff, 0, 0x9b00, 0x00cf ; 32 bit flat code segment (0x10)
  9.5439 +  dw 0xffff, 0, 0x9300, 0x00cf ; 32 bit flat data segment (0x18)
  9.5440 +  dw 0xffff, 0, 0x9b0f, 0x0000 ; 16 bit code segment base=0xf0000 limit=0xffff (0x20)
  9.5441 +  dw 0xffff, 0, 0x9300, 0x0000 ; 16 bit data segment base=0x0 limit=0xffff (0x28)
  9.5442 +#endif // BX_ROMBIOS32
  9.5443 +
  9.5444 +
  9.5445  ; parallel port detection: base address in DX, index in BX, timeout in CL
  9.5446  detect_parport:
  9.5447    push dx
  9.5448 @@ -9621,14 +10431,13 @@ checksum_loop:
  9.5449    ret
  9.5450  
  9.5451  
  9.5452 -;; We need a copy of this string, but we are not actually a PnP BIOS, 
  9.5453 +;; We need a copy of this string, but we are not actually a PnP BIOS,
  9.5454  ;; so make sure it is *not* aligned, so OSes will not see it if they scan.
  9.5455  .align 16
  9.5456    db 0
  9.5457  pnp_string:
  9.5458    .ascii "$PnP"
  9.5459  
  9.5460 -
  9.5461  rom_scan:
  9.5462    ;; Scan for existence of valid expansion ROMS.
  9.5463    ;;   Video ROM:   from 0xC0000..0xC7FFF in 2k increments
  9.5464 @@ -9645,8 +10454,9 @@ rom_scan:
  9.5465  #if BX_TCGBIOS
  9.5466    call _tcpa_start_option_rom_scan    /* specs: 3.2.3.3 + 10.4.3 */
  9.5467  #endif
  9.5468 -  mov  cx, #0xc000
  9.5469 +
  9.5470  rom_scan_loop:
  9.5471 +  push ax       ;; Save AX
  9.5472    mov  ds, cx
  9.5473    mov  ax, #0x0004 ;; start with increment of 4 (512-byte) blocks = 2k
  9.5474    cmp [0], #0xAA55 ;; look for signature
  9.5475 @@ -9663,6 +10473,8 @@ rom_scan_loop:
  9.5476    add  al, #0x04
  9.5477  block_count_rounded:
  9.5478  
  9.5479 +  xor  bx, bx   ;; Restore DS back to 0000:
  9.5480 +  mov  ds, bx
  9.5481  #if BX_TCGBIOS
  9.5482    push ax
  9.5483    push ds
  9.5484 @@ -9673,7 +10485,7 @@ block_count_rounded:
  9.5485    push ecx       ;; segment where option rom is located at
  9.5486    call _tcpa_option_rom                   /* specs: 3.2.3.3 */
  9.5487    add sp, #4    ;; pop segment
  9.5488 -  pop ecx	;; original ecx
  9.5489 +  pop ecx      ;; original ecx
  9.5490    pop ds
  9.5491    pop ax
  9.5492  #endif
  9.5493 @@ -9697,11 +10509,11 @@ fetch_bdf:
  9.5494    xor  ax, ax
  9.5495    mov  al, [bx]
  9.5496  
  9.5497 -  ;; Point ES:DI at "$PnP", which tells the ROM that we are a PnP BIOS.  
  9.5498 +  ;; Point ES:DI at "$PnP", which tells the ROM that we are a PnP BIOS.
  9.5499    ;; That should stop it grabbing INT 19h; we will use its BEV instead.
  9.5500    mov  bx, #0xf000
  9.5501    mov  es, bx
  9.5502 -  lea  di, pnp_string 
  9.5503 +  lea  di, pnp_string
  9.5504  
  9.5505    xor  bx, bx   ;; Restore DS back to 0000:
  9.5506    mov  ds, bx
  9.5507 @@ -9714,8 +10526,8 @@ fetch_bdf:
  9.5508    add  sp, #2   ;; Pop offset value
  9.5509    pop  cx       ;; Pop seg value (restore CX)
  9.5510  
  9.5511 -  ;; Look at the ROM's PnP Expansion header.  Properly, we're supposed 
  9.5512 -  ;; to init all the ROMs and then go back and build an IPL table of 
  9.5513 +  ;; Look at the ROM's PnP Expansion header.  Properly, we're supposed
  9.5514 +  ;; to init all the ROMs and then go back and build an IPL table of
  9.5515    ;; all the bootable devices, but we can get away with one pass.
  9.5516    mov  ds, cx       ;; ROM base
  9.5517    mov  bx, 0x001a   ;; 0x1A is the offset into ROM header that contains...
  9.5518 @@ -9723,22 +10535,54 @@ fetch_bdf:
  9.5519    cmp  ax, #0x5024  ;; we look for signature "$PnP"
  9.5520    jne  no_bev
  9.5521    mov  ax, 2[bx]
  9.5522 -  cmp  ax, #0x506e 
  9.5523 +  cmp  ax, #0x506e
  9.5524    jne  no_bev
  9.5525 +
  9.5526 +  mov  ax, 0x16[bx] ;; 0x16 is the offset of Boot Connection Vector
  9.5527 +  cmp  ax, #0x0000
  9.5528 +  je   no_bcv
  9.5529 +
  9.5530 +  ;; Option ROM has BCV. Run it now.
  9.5531 +  push cx       ;; Push seg
  9.5532 +  push ax       ;; Push offset
  9.5533 +
  9.5534 +  ;; Point ES:DI at "$PnP", which tells the ROM that we are a PnP BIOS.
  9.5535 +  mov  bx, #0xf000
  9.5536 +  mov  es, bx
  9.5537 +  lea  di, pnp_string
  9.5538 +  /* jump to BCV function entry pointer */
  9.5539 +  mov  bp, sp   ;; Call ROM BCV routine using seg:off on stack
  9.5540 +  db   0xff     ;; call_far ss:[bp+0]
  9.5541 +  db   0x5e
  9.5542 +  db   0
  9.5543 +  cli           ;; In case expansion ROM BIOS turns IF on
  9.5544 +  add  sp, #2   ;; Pop offset value
  9.5545 +  pop  cx       ;; Pop seg value (restore CX)
  9.5546 +  jmp   no_bev
  9.5547 +
  9.5548 +no_bcv:
  9.5549    mov  ax, 0x1a[bx] ;; 0x1A is also the offset into the expansion header of...
  9.5550    cmp  ax, #0x0000  ;; the Bootstrap Entry Vector, or zero if there is none.
  9.5551    je   no_bev
  9.5552  
  9.5553 -  ;; Found a device that thinks it can boot the system.  Record its BEV.
  9.5554 -  mov  bx, #IPL_SEG            ;; Go to the segment where the IPL table lives 
  9.5555 +  ;; Found a device that thinks it can boot the system.  Record its BEV and product name string.
  9.5556 +  mov  di, 0x10[bx]            ;; Pointer to the product name string or zero if none
  9.5557 +  xor  bx, bx
  9.5558    mov  ds, bx
  9.5559 +  mov  bx, word ptr [0x40E]    ;; EBDA segment
  9.5560 +  mov  ds, bx                  ;; Go to the segment where the IPL table lives
  9.5561    mov  bx, IPL_COUNT_OFFSET    ;; Read the number of entries so far
  9.5562    cmp  bx, #IPL_TABLE_ENTRIES
  9.5563    je   no_bev                  ;; Get out if the table is full
  9.5564    shl  bx, #0x4                ;; Turn count into offset (entries are 16 bytes)
  9.5565 -  mov  0[bx], #0x80            ;; This entry is a BEV device
  9.5566 -  mov  6[bx], cx               ;; Build a far pointer from the segment...
  9.5567 -  mov  4[bx], ax               ;; and the offset
  9.5568 +  mov  IPL_TABLE_OFFSET+0[bx], #IPL_TYPE_BEV ;; Entry type: this is a BEV device
  9.5569 +  mov  IPL_TABLE_OFFSET+6[bx], cx            ;; BEV far pointer: ROM segment...
  9.5570 +  mov  IPL_TABLE_OFFSET+4[bx], ax            ;; ...and BEV offset
  9.5571 +  cmp  di, #0x0000                           ;; DI = product name offset read from the PnP header
  9.5572 +  je   no_prod_str                           ;; Zero means the ROM supplies no name
  9.5573 +  mov  IPL_TABLE_OFFSET+0xA[bx], cx          ;; Name far pointer: ROM segment (same table base as the fields above)...
  9.5574 +  mov  IPL_TABLE_OFFSET+8[bx], di            ;; ...and name offset
  9.5575 +no_prod_str:
  9.5576    shr  bx, #0x4                ;; Turn the offset back into a count
  9.5577    inc  bx                      ;; We have one more entry now
  9.5578    mov  IPL_COUNT_OFFSET, bx    ;; Remember that.
  9.5579 @@ -9750,7 +10594,8 @@ rom_scan_increment:
  9.5580    shl  ax, #5   ;; convert 512-bytes blocks to 16-byte increments
  9.5581                  ;; because the segment selector is shifted left 4 bits.
  9.5582    add  cx, ax
  9.5583 -  cmp  cx, #0xe000
  9.5584 +  pop  ax       ;; Restore AX
  9.5585 +  cmp  cx, ax
  9.5586    jbe  rom_scan_loop
  9.5587  
  9.5588    xor  ax, ax   ;; Restore DS back to 0000:
  9.5589 @@ -9815,10 +10660,36 @@ tcpa_post_part2:
  9.5590  #endif
  9.5591  
  9.5592  
  9.5593 -;; for 'C' strings and other data, insert them here with
  9.5594 -;; a the following hack:
  9.5595 -;; DATA_SEG_DEFS_HERE
  9.5596 -
  9.5597 +post_init_pic: ;; initialise master and slave interrupt controllers and set their IRQ masks; clobbers AL
  9.5598 +  mov al, #0x11 ; send initialisation commands (ICW1: cascade mode, ICW4 will follow)
  9.5599 +  out 0x20, al ; master pic command port
  9.5600 +  out 0xa0, al ; slave  pic command port
  9.5601 +  mov al, #0x08 ; ICW2 master: IRQ 0-7 are delivered as INT 08h-0Fh
  9.5602 +  out 0x21, al
  9.5603 +  mov al, #0x70 ; ICW2 slave: IRQ 8-15 are delivered as INT 70h-77h
  9.5604 +  out 0xa1, al
  9.5605 +  mov al, #0x04 ; ICW3 master: bit 2 set, slave is cascaded on IRQ 2
  9.5606 +  out 0x21, al
  9.5607 +  mov al, #0x02 ; ICW3 slave: cascade identity 2
  9.5608 +  out 0xa1, al
  9.5609 +  mov al, #0x01 ; ICW4: 8086 mode, same value for both controllers
  9.5610 +  out 0x21, al
  9.5611 +  out 0xa1, al
  9.5612 +  mov  al, #0xb8 ; mask byte: a set bit keeps that IRQ line disabled
  9.5613 +  out  0x21, AL ;master pic: unmask IRQ 0, 1, 2, 6
  9.5614 +#if BX_USE_PS2_MOUSE
  9.5615 +  mov  al, #0x8f ; bits 4-6 clear: IRQ 12, 13, 14 enabled
  9.5616 +#else
  9.5617 +  mov  al, #0x9f ; bit 4 stays set: IRQ 12 remains masked, only 13 and 14 enabled
  9.5618 +#endif
  9.5619 +  out  0xa1, AL ;slave  pic: unmask IRQ 13, 14 (plus IRQ 12 when BX_USE_PS2_MOUSE)
  9.5620 +  ret
  9.5621 +
  9.5622 +;; the following area can be used to write dynamically generated tables
  9.5623 +  .align 16
  9.5624 +bios_table_area_start:
  9.5625 +  dd 0xaafb4442
  9.5626 +  dd bios_table_area_end - bios_table_area_start - 8;
  9.5627  
  9.5628  ;--------
  9.5629  ;- POST -
  9.5630 @@ -9854,17 +10725,66 @@ post:
  9.5631  
  9.5632    ;; Examine CMOS shutdown status.
  9.5633    mov al, bl
  9.5634 -  mov dx, #EBDA_SEG
  9.5635 -  mov ds, dx
  9.5636 -  mov [EBDA_CMOS_SHUTDOWN_STATUS_OFFSET], AL
  9.5637 +
  9.5638 +  ;; 0x00, 0x09, 0x0D+ = normal startup
  9.5639 +  cmp AL, #0x00
  9.5640 +  jz normal_post
  9.5641 +  cmp AL, #0x0d
  9.5642 +  jae normal_post
  9.5643 +  cmp AL, #0x09
  9.5644 +  je normal_post
  9.5645 +
  9.5646 +  ;; 0x05 = eoi + jmp via [0x40:0x67] jump
  9.5647 +  cmp al, #0x05
  9.5648 +  je  eoi_jmp_post
  9.5649 +
  9.5650 +  ;; 0x0A = jmp via [0x40:0x67] jump
  9.5651 +  cmp al, #0x0a
  9.5652 +  je  jmp_post_0x467
  9.5653 +
  9.5654 +  ;; 0x0B = iret via [0x40:0x67]
  9.5655 +  cmp al, #0x0b
  9.5656 +  je  iret_post_0x467
  9.5657 +
  9.5658 +  ;; 0x0C = retf via [0x40:0x67]
  9.5659 +  cmp al, #0x0c
  9.5660 +  je  retf_post_0x467
  9.5661 +
  9.5662 +  ;; Examine CMOS shutdown status.
  9.5663 +  ;;  0x01,0x02,0x03,0x04,0x06,0x07,0x08 = Unimplemented shutdown status.
  9.5664 +  push bx
  9.5665 +  call _shutdown_status_panic
  9.5666 +
  9.5667 +#if 0
  9.5668 +  HALT(__LINE__)
  9.5669 +  ;
  9.5670 +  ;#if 0
  9.5671 +  ;  0xb0, 0x20,       /* mov al, #0x20 */
  9.5672 +  ;  0xe6, 0x20,       /* out 0x20, al    ;send EOI to PIC */
  9.5673 +  ;#endif
  9.5674 +  ;
  9.5675 +  pop es
  9.5676 +  pop ds
  9.5677 +  popa
  9.5678 +  iret
  9.5679 +#endif
  9.5680 +
  9.5681 +normal_post:
  9.5682 +  ; case 0: normal startup
  9.5683  
  9.5684    cli
  9.5685    mov  ax, #0xfffe
  9.5686    mov  sp, ax
  9.5687 -  mov  ax, #0x0000
  9.5688 +  xor  ax, ax
  9.5689    mov  ds, ax
  9.5690    mov  ss, ax
  9.5691  
  9.5692 +  ;; Save shutdown status
  9.5693 +  mov 0x04b0, bl
  9.5694 +
  9.5695 +  cmp bl, #0xfe
  9.5696 +  jz s3_post
  9.5697 +
  9.5698    ;; zero out BIOS data area (40:00..40:ff)
  9.5699    mov  es, ax
  9.5700    mov  cx, #0x0080 ;; 128 words
  9.5701 @@ -9876,18 +10796,16 @@ post:
  9.5702    call _log_bios_start
  9.5703  
  9.5704    ;; set all interrupts to default handler
  9.5705 -  mov  bx, #0x0000    ;; offset index
  9.5706 +  xor  bx, bx         ;; offset index
  9.5707    mov  cx, #0x0100    ;; counter (256 interrupts)
  9.5708    mov  ax, #dummy_iret_handler
  9.5709    mov  dx, #0xF000
  9.5710  
  9.5711  post_default_ints:
  9.5712    mov  [bx], ax
  9.5713 -  inc  bx
  9.5714 -  inc  bx
  9.5715 +  add  bx, #2
  9.5716    mov  [bx], dx
  9.5717 -  inc  bx
  9.5718 -  inc  bx
  9.5719 +  add  bx, #2
  9.5720    loop post_default_ints
  9.5721  
  9.5722    ;; set vector 0x79 to zero
  9.5723 @@ -10043,28 +10961,51 @@ post_default_ints:
  9.5724    SET_INT_VECTOR(0x10, #0xF000, #int10_handler)
  9.5725  
  9.5726    ;; PIC
  9.5727 -  mov al, #0x11 ; send initialisation commands
  9.5728 -  out 0x20, al
  9.5729 -  out 0xa0, al
  9.5730 -  mov al, #0x08
  9.5731 -  out 0x21, al
  9.5732 -  mov al, #0x70
  9.5733 -  out 0xa1, al
  9.5734 -  mov al, #0x04
  9.5735 -  out 0x21, al
  9.5736 -  mov al, #0x02
  9.5737 -  out 0xa1, al
  9.5738 -  mov al, #0x01
  9.5739 -  out 0x21, al
  9.5740 -  out 0xa1, al
  9.5741 -  mov  al, #0xb8
  9.5742 -  out  0x21, AL ;master pic: unmask IRQ 0, 1, 2, 6
  9.5743 -#if BX_USE_PS2_MOUSE
  9.5744 -  mov  al, #0x8f
  9.5745 +  call post_init_pic
  9.5746 +
  9.5747 +  mov  cx, #0xc000  ;; init vga bios
  9.5748 +  mov  ax, #0xc780
  9.5749 +  call rom_scan
  9.5750 +
  9.5751 +  call _print_bios_banner
  9.5752 +
  9.5753 +#if BX_ROMBIOS32
  9.5754 +  call rombios32_init
  9.5755  #else
  9.5756 -  mov  al, #0x9f
  9.5757 -#endif
  9.5758 -  out  0xa1, AL ;slave  pic: unmask IRQ 12, 13, 14
  9.5759 +#if BX_PCIBIOS
  9.5760 +  call pcibios_init_iomem_bases
  9.5761 +  call pcibios_init_irqs
  9.5762 +#endif //BX_PCIBIOS
  9.5763 +#endif
  9.5764 +
  9.5765 +  ;;
  9.5766 +  ;; Floppy setup
  9.5767 +  ;;
  9.5768 +  call floppy_drive_post
  9.5769 +
  9.5770 +  ;;
  9.5771 +  ;; Hard Drive setup
  9.5772 +  ;;
  9.5773 +  call hard_drive_post
  9.5774 +
  9.5775 +#if BX_USE_ATADRV
  9.5776 +
  9.5777 +  ;;
  9.5778 +  ;; ATA/ATAPI driver setup
  9.5779 +  ;;
  9.5780 +  call _ata_init
  9.5781 +  call _ata_detect
  9.5782 +  ;;
  9.5783 +
  9.5784 +#endif // BX_USE_ATADRV
  9.5785 +
  9.5786 +#if BX_ELTORITO_BOOT
  9.5787 +  ;;
  9.5788 +  ;; eltorito floppy/harddisk emulation from cd
  9.5789 +  ;;
  9.5790 +  call _cdemu_init
  9.5791 +  ;;
  9.5792 +#endif // BX_ELTORITO_BOOT
  9.5793  
  9.5794  #ifdef HVMASSIST
  9.5795    call _enable_rom_write_access
  9.5796 @@ -10076,52 +11017,19 @@ post_default_ints:
  9.5797  
  9.5798    call _init_boot_vectors
  9.5799  
  9.5800 +  mov  cx, #0xc800  ;; init option roms
  9.5801 +  mov  ax, #0xe000
  9.5802    call rom_scan
  9.5803  
  9.5804 -  call _print_bios_banner 
  9.5805 -
  9.5806 -  ;;
  9.5807 -  ;; Floppy setup
  9.5808 -  ;;
  9.5809 -  call floppy_drive_post
  9.5810 -
  9.5811 -#if BX_USE_ATADRV
  9.5812 -
  9.5813 -  ;;
  9.5814 -  ;; Hard Drive setup
  9.5815 -  ;;
  9.5816 -  call hard_drive_post
  9.5817 -
  9.5818 -  ;;
  9.5819 -  ;; ATA/ATAPI driver setup
  9.5820 -  ;;
  9.5821 -  call _ata_init
  9.5822 -  call _ata_detect
  9.5823 -  ;;
  9.5824 -#else // BX_USE_ATADRV
  9.5825 -
  9.5826 -  ;;
  9.5827 -  ;; Hard Drive setup
  9.5828 -  ;;
  9.5829 -  call hard_drive_post
  9.5830 -
  9.5831 -#endif // BX_USE_ATADRV
  9.5832 -
  9.5833  #if BX_ELTORITO_BOOT
  9.5834 -  ;;
  9.5835 -  ;; eltorito floppy/harddisk emulation from cd
  9.5836 -  ;;
  9.5837 -  call _cdemu_init
  9.5838 -  ;;
  9.5839 +  call _interactive_bootkey
  9.5840  #endif // BX_ELTORITO_BOOT
  9.5841  
  9.5842 -  call _s3_resume
  9.5843 -  call _interactive_bootkey
  9.5844 -
  9.5845  #if BX_TCGBIOS
  9.5846    call tcpa_post_part2
  9.5847  #endif
  9.5848  
  9.5849 +  sti        ;; enable interrupts
  9.5850    ;; Start the boot sequence.   See the comments in int19_relocated 
  9.5851    ;; for why we use INT 18h instead of INT 19h here.
  9.5852    int  #0x18
  9.5853 @@ -10134,7 +11042,7 @@ nmi:
  9.5854    iret
  9.5855  
  9.5856  int75_handler:
  9.5857 -  out  0xf0, al         // clear irq13 
  9.5858 +  out  0xf0, al         // clear irq13
  9.5859    call eoi_both_pics    // clear interrupt
  9.5860    int  2                // legacy nmi call
  9.5861    iret
  9.5862 @@ -10233,7 +11141,7 @@ db 0x00
  9.5863  int14_handler:
  9.5864    push ds
  9.5865    pusha
  9.5866 -  mov  ax, #0x0000
  9.5867 +  xor  ax, ax
  9.5868    mov  ds, ax
  9.5869    call _int14_function
  9.5870    popa
  9.5871 @@ -10338,26 +11246,7 @@ int09_handler:
  9.5872    jz  int09_finish
  9.5873  
  9.5874    in  al, #0x60             ;;read key from keyboard controller
  9.5875 -  //test al, #0x80            ;;look for key release
  9.5876 -  //jnz  int09_process_key    ;; dont pass releases to intercept?
  9.5877 -
  9.5878 -  ;; check for extended key
  9.5879 -  cmp  al, #0xe0
  9.5880 -  jne int09_call_int15_4f
  9.5881 -  
  9.5882 -  push ds
  9.5883 -  xor  ax, ax
  9.5884 -  mov  ds, ax
  9.5885 -  mov  al, BYTE [0x496]     ;; mf2_state |= 0x01
  9.5886 -  or   al, #0x01
  9.5887 -  mov  BYTE [0x496], al
  9.5888 -  pop  ds
  9.5889 -  
  9.5890 -  in  al, #0x60             ;;read another key from keyboard controller
  9.5891 -
  9.5892    sti
  9.5893 -
  9.5894 -int09_call_int15_4f:
  9.5895    push  ds
  9.5896    pusha
  9.5897  #ifdef BX_CALL_INT15_4F
  9.5898 @@ -10367,8 +11256,27 @@ int09_call_int15_4f:
  9.5899    jnc  int09_done
  9.5900  #endif
  9.5901  
  9.5902 -
  9.5903 -//int09_process_key:
  9.5904 +  ;; check for extended key
  9.5905 +  cmp  al, #0xe0
  9.5906 +  jne int09_check_pause
  9.5907 +  xor  ax, ax
  9.5908 +  mov  ds, ax
  9.5909 +  mov  al, BYTE [0x496]     ;; mf2_state |= 0x02
  9.5910 +  or   al, #0x02
  9.5911 +  mov  BYTE [0x496], al
  9.5912 +  jmp int09_done
  9.5913 +
  9.5914 +int09_check_pause: ;; check for pause key
  9.5915 +  cmp  al, #0xe1
  9.5916 +  jne int09_process_key
  9.5917 +  xor  ax, ax
  9.5918 +  mov  ds, ax
  9.5919 +  mov  al, BYTE [0x496]     ;; mf2_state |= 0x01
  9.5920 +  or   al, #0x01
  9.5921 +  mov  BYTE [0x496], al
  9.5922 +  jmp int09_done
  9.5923 +
  9.5924 +int09_process_key:
  9.5925    mov   bx, #0xf000
  9.5926    mov   ds, bx
  9.5927    call  _int09_function
  9.5928 @@ -10386,8 +11294,6 @@ int09_finish:
  9.5929    iret
  9.5930  
  9.5931  
  9.5932 -
  9.5933 -
  9.5934  ;----------------------------------------
  9.5935  ;- INT 13h Diskette Service Entry Point -
  9.5936  ;----------------------------------------
  9.5937 @@ -10426,7 +11332,7 @@ int0e_loop2:
  9.5938    je int0e_loop2
  9.5939  int0e_normal:
  9.5940    push ds
  9.5941 -  mov  ax, #0x0000 ;; segment 0000
  9.5942 +  xor  ax, ax ;; segment 0000
  9.5943    mov  ds, ax
  9.5944    call eoi_master_pic
  9.5945    mov  al, 0x043e
  9.5946 @@ -10463,7 +11369,7 @@ db  0x08
  9.5947  int17_handler:
  9.5948    push ds
  9.5949    pusha
  9.5950 -  mov  ax, #0x0000
  9.5951 +  xor  ax, ax
  9.5952    mov  ds, ax
  9.5953    call _int17_function
  9.5954    popa
  9.5955 @@ -10653,11 +11559,11 @@ int1a_callfunction:
  9.5956  ;;
  9.5957  int70_handler:
  9.5958    push ds
  9.5959 -  pusha
  9.5960 +  pushad
  9.5961    xor  ax, ax
  9.5962    mov  ds, ax
  9.5963    call _int70_function
  9.5964 -  popa
  9.5965 +  popad
  9.5966    pop  ds
  9.5967    iret
  9.5968  
  9.5969 @@ -10715,7 +11621,7 @@ int08_store_ticks:
  9.5970  
  9.5971  
  9.5972  .org 0xff00
  9.5973 -.ascii "(c) 2002 MandrakeSoft S.A. Written by Kevin Lawton & the Bochs team."
  9.5974 +.ascii BIOS_COPYRIGHT_STRING
  9.5975  
  9.5976  ;------------------------------------------------
  9.5977  ;- IRET Instruction for Dummy Interrupt Handler -
  9.5978 @@ -10737,7 +11643,7 @@ dummy_iret_handler:
  9.5979  #ifdef HVMTEST
  9.5980    jmp 0xd000:0x0003;
  9.5981  #else
  9.5982 -  jmp 0xf000:post
  9.5983 +   jmp 0xf000:post
  9.5984  #endif
  9.5985  
  9.5986  .org 0xfff5 ; ASCII Date ROM was built - 8 characters in MM/DD/YY
  9.5987 @@ -10750,10 +11656,10 @@ db 0x00   ; filler
  9.5988  .org 0xfa6e ;; Character Font for 320x200 & 640x200 Graphics (lower 128 characters)
  9.5989  ASM_END
  9.5990  /*
  9.5991 - * This font comes from the fntcol16.zip package (c) by  Joseph Gil 
  9.5992 + * This font comes from the fntcol16.zip package (c) by  Joseph Gil
  9.5993   * found at ftp://ftp.simtel.net/pub/simtelnet/msdos/screen/fntcol16.zip
  9.5994   * This font is public domain
  9.5995 - */ 
  9.5996 + */
  9.5997  static Bit8u vgafont8[128*8]=
  9.5998  {
  9.5999   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  9.6000 @@ -10929,328 +11835,10 @@ db 0,0,0,0,0,0,0,0 ; 24 bytes
  9.6001  db 0,0,0,0,0,0,0   ; 31 bytes
  9.6002  ASM_END
  9.6003  
  9.6004 -#else // !HVMASSIST
  9.6005 -
  9.6006 +#endif // HVMASSIST
  9.6007  ASM_START
  9.6008 -.org 0xcc00
  9.6009 +.org 0xcff0
  9.6010 +bios_table_area_end:
  9.6011  // bcc-generated data will be placed here
  9.6012 -
  9.6013 -// For documentation of this config structure, look on developer.intel.com and
  9.6014 -// search for multiprocessor specification.  Note that when you change anything
  9.6015 -// you must update the checksum (a pain!).  It would be better to construct this
  9.6016 -// with C structures, or at least fill in the checksum automatically.
  9.6017 -//
  9.6018 -// Maybe this structs could be moved elsewhere than d000
  9.6019 -
  9.6020 -#if (BX_SMP_PROCESSORS==1)
  9.6021 -  // no structure necessary.
  9.6022 -#elif (BX_SMP_PROCESSORS==2)
  9.6023 -// define the Intel MP Configuration Structure for 2 processors at
  9.6024 -// APIC ID 0,1.  I/O APIC at ID=2.
  9.6025 -.align 16
  9.6026 -mp_config_table:
  9.6027 -  db 0x50, 0x43, 0x4d, 0x50  ;; "PCMP" signature
  9.6028 -  dw (mp_config_end-mp_config_table)  ;; table length
  9.6029 -  db 4 ;; spec rev
  9.6030 -  db 0x65 ;; checksum
  9.6031 -  .ascii "BOCHSCPU"     ;; OEM id = "BOCHSCPU"
  9.6032 -  db 0x30, 0x2e, 0x31, 0x20 ;; vendor id = "0.1         "
  9.6033 -  db 0x20, 0x20, 0x20, 0x20 
  9.6034 -  db 0x20, 0x20, 0x20, 0x20
  9.6035 -  dw 0,0 ;; oem table ptr
  9.6036 -  dw 0 ;; oem table size
  9.6037 -  dw 20 ;; entry count
  9.6038 -  dw 0x0000, 0xfee0 ;; memory mapped address of local APIC
  9.6039 -  dw 0 ;; extended table length
  9.6040 -  db 0 ;; extended table checksum
  9.6041 -  db 0 ;; reserved
  9.6042 -mp_config_proc0:
  9.6043 -  db 0 ;; entry type=processor
  9.6044 -  db 0 ;; local APIC id
  9.6045 -  db 0x11 ;; local APIC version number
  9.6046 -  db 3 ;; cpu flags: enabled, bootstrap processor
  9.6047 -  db 0,6,0,0 ;; cpu signature
  9.6048 -  dw 0x201,0 ;; feature flags
  9.6049 -  dw 0,0 ;; reserved
  9.6050 -  dw 0,0 ;; reserved
  9.6051 -mp_config_proc1:
  9.6052 -  db 0 ;; entry type=processor
  9.6053 -  db 1 ;; local APIC id
  9.6054 -  db 0x11 ;; local APIC version number
  9.6055 -  db 1 ;; cpu flags: enabled
  9.6056 -  db 0,6,0,0 ;; cpu signature
  9.6057 -  dw 0x201,0 ;; feature flags
  9.6058 -  dw 0,0 ;; reserved
  9.6059 -  dw 0,0 ;; reserved
  9.6060 -mp_config_isa_bus:
  9.6061 -  db 1 ;; entry type=bus
  9.6062 -  db 0 ;; bus ID
  9.6063 -  db 0x49, 0x53, 0x41, 0x20, 0x20, 0x20  ;; bus type="ISA   "
  9.6064 -mp_config_ioapic:
  9.6065 -  db 2 ;; entry type=I/O APIC
  9.6066 -  db 2 ;; apic id=2. linux will set.
  9.6067 -  db 0x11 ;; I/O APIC version number
  9.6068 -  db 1 ;; flags=1=enabled
  9.6069 -  dw 0x0000, 0xfec0 ;; memory mapped address of I/O APIC
  9.6070 -mp_config_irqs:
  9.6071 -  db 3 ;; entry type=I/O interrupt
  9.6072 -  db 0 ;; interrupt type=vectored interrupt
  9.6073 -  db 0,0 ;; flags po=0, el=0 (linux uses as default)
  9.6074 -  db 0 ;; source bus ID is ISA
  9.6075 -  db 0 ;; source bus IRQ
  9.6076 -  db 2 ;; destination I/O APIC ID
  9.6077 -  db 0 ;; destination I/O APIC interrrupt in
  9.6078 -  ;; repeat pattern for interrupts 0-15
  9.6079 -  db 3,0,0,0,0,1,2,1
  9.6080 -  db 3,0,0,0,0,2,2,2
  9.6081 -  db 3,0,0,0,0,3,2,3
  9.6082 -  db 3,0,0,0,0,4,2,4
  9.6083 -  db 3,0,0,0,0,5,2,5
  9.6084 -  db 3,0,0,0,0,6,2,6
  9.6085 -  db 3,0,0,0,0,7,2,7
  9.6086 -  db 3,0,0,0,0,8,2,8
  9.6087 -  db 3,0,0,0,0,9,2,9
  9.6088 -  db 3,0,0,0,0,10,2,10
  9.6089 -  db 3,0,0,0,0,11,2,11
  9.6090 -  db 3,0,0,0,0,12,2,12
  9.6091 -  db 3,0,0,0,0,13,2,13
  9.6092 -  db 3,0,0,0,0,14,2,14
  9.6093 -  db 3,0,0,0,0,15,2,15
  9.6094 -#elif (BX_SMP_PROCESSORS==4)
  9.6095 -// define the Intel MP Configuration Structure for 4 processors at
  9.6096 -// APIC ID 0,1,2,3.  I/O APIC at ID=4.
  9.6097 -.align 16
  9.6098 -mp_config_table:
  9.6099 -  db 0x50, 0x43, 0x4d, 0x50  ;; "PCMP" signature
  9.6100 -  dw (mp_config_end-mp_config_table)  ;; table length
  9.6101 -  db 4 ;; spec rev
  9.6102 -  db 0xdd ;; checksum
  9.6103 -  .ascii "BOCHSCPU"     ;; OEM id = "BOCHSCPU"
  9.6104 -  db 0x30, 0x2e, 0x31, 0x20 ;; vendor id = "0.1         "
  9.6105 -  db 0x20, 0x20, 0x20, 0x20 
  9.6106 -  db 0x20, 0x20, 0x20, 0x20
  9.6107 -  dw 0,0 ;; oem table ptr
  9.6108 -  dw 0 ;; oem table size
  9.6109 -  dw 22 ;; entry count
  9.6110 -  dw 0x0000, 0xfee0 ;; memory mapped address of local APIC
  9.6111 -  dw 0 ;; extended table length
  9.6112 -  db 0 ;; extended table checksum
  9.6113 -  db 0 ;; reserved
  9.6114 -mp_config_proc0:
  9.6115 -  db 0 ;; entry type=processor
  9.6116 -  db 0 ;; local APIC id
  9.6117 -  db 0x11 ;; local APIC version number
  9.6118 -  db 3 ;; cpu flags: enabled, bootstrap processor
  9.6119 -  db 0,6,0,0 ;; cpu signature
  9.6120 -  dw 0x201,0 ;; feature flags
  9.6121 -  dw 0,0 ;; reserved
  9.6122 -  dw 0,0 ;; reserved
  9.6123 -mp_config_proc1:
  9.6124 -  db 0 ;; entry type=processor
  9.6125 -  db 1 ;; local APIC id
  9.6126 -  db 0x11 ;; local APIC version number
  9.6127 -  db 1 ;; cpu flags: enabled
  9.6128 -  db 0,6,0,0 ;; cpu signature
  9.6129 -  dw 0x201,0 ;; feature flags
  9.6130 -  dw 0,0 ;; reserved
  9.6131 -  dw 0,0 ;; reserved
  9.6132 -mp_config_proc2:
  9.6133 -  db 0 ;; entry type=processor
  9.6134 -  db 2 ;; local APIC id
  9.6135 -  db 0x11 ;; local APIC version number
  9.6136 -  db 1 ;; cpu flags: enabled
  9.6137 -  db 0,6,0,0 ;; cpu signature
  9.6138 -  dw 0x201,0 ;; feature flags
  9.6139 -  dw 0,0 ;; reserved
  9.6140 -  dw 0,0 ;; reserved
  9.6141 -mp_config_proc3:
  9.6142 -  db 0 ;; entry type=processor
  9.6143 -  db 3 ;; local APIC id
  9.6144 -  db 0x11 ;; local APIC version number
  9.6145 -  db 1 ;; cpu flags: enabled
  9.6146 -  db 0,6,0,0 ;; cpu signature
  9.6147 -  dw 0x201,0 ;; feature flags
  9.6148 -  dw 0,0 ;; reserved
  9.6149 -  dw 0,0 ;; reserved
  9.6150 -mp_config_isa_bus:
  9.6151 -  db 1 ;; entry type=bus
  9.6152 -  db 0 ;; bus ID
  9.6153 -  db 0x49, 0x53, 0x41, 0x20, 0x20, 0x20  ;; bus type="ISA   "
  9.6154 -mp_config_ioapic:
  9.6155 -  db 2 ;; entry type=I/O APIC
  9.6156 -  db 4 ;; apic id=4. linux will set.
  9.6157 -  db 0x11 ;; I/O APIC version number
  9.6158 -  db 1 ;; flags=1=enabled
  9.6159 -  dw 0x0000, 0xfec0 ;; memory mapped address of I/O APIC
  9.6160 -mp_config_irqs:
  9.6161 -  db 3 ;; entry type=I/O interrupt
  9.6162 -  db 0 ;; interrupt type=vectored interrupt
  9.6163 -  db 0,0 ;; flags po=0, el=0 (linux uses as default)
  9.6164 -  db 0 ;; source bus ID is ISA
  9.6165 -  db 0 ;; source bus IRQ
  9.6166 -  db 4 ;; destination I/O APIC ID
  9.6167 -  db 0 ;; destination I/O APIC interrrupt in
  9.6168 -  ;; repeat pattern for interrupts 0-15
  9.6169 -  db 3,0,0,0,0,1,4,1
  9.6170 -  db 3,0,0,0,0,2,4,2
  9.6171 -  db 3,0,0,0,0,3,4,3
  9.6172 -  db 3,0,0,0,0,4,4,4
  9.6173 -  db 3,0,0,0,0,5,4,5
  9.6174 -  db 3,0,0,0,0,6,4,6
  9.6175 -  db 3,0,0,0,0,7,4,7
  9.6176 -  db 3,0,0,0,0,8,4,8
  9.6177 -  db 3,0,0,0,0,9,4,9
  9.6178 -  db 3,0,0,0,0,10,4,10
  9.6179 -  db 3,0,0,0,0,11,4,11
  9.6180 -  db 3,0,0,0,0,12,4,12
  9.6181 -  db 3,0,0,0,0,13,4,13
  9.6182 -  db 3,0,0,0,0,14,4,14
  9.6183 -  db 3,0,0,0,0,15,4,15
  9.6184 -#elif (BX_SMP_PROCESSORS==8)
  9.6185 -// define the Intel MP Configuration Structure for 8 processors at
  9.6186 -// APIC ID 0,1,2,3,4,5,6,7.  I/O APIC at ID=8.
  9.6187 -.align 16
  9.6188 -mp_config_table:
  9.6189 -  db 0x50, 0x43, 0x4d, 0x50  ;; "PCMP" signature
  9.6190 -  dw (mp_config_end-mp_config_table)  ;; table length
  9.6191 -  db 4 ;; spec rev
  9.6192 -  db 0xc3 ;; checksum
  9.6193 -  .ascii "BOCHSCPU"     ;; OEM id = "BOCHSCPU"
  9.6194 -  db 0x30, 0x2e, 0x31, 0x20 ;; vendor id = "0.1         "
  9.6195 -  db 0x20, 0x20, 0x20, 0x20 
  9.6196 -  db 0x20, 0x20, 0x20, 0x20
  9.6197 -  dw 0,0 ;; oem table ptr
  9.6198 -  dw 0 ;; oem table size
  9.6199 -  dw 26 ;; entry count
  9.6200 -  dw 0x0000, 0xfee0 ;; memory mapped address of local APIC
  9.6201 -  dw 0 ;; extended table length
  9.6202 -  db 0 ;; extended table checksum
  9.6203 -  db 0 ;; reserved
  9.6204 -mp_config_proc0:
  9.6205 -  db 0 ;; entry type=processor
  9.6206 -  db 0 ;; local APIC id
  9.6207 -  db 0x11 ;; local APIC version number
  9.6208 -  db 3 ;; cpu flags: enabled, bootstrap processor
  9.6209 -  db 0,6,0,0 ;; cpu signature
  9.6210 -  dw 0x201,0 ;; feature flags
  9.6211 -  dw 0,0 ;; reserved
  9.6212 -  dw 0,0 ;; reserved
  9.6213 -mp_config_proc1:
  9.6214 -  db 0 ;; entry type=processor
  9.6215 -  db 1 ;; local APIC id
  9.6216 -  db 0x11 ;; local APIC version number
  9.6217 -  db 1 ;; cpu flags: enabled
  9.6218 -  db 0,6,0,0 ;; cpu signature
  9.6219 -  dw 0x201,0 ;; feature flags
  9.6220 -  dw 0,0 ;; reserved
  9.6221 -  dw 0,0 ;; reserved
  9.6222 -mp_config_proc2:
  9.6223 -  db 0 ;; entry type=processor
  9.6224 -  db 2 ;; local APIC id
  9.6225 -  db 0x11 ;; local APIC version number
  9.6226 -  db 1 ;; cpu flags: enabled
  9.6227 -  db 0,6,0,0 ;; cpu signature
  9.6228 -  dw 0x201,0 ;; feature flags
  9.6229 -  dw 0,0 ;; reserved
  9.6230 -  dw 0,0 ;; reserved
  9.6231 -mp_config_proc3:
  9.6232 -  db 0 ;; entry type=processor
  9.6233 -  db 3 ;; local APIC id
  9.6234 -  db 0x11 ;; local APIC version number
  9.6235 -  db 1 ;; cpu flags: enabled
  9.6236 -  db 0,6,0,0 ;; cpu signature
  9.6237 -  dw 0x201,0 ;; feature flags
  9.6238 -  dw 0,0 ;; reserved
  9.6239 -  dw 0,0 ;; reserved
  9.6240 -mp_config_proc4:
  9.6241 -  db 0 ;; entry type=processor
  9.6242 -  db 4 ;; local APIC id
  9.6243 -  db 0x11 ;; local APIC version number
  9.6244 -  db 1 ;; cpu flags: enabled
  9.6245 -  db 0,6,0,0 ;; cpu signature
  9.6246 -  dw 0x201,0 ;; feature flags
  9.6247 -  dw 0,0 ;; reserved
  9.6248 -  dw 0,0 ;; reserved
  9.6249 -mp_config_proc5:
  9.6250 -  db 0 ;; entry type=processor
  9.6251 -  db 5 ;; local APIC id
  9.6252 -  db 0x11 ;; local APIC version number
  9.6253 -  db 1 ;; cpu flags: enabled
  9.6254 -  db 0,6,0,0 ;; cpu signature
  9.6255 -  dw 0x201,0 ;; feature flags
  9.6256 -  dw 0,0 ;; reserved
  9.6257 -  dw 0,0 ;; reserved
  9.6258 -mp_config_proc6:
  9.6259 -  db 0 ;; entry type=processor
  9.6260 -  db 6 ;; local APIC id
  9.6261 -  db 0x11 ;; local APIC version number
  9.6262 -  db 1 ;; cpu flags: enabled
  9.6263 -  db 0,6,0,0 ;; cpu signature
  9.6264 -  dw 0x201,0 ;; feature flags
  9.6265 -  dw 0,0 ;; reserved
  9.6266 -  dw 0,0 ;; reserved
  9.6267 -mp_config_proc7:
  9.6268 -  db 0 ;; entry type=processor
  9.6269 -  db 7 ;; local APIC id
  9.6270 -  db 0x11 ;; local APIC version number
  9.6271 -  db 1 ;; cpu flags: enabled
  9.6272 -  db 0,6,0,0 ;; cpu signature
  9.6273 -  dw 0x201,0 ;; feature flags
  9.6274 -  dw 0,0 ;; reserved
  9.6275 -  dw 0,0 ;; reserved
  9.6276 -mp_config_isa_bus:
  9.6277 -  db 1 ;; entry type=bus
  9.6278 -  db 0 ;; bus ID
  9.6279 -  db 0x49, 0x53, 0x41, 0x20, 0x20, 0x20  ;; bus type="ISA   "
  9.6280 -mp_config_ioapic:
  9.6281 -  db 2 ;; entry type=I/O APIC
  9.6282 -  db 8 ;; apic id=8
  9.6283 -  db 0x11 ;; I/O APIC version number
  9.6284 -  db 1 ;; flags=1=enabled
  9.6285 -  dw 0x0000, 0xfec0 ;; memory mapped address of I/O APIC
  9.6286 -mp_config_irqs:
  9.6287 -  db 3 ;; entry type=I/O interrupt
  9.6288 -  db 0 ;; interrupt type=vectored interrupt
  9.6289 -  db 0,0 ;; flags po=0, el=0 (linux uses as default)
  9.6290 -  db 0 ;; source bus ID is ISA
  9.6291 -  db 0 ;; source bus IRQ
  9.6292 -  db 8 ;; destination I/O APIC ID
  9.6293 -  db 0 ;; destination I/O APIC interrrupt in
  9.6294 -  ;; repeat pattern for interrupts 0-15
  9.6295 -  db 3,0,0,0,0,1,8,1
  9.6296 -  db 3,0,0,0,0,2,8,2
  9.6297 -  db 3,0,0,0,0,3,8,3
  9.6298 -  db 3,0,0,0,0,4,8,4
  9.6299 -  db 3,0,0,0,0,5,8,5
  9.6300 -  db 3,0,0,0,0,6,8,6
  9.6301 -  db 3,0,0,0,0,7,8,7
  9.6302 -  db 3,0,0,0,0,8,8,8
  9.6303 -  db 3,0,0,0,0,9,8,9
  9.6304 -  db 3,0,0,0,0,10,8,10
  9.6305 -  db 3,0,0,0,0,11,8,11
  9.6306 -  db 3,0,0,0,0,12,8,12
  9.6307 -  db 3,0,0,0,0,13,8,13
  9.6308 -  db 3,0,0,0,0,14,8,14
  9.6309 -  db 3,0,0,0,0,15,8,15
  9.6310 -#else
  9.6311 -#  error Sorry, rombios only has configurations for 1, 2, 4 or 8 processors.
  9.6312 -#endif  // if (BX_SMP_PROCESSORS==...)
  9.6313 -
  9.6314 -mp_config_end:   // this label used to find length of mp structure
  9.6315 - db 0
  9.6316 -
  9.6317 -#if (BX_SMP_PROCESSORS>1)
  9.6318 -.align 16
  9.6319 -mp_floating_pointer_structure:
  9.6320 -db 0x5f, 0x4d, 0x50, 0x5f   ; "_MP_" signature
  9.6321 -dw mp_config_table, 0xf ;; pointer to MP configuration table
  9.6322 -db 1     ;; length of this struct in 16-bit byte chunks
  9.6323 -db 4     ;; MP spec revision
  9.6324 -db 0xc1  ;; checksum
  9.6325 -db 0     ;; MP feature byte 1.  value 0 means look at the config table
  9.6326 -db 0,0,0,0     ;; MP feature bytes 2-5.
  9.6327 -#endif
  9.6328 -
  9.6329  ASM_END
  9.6330  
  9.6331 -#endif // HVMASSIST
    10.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    10.2 +++ b/tools/firmware/rombios/rombios.h	Wed Dec 24 12:52:34 2008 +0900
    10.3 @@ -0,0 +1,70 @@
    10.4 +/////////////////////////////////////////////////////////////////////////
    10.5 +// $Id: rombios.h,v 1.8 2008/12/04 18:48:33 sshwarts Exp $
    10.6 +/////////////////////////////////////////////////////////////////////////
    10.7 +//
    10.8 +//  Copyright (C) 2006 Volker Ruppert
    10.9 +//
   10.10 +//  This library is free software; you can redistribute it and/or
   10.11 +//  modify it under the terms of the GNU Lesser General Public
   10.12 +//  License as published by the Free Software Foundation; either
   10.13 +//  version 2 of the License, or (at your option) any later version.
   10.14 +//
   10.15 +//  This library is distributed in the hope that it will be useful,
   10.16 +//  but WITHOUT ANY WARRANTY; without even the implied warranty of
   10.17 +//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   10.18 +//  Lesser General Public License for more details.
   10.19 +//
   10.20 +//  You should have received a copy of the GNU Lesser General Public
   10.21 +//  License along with this library; if not, write to the Free Software
   10.22 +//  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
   10.23 +
   10.24 +/* define it to include QEMU specific code */
   10.25 +//#define BX_QEMU
   10.26 +#define LEGACY
   10.27 +
   10.28 +#ifndef LEGACY
   10.29 +#  define BX_ROMBIOS32     1
   10.30 +#else
   10.31 +#  define BX_ROMBIOS32     0
   10.32 +#endif
   10.33 +#define DEBUG_ROMBIOS    1
   10.34 +
   10.35 +#define PANIC_PORT  0x400
   10.36 +#define PANIC_PORT2 0x401
   10.37 +#define INFO_PORT   0x402
   10.38 +#define DEBUG_PORT  0x403
   10.39 +
   10.40 +#define BIOS_PRINTF_HALT     1
   10.41 +#define BIOS_PRINTF_SCREEN   2
   10.42 +#define BIOS_PRINTF_INFO     4
   10.43 +#define BIOS_PRINTF_DEBUG    8
   10.44 +#define BIOS_PRINTF_ALL      (BIOS_PRINTF_SCREEN | BIOS_PRINTF_INFO)
   10.45 +#define BIOS_PRINTF_DEBHALT  (BIOS_PRINTF_SCREEN | BIOS_PRINTF_INFO | BIOS_PRINTF_HALT)
   10.46 +
   10.47 +#define printf(format, p...)  bios_printf(BIOS_PRINTF_SCREEN, format, ##p)
   10.48 +
   10.49 +// Defines the output macros.
   10.50 +// BX_DEBUG goes to INFO port until we can easily choose debug info on a
   10.51 +// per-device basis. Debug info are sent only in debug mode
   10.52 +#if DEBUG_ROMBIOS
   10.53 +#  define BX_DEBUG(format, p...)  bios_printf(BIOS_PRINTF_INFO, format, ##p)
   10.54 +#else
   10.55 +#  define BX_DEBUG(format, p...)
   10.56 +#endif
   10.57 +#define BX_INFO(format, p...)   bios_printf(BIOS_PRINTF_INFO, format, ##p)
   10.58 +#define BX_PANIC(format, p...)  bios_printf(BIOS_PRINTF_DEBHALT, format, ##p)
   10.59 +
   10.60 +#define ACPI_DATA_SIZE    0x00010000L
   10.61 +#define PM_IO_BASE        0xb000
   10.62 +#define SMB_IO_BASE       0xb100
   10.63 +
   10.64 +  // Define the application NAME (first matching build flag wins, default "Bochs")
   10.65 +#if defined(HVMASSIST)
   10.66 +#  define BX_APPNAME "HVMAssist"
   10.67 +#elif defined(BX_QEMU)
   10.68 +#  define BX_APPNAME "QEMU"
   10.69 +#elif defined(PLEX86)
   10.70 +#  define BX_APPNAME "Plex86"
   10.71 +#else
   10.72 +#  define BX_APPNAME "Bochs"
   10.73 +#endif
    11.1 --- a/tools/libxc/xc_dom_core.c	Wed Dec 24 12:50:57 2008 +0900
    11.2 +++ b/tools/libxc/xc_dom_core.c	Wed Dec 24 12:52:34 2008 +0900
    11.3 @@ -244,6 +244,7 @@ int xc_dom_do_gunzip(void *src, size_t s
    11.4          return -1;
    11.5      }
    11.6      rc = inflate(&zStream, Z_FINISH);
    11.7 +    inflateEnd(&zStream);
    11.8      if ( rc != Z_STREAM_END )
    11.9      {
   11.10          xc_dom_panic(XC_INTERNAL_ERROR,
    12.1 --- a/tools/libxc/xc_dom_x86.c	Wed Dec 24 12:50:57 2008 +0900
    12.2 +++ b/tools/libxc/xc_dom_x86.c	Wed Dec 24 12:52:34 2008 +0900
    12.3 @@ -418,7 +418,8 @@ static int start_info_x86_32(struct xc_d
    12.4      xc_dom_printf("%s: called\n", __FUNCTION__);
    12.5  
    12.6      memset(start_info, 0, sizeof(*start_info));
    12.7 -    snprintf(start_info->magic, sizeof(start_info->magic), dom->guest_type);
    12.8 +    strncpy(start_info->magic, dom->guest_type, sizeof(start_info->magic));
    12.9 +    start_info->magic[sizeof(start_info->magic) - 1] = '\0';
   12.10      start_info->nr_pages = dom->total_pages;
   12.11      start_info->shared_info = shinfo << PAGE_SHIFT_X86;
   12.12      start_info->pt_base = dom->pgtables_seg.vstart;
   12.13 @@ -457,7 +458,8 @@ static int start_info_x86_64(struct xc_d
   12.14      xc_dom_printf("%s: called\n", __FUNCTION__);
   12.15  
   12.16      memset(start_info, 0, sizeof(*start_info));
   12.17 -    snprintf(start_info->magic, sizeof(start_info->magic), dom->guest_type);
   12.18 +    strncpy(start_info->magic, dom->guest_type, sizeof(start_info->magic));
   12.19 +    start_info->magic[sizeof(start_info->magic) - 1] = '\0';
   12.20      start_info->nr_pages = dom->total_pages;
   12.21      start_info->shared_info = shinfo << PAGE_SHIFT_X86;
   12.22      start_info->pt_base = dom->pgtables_seg.vstart;
    13.1 --- a/tools/libxc/xc_domain.c	Wed Dec 24 12:50:57 2008 +0900
    13.2 +++ b/tools/libxc/xc_domain.c	Wed Dec 24 12:52:34 2008 +0900
    13.3 @@ -1061,6 +1061,20 @@ int xc_domain_suppress_spurious_page_fau
    13.4  
    13.5  }
    13.6  
    13.7 +int xc_domain_debug_control(int xc, uint32_t domid, uint32_t sop, uint32_t vcpu)
    13.8 +{
    13.9 +    DECLARE_DOMCTL;
   13.10 +    /* Zero the request, then fill in the target domain, debug op and vcpu. */
   13.11 +    memset(&domctl, 0, sizeof(domctl));
   13.12 +    domctl.domain = (domid_t)domid;
   13.13 +    domctl.cmd = XEN_DOMCTL_debug_op;
   13.14 +    domctl.u.debug_op.op     = sop;
   13.15 +    domctl.u.debug_op.vcpu   = vcpu;
   13.16 +
   13.17 +    return do_domctl(xc, &domctl); /* result of do_domctl is passed back unchanged */
   13.18 +}
   13.19 +
   13.20 +
   13.21  /*
   13.22   * Local variables:
   13.23   * mode: C
    14.1 --- a/tools/libxc/xc_domain_restore.c	Wed Dec 24 12:50:57 2008 +0900
    14.2 +++ b/tools/libxc/xc_domain_restore.c	Wed Dec 24 12:52:34 2008 +0900
    14.3 @@ -490,6 +490,22 @@ int xc_domain_restore(int xc_handle, int
    14.4              continue;
    14.5          }
    14.6  
    14.7 +        if ( j == -4 )
    14.8 +        {
    14.9 +            uint64_t vm86_tss;
   14.10 +
   14.11 +            /* Skip padding 4 bytes then read the vm86 TSS location. */
   14.12 +            if ( read_exact(io_fd, &vm86_tss, sizeof(uint32_t)) ||
   14.13 +                 read_exact(io_fd, &vm86_tss, sizeof(uint64_t)) )
   14.14 +            {
   14.15 +                ERROR("error read the address of the vm86 TSS");
   14.16 +                goto out;
   14.17 +            }
   14.18 +
   14.19 +            xc_set_hvm_param(xc_handle, dom, HVM_PARAM_VM86_TSS, vm86_tss);
   14.20 +            continue;
   14.21 +        }
   14.22 +
   14.23          if ( j == 0 )
   14.24              break;  /* our work here is done */
   14.25  
    15.1 --- a/tools/libxc/xc_domain_save.c	Wed Dec 24 12:50:57 2008 +0900
    15.2 +++ b/tools/libxc/xc_domain_save.c	Wed Dec 24 12:52:34 2008 +0900
    15.3 @@ -1388,20 +1388,32 @@ int xc_domain_save(int xc_handle, int io
    15.4      if ( hvm )
    15.5      {
    15.6          struct {
    15.7 -            int minusthree;
    15.8 +            int id;
    15.9              uint32_t pad;
   15.10 -            uint64_t ident_pt;
   15.11 -        } chunk = { -3, 0 };
   15.12 +            uint64_t data;
   15.13 +        } chunk = { 0, };
   15.14  
   15.15 +        chunk.id = -3;
   15.16          xc_get_hvm_param(xc_handle, dom, HVM_PARAM_IDENT_PT,
   15.17 -                         (unsigned long *)&chunk.ident_pt);
   15.18 +                         (unsigned long *)&chunk.data);
   15.19  
   15.20 -        if ( (chunk.ident_pt != 0) &&
   15.21 +        if ( (chunk.data != 0) &&
   15.22               write_exact(io_fd, &chunk, sizeof(chunk)) )
   15.23          {
   15.24              PERROR("Error when writing the ident_pt for EPT guest");
   15.25              goto out;
   15.26          }
   15.27 +
   15.28 +        chunk.id = -4;
   15.29 +        xc_get_hvm_param(xc_handle, dom, HVM_PARAM_VM86_TSS,
   15.30 +                         (unsigned long *)&chunk.data);
   15.31 +
   15.32 +        if ( (chunk.data != 0) &&
   15.33 +             write_exact(io_fd, &chunk, sizeof(chunk)) )
   15.34 +        {
   15.35 +            PERROR("Error when writing the vm86 TSS for guest");
   15.36 +            goto out;
   15.37 +        }
   15.38      }
   15.39  
   15.40      /* Zero terminate */
    16.1 --- a/tools/libxc/xc_pm.c	Wed Dec 24 12:50:57 2008 +0900
    16.2 +++ b/tools/libxc/xc_pm.c	Wed Dec 24 12:52:34 2008 +0900
    16.3 @@ -23,8 +23,13 @@
    16.4   *
    16.5   */
    16.6  
    16.7 +#include <errno.h>
    16.8 +#include <stdbool.h>
    16.9  #include "xc_private.h"
   16.10  
   16.11 +/*
   16.12 + * Get PM statistic info
   16.13 + */
   16.14  int xc_pm_get_max_px(int xc_handle, int cpuid, int *max_px)
   16.15  {
   16.16      DECLARE_SYSCTL;
   16.17 @@ -168,3 +173,136 @@ int xc_pm_reset_cxstat(int xc_handle, in
   16.18  
   16.19      return xc_sysctl(xc_handle, &sysctl);
   16.20  }
   16.21 +
   16.22 +
   16.23 +/*
   16.24 + * 1. Get PM parameter
   16.25 + * 2. Provide user PM control
   16.26 + */
   16.27 +int xc_get_cpufreq_para(int xc_handle, int cpuid,
   16.28 +                        struct xc_get_cpufreq_para *user_para)
   16.29 +{
   16.30 +    DECLARE_SYSCTL;
   16.31 +    int ret = 0;
   16.32 +    struct xen_get_cpufreq_para *sys_para = &sysctl.u.pm_op.get_para;
   16.33 +    uint32_t cpu_num, freq_num, gov_num;
   16.34 +    bool has_num;
   16.35 +
   16.36 +    if ( (xc_handle < 0) || !user_para )
   16.37 +        return -EINVAL;
   16.38 +
   16.39 +    /* Snapshot the counts: the EAGAIN path below rewrites the caller's. */
   16.40 +    cpu_num  = user_para->cpu_num;
   16.41 +    freq_num = user_para->freq_num;
   16.42 +    gov_num  = user_para->gov_num;
   16.43 +    has_num  = cpu_num && freq_num && gov_num;
   16.44 +
   16.45 +    if ( has_num )
   16.46 +    {
   16.47 +        if ( (!user_para->affected_cpus)                    ||
   16.48 +             (!user_para->scaling_available_frequencies)    ||
   16.49 +             (!user_para->scaling_available_governors) )
   16.50 +            return -EINVAL;
   16.51 +
   16.52 +        if ( (ret = lock_pages(user_para->affected_cpus,
   16.53 +                               cpu_num * sizeof(uint32_t))) )
   16.54 +            goto unlock_1;
   16.55 +        if ( (ret = lock_pages(user_para->scaling_available_frequencies,
   16.56 +                               freq_num * sizeof(uint32_t))) )
   16.57 +            goto unlock_2;
   16.58 +        if ( (ret = lock_pages(user_para->scaling_available_governors,
   16.59 +                               gov_num * CPUFREQ_NAME_LEN * sizeof(char))) )
   16.60 +            goto unlock_3;
   16.61 +
   16.62 +        set_xen_guest_handle(sys_para->affected_cpus,
   16.63 +                             user_para->affected_cpus);
   16.64 +        set_xen_guest_handle(sys_para->scaling_available_frequencies,
   16.65 +                             user_para->scaling_available_frequencies);
   16.66 +        set_xen_guest_handle(sys_para->scaling_available_governors,
   16.67 +                             user_para->scaling_available_governors);
   16.68 +    }
   16.69 +
   16.70 +    sysctl.cmd = XEN_SYSCTL_pm_op;
   16.71 +    sysctl.u.pm_op.cmd = GET_CPUFREQ_PARA;
   16.72 +    sysctl.u.pm_op.cpuid = cpuid;
   16.73 +    sys_para->cpu_num  = cpu_num;
   16.74 +    sys_para->freq_num = freq_num;
   16.75 +    sys_para->gov_num  = gov_num;
   16.76 +
   16.77 +    ret = xc_sysctl(xc_handle, &sysctl);
   16.78 +    if ( ret && (errno == EAGAIN) )
   16.79 +    {
   16.80 +        user_para->cpu_num  = sys_para->cpu_num;
   16.81 +        user_para->freq_num = sys_para->freq_num;
   16.82 +        user_para->gov_num  = sys_para->gov_num;
   16.83 +        ret = -errno;
   16.84 +    }
   16.85 +    else if ( !ret )
   16.86 +    {
   16.87 +        user_para->cpuinfo_cur_freq = sys_para->cpuinfo_cur_freq;
   16.88 +        user_para->cpuinfo_max_freq = sys_para->cpuinfo_max_freq;
   16.89 +        user_para->cpuinfo_min_freq = sys_para->cpuinfo_min_freq;
   16.90 +        user_para->scaling_cur_freq = sys_para->scaling_cur_freq;
   16.91 +        user_para->scaling_max_freq = sys_para->scaling_max_freq;
   16.92 +        user_para->scaling_min_freq = sys_para->scaling_min_freq;
   16.93 +        memcpy(user_para->scaling_driver,
   16.94 +                sys_para->scaling_driver, CPUFREQ_NAME_LEN);
   16.95 +        memcpy(user_para->scaling_governor,
   16.96 +                sys_para->scaling_governor, CPUFREQ_NAME_LEN);
   16.97 +
   16.98 +        /* copy to user_para no matter what cpufreq governor */
   16.99 +        XC_BUILD_BUG_ON(sizeof(((struct xc_get_cpufreq_para *)0)->u) !=
  16.100 +                        sizeof(((struct xen_get_cpufreq_para *)0)->u));
  16.101 +
  16.102 +        memcpy(&user_para->u, &sys_para->u, sizeof(sys_para->u));
  16.103 +    }
  16.104 +
  16.105 +    /* The buffers were only locked when all three counts were non-zero. */
  16.106 +    if ( !has_num )
  16.107 +        goto unlock_1;
  16.108 +
  16.109 +    unlock_pages(user_para->scaling_available_governors,
  16.110 +                 gov_num * CPUFREQ_NAME_LEN * sizeof(char));
  16.111 +unlock_3:
  16.112 +    unlock_pages(user_para->scaling_available_frequencies,
  16.113 +                 freq_num * sizeof(uint32_t));
  16.114 +unlock_2:
  16.115 +    unlock_pages(user_para->affected_cpus,
  16.116 +                 cpu_num * sizeof(uint32_t));
  16.117 +unlock_1:
  16.118 +    return ret;
  16.119 +}
  16.120 +
  16.121 +int xc_set_cpufreq_gov(int xc_handle, int cpuid, char *govname)
  16.122 +{
  16.123 +    DECLARE_SYSCTL;
  16.124 +    char *scaling_governor = sysctl.u.pm_op.set_gov.scaling_governor;
  16.125 +    /* Reject a negative handle or a missing governor name up front. */
  16.126 +    if ( (xc_handle < 0) || (!govname) )
  16.127 +        return -EINVAL;
  16.128 +
  16.129 +    sysctl.cmd = XEN_SYSCTL_pm_op;
  16.130 +    sysctl.u.pm_op.cmd = SET_CPUFREQ_GOV;
  16.131 +    sysctl.u.pm_op.cpuid = cpuid;
  16.132 +    strncpy(scaling_governor, govname, CPUFREQ_NAME_LEN);
  16.133 +    scaling_governor[CPUFREQ_NAME_LEN - 1] = '\0'; /* strncpy does not terminate on truncation */
  16.134 +
  16.135 +    return xc_sysctl(xc_handle, &sysctl);
  16.136 +}
  16.137 +
  16.138 +int xc_set_cpufreq_para(int xc_handle, int cpuid, 
  16.139 +                        int ctrl_type, int ctrl_value)
  16.140 +{
  16.141 +    DECLARE_SYSCTL;
  16.142 +
  16.143 +    if ( xc_handle < 0 )
  16.144 +        return -EINVAL;
  16.145 +    /* ctrl_type/ctrl_value are forwarded unchanged in a SET_CPUFREQ_PARA pm_op. */
  16.146 +    sysctl.cmd = XEN_SYSCTL_pm_op;
  16.147 +    sysctl.u.pm_op.cmd = SET_CPUFREQ_PARA;
  16.148 +    sysctl.u.pm_op.cpuid = cpuid;
  16.149 +    sysctl.u.pm_op.set_para.ctrl_type = ctrl_type;
  16.150 +    sysctl.u.pm_op.set_para.ctrl_value = ctrl_value;
  16.151 +
  16.152 +    return xc_sysctl(xc_handle, &sysctl);
  16.153 +}
    17.1 --- a/tools/libxc/xc_private.h	Wed Dec 24 12:50:57 2008 +0900
    17.2 +++ b/tools/libxc/xc_private.h	Wed Dec 24 12:52:34 2008 +0900
    17.3 @@ -43,6 +43,9 @@
    17.4  #define INFO     1
    17.5  #define PROGRESS 0
    17.6  
    17.7 +/* Force a compilation error if condition is true */
    17.8 +#define XC_BUILD_BUG_ON(p) ((void)sizeof(struct { int:-!!(p); }))
    17.9 +
   17.10  /*
   17.11  ** Define max dirty page cache to permit during save/restore -- need to balance 
   17.12  ** keeping cache usage down with CPU impact of invalidating too often.
    18.1 --- a/tools/libxc/xc_ptrace.c	Wed Dec 24 12:50:57 2008 +0900
    18.2 +++ b/tools/libxc/xc_ptrace.c	Wed Dec 24 12:52:34 2008 +0900
    18.3 @@ -524,10 +524,20 @@ xc_ptrace(
    18.4          /*  XXX we can still have problems if the user switches threads
    18.5           *  during single-stepping - but that just seems retarded
    18.6           */
    18.7 -        ctxt[cpu].c.user_regs.eflags |= PSL_T;
    18.8 -        if ((retval = xc_vcpu_setcontext(xc_handle, current_domid, cpu,
    18.9 -                                &ctxt[cpu])))
   18.10 -            goto out_error_domctl;
   18.11 +        /* Try to enable Monitor Trap Flag for HVM, and fall back to TF
   18.12 +         * if no MTF support
   18.13 +         */
   18.14 +        if ( !current_is_hvm ||
   18.15 +             xc_domain_debug_control(xc_handle,
   18.16 +                                     current_domid,
   18.17 +                                     XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_ON,
   18.18 +                                     cpu) )
   18.19 +        {
   18.20 +            ctxt[cpu].c.user_regs.eflags |= PSL_T;
   18.21 +            if ((retval = xc_vcpu_setcontext(xc_handle, current_domid, cpu,
   18.22 +                                    &ctxt[cpu])))
   18.23 +                goto out_error_domctl;
   18.24 +        }
   18.25          /* FALLTHROUGH */
   18.26  
   18.27      case PTRACE_CONT:
   18.28 @@ -538,15 +548,22 @@ xc_ptrace(
   18.29          {
   18.30              FOREACH_CPU(cpumap, index) {
   18.31                  cpu = index - 1;
   18.32 -                if (fetch_regs(xc_handle, cpu, NULL))
   18.33 -                    goto out_error;
   18.34 -                /* Clear trace flag */
   18.35 -                if ( ctxt[cpu].c.user_regs.eflags & PSL_T )
   18.36 +                if ( !current_is_hvm ||
   18.37 +                      xc_domain_debug_control(xc_handle,
   18.38 +                                              current_domid,
   18.39 +                                              XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_OFF,
   18.40 +                                              cpu) )
   18.41                  {
   18.42 -                    ctxt[cpu].c.user_regs.eflags &= ~PSL_T;
   18.43 -                    if ((retval = xc_vcpu_setcontext(xc_handle, current_domid,
   18.44 -                                                cpu, &ctxt[cpu])))
   18.45 -                        goto out_error_domctl;
   18.46 +                    if (fetch_regs(xc_handle, cpu, NULL))
   18.47 +                        goto out_error;
   18.48 +                    /* Clear trace flag */
   18.49 +                    if ( ctxt[cpu].c.user_regs.eflags & PSL_T )
   18.50 +                    {
   18.51 +                        ctxt[cpu].c.user_regs.eflags &= ~PSL_T;
   18.52 +                        if ((retval = xc_vcpu_setcontext(xc_handle, current_domid,
   18.53 +                                        cpu, &ctxt[cpu])))
   18.54 +                            goto out_error_domctl;
   18.55 +                    }
   18.56                  }
   18.57              }
   18.58          }
    19.1 --- a/tools/libxc/xenctrl.h	Wed Dec 24 12:50:57 2008 +0900
    19.2 +++ b/tools/libxc/xenctrl.h	Wed Dec 24 12:52:34 2008 +0900
    19.3 @@ -1111,6 +1111,12 @@ int xc_domain_set_target(int xc_handle,
    19.4                           uint32_t domid,
    19.5                           uint32_t target);
    19.6  
    19.7 +/* Control the domain for debug */
    19.8 +int xc_domain_debug_control(int xc_handle,
    19.9 +                            uint32_t domid,
   19.10 +                            uint32_t sop,
   19.11 +                            uint32_t vcpu);
   19.12 +
   19.13  #if defined(__i386__) || defined(__x86_64__)
   19.14  int xc_cpuid_check(int xc,
   19.15                     const unsigned int *input,
   19.16 @@ -1161,4 +1167,46 @@ int xc_pm_reset_cxstat(int xc_handle, in
   19.17  
   19.18  int xc_cpu_online(int xc_handle, int cpu);
   19.19  int xc_cpu_offline(int xc_handle, int cpu);
   19.20 +
   19.21 +/* 
   19.22 + * The cpufreq parameter names in this structure are the
   19.23 + * same as the sysfs file names in native Linux.
   19.24 + */
   19.25 +typedef xen_userspace_t xc_userspace_t;
   19.26 +typedef xen_ondemand_t xc_ondemand_t;
   19.27 +
   19.28 +struct xc_get_cpufreq_para {
   19.29 +    /* IN/OUT variable */
   19.30 +    uint32_t cpu_num;
   19.31 +    uint32_t freq_num;
   19.32 +    uint32_t gov_num;
   19.33 +
   19.34 +    /* for all governors */
   19.35 +    /* OUT variable */
   19.36 +    uint32_t *affected_cpus;
   19.37 +    uint32_t *scaling_available_frequencies;
   19.38 +    char     *scaling_available_governors;
   19.39 +    char scaling_driver[CPUFREQ_NAME_LEN];
   19.40 +
   19.41 +    uint32_t cpuinfo_cur_freq;
   19.42 +    uint32_t cpuinfo_max_freq;
   19.43 +    uint32_t cpuinfo_min_freq;
   19.44 +    uint32_t scaling_cur_freq;
   19.45 +
   19.46 +    char scaling_governor[CPUFREQ_NAME_LEN];
   19.47 +    uint32_t scaling_max_freq;
   19.48 +    uint32_t scaling_min_freq;
   19.49 +
   19.50 +    /* for specific governor */
   19.51 +    union {
   19.52 +        xc_userspace_t userspace;
   19.53 +        xc_ondemand_t ondemand;
   19.54 +    } u;
   19.55 +};
   19.56 +
   19.57 +int xc_get_cpufreq_para(int xc_handle, int cpuid,
   19.58 +                        struct xc_get_cpufreq_para *user_para);
   19.59 +int xc_set_cpufreq_gov(int xc_handle, int cpuid, char *govname);
   19.60 +int xc_set_cpufreq_para(int xc_handle, int cpuid,
   19.61 +                        int ctrl_type, int ctrl_value);
   19.62  #endif /* XENCTRL_H */
    20.1 --- a/tools/libxc/xg_private.c	Wed Dec 24 12:50:57 2008 +0900
    20.2 +++ b/tools/libxc/xg_private.c	Wed Dec 24 12:52:34 2008 +0900
    20.3 @@ -131,6 +131,7 @@ char *xc_inflate_buffer(const char *in_b
    20.4  
    20.5      /* Inflate in one pass/call */
    20.6      sts = inflate(&zStream, Z_FINISH);
    20.7 +    inflateEnd(&zStream);
    20.8      if ( sts != Z_STREAM_END )
    20.9      {
   20.10          ERROR("inflate failed, sts %d\n", sts);
    21.1 --- a/tools/misc/xen-detect.c	Wed Dec 24 12:50:57 2008 +0900
    21.2 +++ b/tools/misc/xen-detect.c	Wed Dec 24 12:52:34 2008 +0900
    21.3 @@ -50,17 +50,25 @@ static int check_for_xen(void)
    21.4  {
    21.5      uint32_t eax, ebx, ecx, edx;
    21.6      char signature[13];
    21.7 +    uint32_t base;
    21.8  
    21.9 -    cpuid(0x40000000, &eax, &ebx, &ecx, &edx);
   21.10 -    *(uint32_t *)(signature + 0) = ebx;
   21.11 -    *(uint32_t *)(signature + 4) = ecx;
   21.12 -    *(uint32_t *)(signature + 8) = edx;
   21.13 -    signature[12] = '\0';
   21.14 +    for ( base = 0x40000000; base < 0x40001000; base += 0x100 )
   21.15 +    {
   21.16 +        cpuid(base, &eax, &ebx, &ecx, &edx);
   21.17 +
   21.18 +        *(uint32_t *)(signature + 0) = ebx;
   21.19 +        *(uint32_t *)(signature + 4) = ecx;
   21.20 +        *(uint32_t *)(signature + 8) = edx;
   21.21 +        signature[12] = '\0';
   21.22  
   21.23 -    if ( strcmp("XenVMMXenVMM", signature) || (eax < 0x40000002) )
   21.24 -        return 0;
   21.25 +        if ( !strcmp("XenVMMXenVMM", signature) && (eax >= (base + 2)) )
   21.26 +            goto found;
   21.27 +    }
   21.28  
   21.29 -    cpuid(0x40000001, &eax, &ebx, &ecx, &edx);
   21.30 +    return 0;
   21.31 +
   21.32 + found:
   21.33 +    cpuid(base + 1, &eax, &ebx, &ecx, &edx);
   21.34      printf("Running in %s context on Xen v%d.%d.\n",
   21.35             pv_context ? "PV" : "HVM", (uint16_t)(eax >> 16), (uint16_t)eax);
   21.36      return 1;
    22.1 --- a/tools/misc/xenpm.c	Wed Dec 24 12:50:57 2008 +0900
    22.2 +++ b/tools/misc/xenpm.c	Wed Dec 24 12:52:34 2008 +0900
    22.3 @@ -16,199 +16,591 @@
    22.4   * Place - Suite 330, Boston, MA 02111-1307 USA.
    22.5   */
    22.6  
    22.7 +/* to eliminate warning on `strndup' */
    22.8 +#define _GNU_SOURCE
    22.9 +
   22.10  #include <stdio.h>
   22.11  #include <stdlib.h>
   22.12 +#include <string.h>
   22.13  #include <getopt.h>
   22.14  #include <errno.h>
   22.15  
   22.16  #include <xenctrl.h>
   22.17  #include <inttypes.h>
   22.18  
   22.19 -int main(int argc, char **argv)
   22.20 +#define ARRAY_SIZE(a) (sizeof (a) / sizeof ((a)[0]))
   22.21 +
   22.22 +/* help message */
   22.23 +void show_help(void)
   22.24 +{
   22.25 +    fprintf(stderr,
   22.26 +            "Usage:\n"
   22.27 +            "       xenpm get-cpuidle-states [cpuid]: list cpu idle information on CPU cpuid or all CPUs.\n"
   22.28 +            "       xenpm get-cpufreq-states [cpuid]: list cpu frequency information on CPU cpuid or all CPUs.\n"
   22.29 +            "       xenpm get-cpufreq-para [cpuid]: list cpu frequency information on CPU cpuid or all CPUs.\n"
   22.30 +            "       xenpm set-scaling-maxfreq <cpuid> <HZ>: set max cpu frequency <HZ> on CPU <cpuid>.\n"
   22.31 +            "       xenpm set-scaling-minfreq <cpuid> <HZ>: set min cpu frequency <HZ> on CPU <cpuid>.\n"
   22.32 +            "       xenpm set-scaling-governor <cpuid> <name>: set scaling governor on CPU <cpuid>.\n"
   22.33 +            "       xenpm set-scaling-speed <cpuid> <num>: set scaling speed on CPU <cpuid>.\n"
   22.34 +            "       xenpm set-sampling-rate <cpuid> <num>: set sampling rate on CPU <cpuid>.\n"
   22.35 +            "       xenpm set-up-threshold <cpuid> <num>: set up threshold on CPU <cpuid>.\n");
   22.36 +}
   22.37 +
   22.38 +/* wrapper: prints the usage text; xc_fd, cpuid and value are unused; always returns 0 */
   22.39 +int help_func(int xc_fd, int cpuid, uint32_t value)
   22.40 +{
   22.41 +    show_help();
   22.42 +    return 0;
   22.43 +}
   22.44 +
   22.45 +/* show cpu idle information on CPU cpuid */
   22.46 +static int show_cx_cpuid(int xc_fd, int cpuid)
   22.47  {
   22.48 -    int xc_fd;
   22.49 -    int i, j, ret = 0;
   22.50 -    int cinfo = 0, pinfo = 0;
   22.51 -    int ch;
   22.52 +    int i, ret = 0;
   22.53 +    int max_cx_num = 0;
   22.54 +    struct xc_cx_stat cxstatinfo, *cxstat = &cxstatinfo;
   22.55 +
   22.56 +    ret = xc_pm_get_max_cx(xc_fd, cpuid, &max_cx_num);
   22.57 +    if ( ret )
   22.58 +    {
   22.59 +        if ( errno == ENODEV )
   22.60 +        {
   22.61 +            fprintf(stderr, "Xen cpuidle is not enabled!\n");
   22.62 +            return -ENODEV;
   22.63 +        }
   22.64 +        else
   22.65 +        {
   22.66 +            fprintf(stderr, "[CPU%d] failed to get max C-state\n", cpuid);
   22.67 +            return -EINVAL;
   22.68 +        }
   22.69 +    }
   22.70 +
   22.71 +    cxstat->triggers = malloc(max_cx_num * sizeof(uint64_t));
   22.72 +    if ( !cxstat->triggers )
   22.73 +    {
   22.74 +        fprintf(stderr, "[CPU%d] failed to malloc for C-states triggers\n", cpuid);
   22.75 +        return -ENOMEM;
   22.76 +    }
   22.77 +    cxstat->residencies = malloc(max_cx_num * sizeof(uint64_t));
   22.78 +    if ( !cxstat->residencies )
   22.79 +    {
   22.80 +        fprintf(stderr, "[CPU%d] failed to malloc for C-states residencies\n", cpuid);
   22.81 +        free(cxstat->triggers);
   22.82 +        return -ENOMEM;
   22.83 +    }
   22.84 +
   22.85 +    ret = xc_pm_get_cxstat(xc_fd, cpuid, cxstat);
   22.86 +    if( ret )
   22.87 +    {
   22.88 +        fprintf(stderr, "[CPU%d] failed to get C-states statistics "
   22.89 +                "information\n", cpuid);
   22.90 +        free(cxstat->triggers);
   22.91 +        free(cxstat->residencies);
   22.92 +        return -EINVAL;
   22.93 +    }
   22.94 +
   22.95 +    printf("cpu id               : %d\n", cpuid);
   22.96 +    printf("total C-states       : %d\n", cxstat->nr);
   22.97 +    printf("idle time(ms)        : %"PRIu64"\n",
   22.98 +           cxstat->idle_time/1000000UL);
   22.99 +    for ( i = 0; i < cxstat->nr; i++ )
  22.100 +    {
  22.101 +        printf("C%d                   : transition [%020"PRIu64"]\n",
  22.102 +               i, cxstat->triggers[i]);
  22.103 +        printf("                       residency  [%020"PRIu64" ms]\n",
  22.104 +               cxstat->residencies[i]/1000000UL);
  22.105 +    }
  22.106 +
  22.107 +    free(cxstat->triggers);
  22.108 +    free(cxstat->residencies);
  22.109 +
  22.110 +    printf("\n");
  22.111 +    return 0;
  22.112 +}
  22.113 +
  22.114 +int cxstates_func(int xc_fd, int cpuid, uint32_t value)
  22.115 +{
  22.116 +    int ret = 0;
  22.117      xc_physinfo_t physinfo = { 0 };
  22.118  
  22.119 -    while ( (ch = getopt(argc, argv, "cp")) != -1 )
  22.120 +    if ( cpuid < 0 )
  22.121      {
  22.122 -        switch ( ch )
  22.123 +        /* show cxstates on all cpu */
  22.124 +        ret = xc_physinfo(xc_fd, &physinfo);
  22.125 +        if ( ret )
  22.126 +        {
  22.127 +            fprintf(stderr, "failed to get the processor information\n");
  22.128 +        }
  22.129 +        else
  22.130          {
  22.131 -        case 'c':
  22.132 -            cinfo = 1;
  22.133 -            break;
  22.134 -        case 'p':
  22.135 -            pinfo = 1;
  22.136 -            break;
  22.137 -        default:
  22.138 -            fprintf(stderr, "%s [-p] [-c]\n", argv[0]);
  22.139 -            return -1;
  22.140 +            int i;
  22.141 +            for ( i = 0; i < physinfo.nr_cpus; i++ )
  22.142 +            {
  22.143 +                if ( (ret = show_cx_cpuid(xc_fd, i)) == -ENODEV )
  22.144 +                    break;
  22.145 +            }
  22.146 +        }
  22.147 +    }
  22.148 +    else
  22.149 +        ret = show_cx_cpuid(xc_fd, cpuid);
  22.150 +
  22.151 +    return ret;
  22.152 +}
  22.153 +
  22.154 +/* Print P-state statistics for one CPU; returns 0, -ENODEV (cpufreq off), -ENOMEM or -EINVAL */
  22.155 +static int show_px_cpuid(int xc_fd, int cpuid)
  22.156 +{
  22.157 +    int i, ret = 0;
  22.158 +    int max_px_num = 0;
  22.159 +    struct xc_px_stat pxstatinfo, *pxstat = &pxstatinfo;
  22.160 +
  22.161 +    ret = xc_pm_get_max_px(xc_fd, cpuid, &max_px_num);
  22.162 +    if ( ret )
  22.163 +    {
  22.164 +        if ( errno == ENODEV )
  22.165 +        {
  22.166 +            fprintf(stderr, "Xen cpufreq is not enabled!\n");
  22.167 +            return -ENODEV;
  22.168 +        }
  22.169 +        else
  22.170 +        {
  22.171 +            fprintf(stderr, "[CPU%d] failed to get max P-state\n", cpuid);
  22.172 +            return -EINVAL;
  22.173          }
  22.174      }
  22.175  
  22.176 -    if ( !cinfo && !pinfo )
  22.177 +    pxstat->trans_pt = malloc(max_px_num * max_px_num *
  22.178 +                              sizeof(uint64_t));
  22.179 +    if ( !pxstat->trans_pt )
  22.180 +    {
  22.181 +        fprintf(stderr, "[CPU%d] failed to malloc for P-states transition table\n", cpuid);
  22.182 +        return -ENOMEM;
  22.183 +    }
  22.184 +    pxstat->pt = malloc(max_px_num * sizeof(struct xc_px_val));
  22.185 +    if ( !pxstat->pt )
  22.186 +    {
  22.187 +        fprintf(stderr, "[CPU%d] failed to malloc for P-states table\n", cpuid);
  22.188 +        free(pxstat->trans_pt);
  22.189 +        return -ENOMEM;
  22.190 +    }
  22.191 +
  22.192 +    ret = xc_pm_get_pxstat(xc_fd, cpuid, pxstat);
  22.193 +    if( ret )
  22.194 +    {
  22.195 +        fprintf(stderr, "[CPU%d] failed to get P-states statistics information\n", cpuid);
  22.196 +        free(pxstat->trans_pt);
  22.197 +        free(pxstat->pt);
  22.198 +        return -EINVAL; /* not an allocation failure; matches show_cx_cpuid */
  22.199 +    }
  22.200 +
  22.201 +    printf("cpu id               : %d\n", cpuid);
  22.202 +    printf("total P-states       : %d\n", pxstat->total);
  22.203 +    printf("usable P-states      : %d\n", pxstat->usable);
  22.204 +    printf("current frequency    : %"PRIu64" MHz\n",
  22.205 +           pxstat->pt[pxstat->cur].freq);
  22.206 +    for ( i = 0; i < pxstat->total; i++ )
  22.207 +    {
  22.208 +        if ( pxstat->cur == i )
  22.209 +            printf("*P%d", i);
  22.210 +        else
  22.211 +            printf("P%d ", i);
  22.212 +        printf("                  : freq       [%04"PRIu64" MHz]\n",
  22.213 +               pxstat->pt[i].freq);
  22.214 +        printf("                       transition [%020"PRIu64"]\n",
  22.215 +               pxstat->pt[i].count);
  22.216 +        printf("                       residency  [%020"PRIu64" ms]\n",
  22.217 +               pxstat->pt[i].residency/1000000UL);
  22.218 +    }
  22.219 +
  22.220 +    free(pxstat->trans_pt);
  22.221 +    free(pxstat->pt);
  22.222 +
  22.223 +    printf("\n");
  22.224 +    return 0;
  22.225 +}
  22.226 +
  22.227 +int pxstates_func(int xc_fd, int cpuid, uint32_t value)
  22.228 +{
  22.229 +    int ret = 0;
  22.230 +    xc_physinfo_t physinfo = { 0 };
  22.231 +
  22.232 +    if ( cpuid < 0 )
  22.233 +    {
  22.234 +        ret = xc_physinfo(xc_fd, &physinfo);
  22.235 +        if ( ret )
  22.236 +        {
  22.237 +            fprintf(stderr, "failed to get the processor information\n");
  22.238 +        }
  22.239 +        else
  22.240 +        {
  22.241 +            int i;
  22.242 +            for ( i = 0; i < physinfo.nr_cpus; i++ )
  22.243 +            {
  22.244 +                if ( (ret = show_px_cpuid(xc_fd, i)) == -ENODEV )
  22.245 +                    break;
  22.246 +            }
  22.247 +        }
  22.248 +    }
  22.249 +    else
  22.250 +        ret = show_px_cpuid(xc_fd, cpuid);
  22.251 +
  22.252 +    return ret;
  22.253 +}
  22.254 +
  22.255 +/* print out parameters about cpu frequency */
  22.256 +static void print_cpufreq_para(int cpuid, struct xc_get_cpufreq_para *p_cpufreq)
  22.257 +{
  22.258 +    int i;
  22.259 +
  22.260 +    printf("cpu id               : %d\n", cpuid);
  22.261 +
  22.262 +    printf("affected_cpus        :");
  22.263 +    for ( i = 0; i < p_cpufreq->cpu_num; i++ )
  22.264 +        if ( i == cpuid )
  22.265 +            printf(" *%d", p_cpufreq->affected_cpus[i]);
  22.266 +        else
  22.267 +            printf(" %d", p_cpufreq->affected_cpus[i]);
  22.268 +    printf("\n");
  22.269 +
  22.270 +    printf("cpuinfo frequency    : max [%u] min [%u] cur [%u]\n",
  22.271 +           p_cpufreq->cpuinfo_max_freq,
  22.272 +           p_cpufreq->cpuinfo_min_freq,
  22.273 +           p_cpufreq->cpuinfo_cur_freq);
  22.274 +
  22.275 +    printf("scaling_driver       : %s\n", p_cpufreq->scaling_driver);
  22.276 +
  22.277 +    printf("scaling_avail_gov    : %s\n",
  22.278 +           p_cpufreq->scaling_available_governors);
  22.279 +
  22.280 +    printf("current_governor     : %s\n", p_cpufreq->scaling_governor);
  22.281 +    if ( !strncmp(p_cpufreq->scaling_governor,
  22.282 +                  "userspace", CPUFREQ_NAME_LEN) )
  22.283 +    {
  22.284 +        printf("  userspace specific :\n");
  22.285 +        printf("    scaling_setspeed : %u\n",
  22.286 +               p_cpufreq->u.userspace.scaling_setspeed);
  22.287 +    }
  22.288 +    else if ( !strncmp(p_cpufreq->scaling_governor,
  22.289 +                       "ondemand", CPUFREQ_NAME_LEN) )
  22.290 +    {
  22.291 +        printf("  ondemand specific  :\n");
  22.292 +        printf("    sampling_rate    : max [%u] min [%u] cur [%u]\n",
  22.293 +               p_cpufreq->u.ondemand.sampling_rate_max,
  22.294 +               p_cpufreq->u.ondemand.sampling_rate_min,
  22.295 +               p_cpufreq->u.ondemand.sampling_rate);
  22.296 +        printf("    up_threshold     : %u\n",
  22.297 +               p_cpufreq->u.ondemand.up_threshold);
  22.298 +    }
  22.299 +
  22.300 +    printf("scaling_avail_freq   :");
  22.301 +    for ( i = 0; i < p_cpufreq->freq_num; i++ )
  22.302 +        if ( p_cpufreq->scaling_available_frequencies[i] == p_cpufreq->scaling_cur_freq )
  22.303 +            printf(" *%d", p_cpufreq->scaling_available_frequencies[i]);
  22.304 +        else
  22.305 +            printf(" %d", p_cpufreq->scaling_available_frequencies[i]);
  22.306 +    printf("\n");
  22.307 +
  22.308 +    printf("scaling frequency    : max [%u] min [%u] cur [%u]\n",
  22.309 +           p_cpufreq->scaling_max_freq,
  22.310 +           p_cpufreq->scaling_min_freq,
  22.311 +           p_cpufreq->scaling_cur_freq);
  22.312 +    printf("\n");
  22.313 +}
  22.314 +
  22.315 +/* show cpu frequency parameters information on CPU cpuid */
  22.316 +static int show_cpufreq_para_cpuid(int xc_fd, int cpuid)
  22.317 +{
  22.318 +    int ret = 0;
  22.319 +    struct xc_get_cpufreq_para cpufreq_para, *p_cpufreq = &cpufreq_para;
  22.320 +
  22.321 +    p_cpufreq->cpu_num = 0;
  22.322 +    p_cpufreq->freq_num = 0;
  22.323 +    p_cpufreq->gov_num = 0;
  22.324 +    p_cpufreq->affected_cpus = NULL;
  22.325 +    p_cpufreq->scaling_available_frequencies = NULL;
  22.326 +    p_cpufreq->scaling_available_governors = NULL;
  22.327 +
  22.328 +    do
  22.329      {
  22.330 -        cinfo = 1;
  22.331 -        pinfo = 1;
  22.332 +        free(p_cpufreq->affected_cpus);
  22.333 +        free(p_cpufreq->scaling_available_frequencies);
  22.334 +        free(p_cpufreq->scaling_available_governors);
  22.335 +
  22.336 +        p_cpufreq->affected_cpus = NULL;
  22.337 +        p_cpufreq->scaling_available_frequencies = NULL;
  22.338 +        p_cpufreq->scaling_available_governors = NULL;
  22.339 +
  22.340 +        if (!(p_cpufreq->affected_cpus =
  22.341 +              malloc(p_cpufreq->cpu_num * sizeof(uint32_t))))
  22.342 +        {
  22.343 +            fprintf(stderr,
  22.344 +                    "[CPU%d] failed to malloc for affected_cpus\n",
  22.345 +                    cpuid);
  22.346 +            ret = -ENOMEM;
  22.347 +            goto out;
  22.348 +        }
  22.349 +        if (!(p_cpufreq->scaling_available_frequencies =
  22.350 +              malloc(p_cpufreq->freq_num * sizeof(uint32_t))))
  22.351 +        {
  22.352 +            fprintf(stderr,
  22.353 +                    "[CPU%d] failed to malloc for scaling_available_frequencies\n",
  22.354 +                    cpuid);
  22.355 +            ret = -ENOMEM;
  22.356 +            goto out;
  22.357 +        }
  22.358 +        if (!(p_cpufreq->scaling_available_governors =
  22.359 +              malloc(p_cpufreq->gov_num * CPUFREQ_NAME_LEN * sizeof(char))))
  22.360 +        {
  22.361 +            fprintf(stderr,
  22.362 +                    "[CPU%d] failed to malloc for scaling_available_governors\n",
  22.363 +                    cpuid);
  22.364 +            ret = -ENOMEM;
  22.365 +            goto out;
  22.366 +        }
  22.367 +
  22.368 +        ret = xc_get_cpufreq_para(xc_fd, cpuid, p_cpufreq);
  22.369 +    } while ( ret && errno == EAGAIN );
  22.370 +
  22.371 +    if ( ret == 0 )
  22.372 +        print_cpufreq_para(cpuid, p_cpufreq);
  22.373 +    else if ( errno == ENODEV )
  22.374 +    {
  22.375 +        ret = -ENODEV;
  22.376 +        fprintf(stderr, "Xen cpufreq is not enabled!\n");
  22.377 +    }
  22.378 +    else
  22.379 +        fprintf(stderr,
  22.380 +                "[CPU%d] failed to get cpufreq parameter\n",
  22.381 +                cpuid);
  22.382 +
  22.383 +out:
  22.384 +    free(p_cpufreq->scaling_available_governors);
  22.385 +    free(p_cpufreq->scaling_available_frequencies);
  22.386 +    free(p_cpufreq->affected_cpus);
  22.387 +
  22.388 +    return ret;
  22.389 +}
  22.390 +
  22.391 +int cpufreq_para_func(int xc_fd, int cpuid, uint32_t value)
  22.392 +{
  22.393 +    int ret = 0;
  22.394 +    xc_physinfo_t physinfo = { 0 };
  22.395 +
  22.396 +    if ( cpuid < 0 )
  22.397 +    {
  22.398 +        ret = xc_physinfo(xc_fd, &physinfo);
  22.399 +        if ( ret )
  22.400 +        {
  22.401 +            fprintf(stderr, "failed to get the processor information\n");
  22.402 +        }
  22.403 +        else
  22.404 +        {
  22.405 +            int i;
  22.406 +            for ( i = 0; i < physinfo.nr_cpus; i++ )
  22.407 +            {
  22.408 +                if ( (ret = show_cpufreq_para_cpuid(xc_fd, i)) == -ENODEV )
  22.409 +                    break;
  22.410 +            }
  22.411 +        }
  22.412 +    }
  22.413 +    else
  22.414 +        ret = show_cpufreq_para_cpuid(xc_fd, cpuid);
  22.415 +
  22.416 +    return ret;
  22.417 +}
  22.418 +
  22.419 +int scaling_max_freq_func(int xc_fd, int cpuid, uint32_t value)
  22.420 +{
  22.421 +    int ret = 0;
  22.422 +
  22.423 +    if ( cpuid < 0 )
  22.424 +    {
  22.425 +        show_help();
  22.426 +        return -EINVAL;
  22.427 +    }
  22.428 +
  22.429 +    ret = xc_set_cpufreq_para(xc_fd, cpuid, SCALING_MAX_FREQ, value);
  22.430 +    if ( ret )
  22.431 +    {
  22.432 +        fprintf(stderr, "[CPU%d] failed to set scaling max freq\n", cpuid);
  22.433 +    }
  22.434 +
  22.435 +    return ret;
  22.436 +}
  22.437 +
  22.438 +int scaling_min_freq_func(int xc_fd, int cpuid, uint32_t value)
  22.439 +{
  22.440 +    int ret;
  22.441 +
  22.442 +    if ( cpuid < 0 )
  22.443 +    {
  22.444 +        show_help();
  22.445 +        return -EINVAL;
  22.446 +    }
  22.447 +
  22.448 +    ret = xc_set_cpufreq_para(xc_fd, cpuid, SCALING_MIN_FREQ, value);
  22.449 +    if ( ret )
  22.450 +    {
  22.451 +        fprintf(stderr, "[CPU%d] failed to set scaling min freq\n", cpuid);
  22.452 +    }
  22.453 +
  22.454 +    return ret;
  22.455 +}
  22.456 +
  22.457 +int scaling_speed_func(int xc_fd, int cpuid, uint32_t value)
  22.458 +{
  22.459 +    int ret;
  22.460 +
  22.461 +    if ( cpuid < 0 )
  22.462 +    {
  22.463 +        show_help();
  22.464 +        return -EINVAL;
  22.465 +    }
  22.466 +
  22.467 +    ret = xc_set_cpufreq_para(xc_fd, cpuid, SCALING_SETSPEED, value);
  22.468 +    if ( ret )
  22.469 +    {
  22.470 +        fprintf(stderr, "[CPU%d] failed to set scaling speed\n", cpuid);
  22.471 +    }
  22.472 +
  22.473 +    return ret;
  22.474 +}
  22.475 +
  22.476 +int scaling_sampling_rate_func(int xc_fd, int cpuid, uint32_t value)
  22.477 +{
  22.478 +    int ret;
  22.479 +
  22.480 +    if ( cpuid < 0 )
  22.481 +    {
  22.482 +        show_help();
  22.483 +        return -EINVAL;
  22.484 +    }
  22.485 +
  22.486 +    ret = xc_set_cpufreq_para(xc_fd, cpuid, SAMPLING_RATE, value);
  22.487 +    if ( ret ) 
  22.488 +    {
  22.489 +        fprintf(stderr, "[CPU%d] failed to set scaling sampling rate\n", cpuid);
  22.490 +    }
  22.491 +
  22.492 +    return ret;
  22.493 +}
  22.494 +
  22.495 +int scaling_up_threshold_func(int xc_fd, int cpuid, uint32_t value)
  22.496 +{
  22.497 +    int ret;
  22.498 +
  22.499 +    if ( cpuid < 0 )
  22.500 +    {
  22.501 +        show_help();
  22.502 +        return -EINVAL;
  22.503 +    }
  22.504 +
  22.505 +    ret = xc_set_cpufreq_para(xc_fd, cpuid, UP_THRESHOLD, value);
  22.506 +    if ( ret )
  22.507 +    {
  22.508 +        fprintf(stderr, "[CPU%d] failed to set scaling threshold\n", cpuid);
  22.509 +    }
  22.510 +
  22.511 +    return ret;
  22.512 +}
  22.513 +
  22.514 +int scaling_governor_func(int xc_fd, int cpuid, char *name)
  22.515 +{
  22.516 +    int ret = 0;
  22.517 +
  22.518 +    if ( cpuid < 0 )
  22.519 +    {
  22.520 +        show_help();
  22.521 +        return -EINVAL;
  22.522 +    }
  22.523 +
  22.524 +    ret = xc_set_cpufreq_gov(xc_fd, cpuid, name);
  22.525 +    if ( ret )
  22.526 +    {
  22.527 +        fprintf(stderr, "failed to set cpufreq governor to %s\n", name);
  22.528 +    }
  22.529 +
  22.530 +    return ret;
  22.531 +}
  22.532 +
  22.533 +struct {
  22.534 +    const char *name;
  22.535 +    int (*function)(int xc_fd, int cpuid, uint32_t value);
  22.536 +} main_options[] = {
  22.537 +    { "help", help_func },
  22.538 +    { "get-cpuidle-states", cxstates_func },
  22.539 +    { "get-cpufreq-states", pxstates_func },
  22.540 +    { "get-cpufreq-para", cpufreq_para_func },
  22.541 +    { "set-scaling-maxfreq", scaling_max_freq_func },
  22.542 +    { "set-scaling-minfreq", scaling_min_freq_func },
  22.543 +    { "set-scaling-governor", NULL },
  22.544 +    { "set-scaling-speed", scaling_speed_func },
  22.545 +    { "set-sampling-rate", scaling_sampling_rate_func },
  22.546 +    { "set-up-threshold", scaling_up_threshold_func },
  22.547 +};
  22.548 +
  22.549 +int main(int argc, char *argv[])
  22.550 +{
  22.551 +    int i, ret = -EINVAL;
  22.552 +    int xc_fd;
  22.553 +    int cpuid = -1;
  22.554 +    uint32_t value = 0;
  22.555 +    int nr_matches = 0;
  22.556 +    int matches_main_options[ARRAY_SIZE(main_options)];
  22.557 +
  22.558 +    if ( argc < 2 )
  22.559 +    {
  22.560 +        show_help();
  22.561 +        return ret;
  22.562 +    }
  22.563 +
  22.564 +    if ( argc > 2 )
  22.565 +    {
  22.566 +        if ( sscanf(argv[2], "%d", &cpuid) != 1 )
  22.567 +            cpuid = -1;
  22.568      }
  22.569  
  22.570      xc_fd = xc_interface_open();
  22.571      if ( xc_fd < 0 )
  22.572      {
  22.573          fprintf(stderr, "failed to get the handler\n");
  22.574 -        return xc_fd;
  22.575 -    }
  22.576 -
  22.577 -    ret = xc_physinfo(xc_fd, &physinfo);
  22.578 -    if ( ret )
  22.579 -    {
  22.580 -        fprintf(stderr, "failed to get the processor information\n");
  22.581 -        xc_interface_close(xc_fd);
  22.582 -        return ret;
  22.583      }
  22.584  
  22.585 -    /* print out the C state information */
  22.586 -    if ( cinfo )
  22.587 +    for ( i = 0; i < ARRAY_SIZE(main_options); i++ )
  22.588      {
  22.589 -        int max_cx_num = 0;
  22.590 -        struct xc_cx_stat cxstatinfo, *cxstat = &cxstatinfo;
  22.591 -
  22.592 -        for ( i = 0; i < physinfo.nr_cpus; i++ )
  22.593 +        if ( !strncmp(main_options[i].name, argv[1], strlen(argv[1])) )
  22.594          {
  22.595 -            ret = xc_pm_get_max_cx(xc_fd, i, &max_cx_num);
  22.596 -            if ( ret )
  22.597 -            {
  22.598 -                if ( errno == ENODEV )
  22.599 -                {
  22.600 -                    fprintf(stderr, "Xen cpuidle is not enabled!\n");
  22.601 -                    break;
  22.602 -                }
  22.603 -                else
  22.604 -                {
  22.605 -                    fprintf(stderr, "[CPU%d] failed to get max C-state\n", i);
  22.606 -                    continue;
  22.607 -                }
  22.608 -            }
  22.609 -
  22.610 -            cxstat->triggers = malloc(max_cx_num * sizeof(uint64_t));
  22.611 -            if ( !cxstat->triggers )
  22.612 -            {
  22.613 -                fprintf(stderr, "failed to malloc for C-states triggers\n");
  22.614 -                break;
  22.615 -            }
  22.616 -            cxstat->residencies = malloc(max_cx_num * sizeof(uint64_t));
  22.617 -            if ( !cxstat->residencies )
  22.618 -            {
  22.619 -                fprintf(stderr, "failed to malloc for C-states residencies\n");
  22.620 -                free(cxstat->triggers);
  22.621 -                break;
  22.622 -            }
  22.623 -
  22.624 -            ret = xc_pm_get_cxstat(xc_fd, i, cxstat);
  22.625 -            if( ret )
  22.626 -            {
  22.627 -                fprintf(stderr, "[CPU%d] failed to get C-states statistics "
  22.628 -                        "information\n", i);
  22.629 -                free(cxstat->triggers);
  22.630 -                free(cxstat->residencies);
  22.631 -                continue;
  22.632 -            }
  22.633 -
  22.634 -            printf("cpu id               : %d\n", i);
  22.635 -            printf("total C-states       : %d\n", cxstat->nr);
  22.636 -            printf("idle time(ms)        : %"PRIu64"\n",
  22.637 -                   cxstat->idle_time/1000000UL);
  22.638 -            for ( j = 0; j < cxstat->nr; j++ )
  22.639 -            {
  22.640 -                printf("C%d                   : transition [%020"PRIu64"]\n",
  22.641 -                       j, cxstat->triggers[j]);
  22.642 -                printf("                       residency  [%020"PRIu64" ms]\n",
  22.643 -                       cxstat->residencies[j]*1000000UL/3579/1000000UL);
  22.644 -            }
  22.645 -
  22.646 -            free(cxstat->triggers);
  22.647 -            free(cxstat->residencies);
  22.648 -
  22.649 -            printf("\n");
  22.650 +            matches_main_options[nr_matches++] = i;
  22.651          }
  22.652      }
  22.653  
  22.654 -    /* print out P state information */
  22.655 -    if ( pinfo )
  22.656 +    if ( nr_matches > 1 )
  22.657      {
  22.658 -        int max_px_num = 0;
  22.659 -        struct xc_px_stat pxstatinfo, *pxstat = &pxstatinfo;
  22.660 -
  22.661 -        for ( i = 0; i < physinfo.nr_cpus; i++ )
  22.662 +        fprintf(stderr, "Ambigious options: ");
  22.663 +        for ( i = 0; i < nr_matches; i++ )
  22.664 +            fprintf(stderr, " %s", main_options[matches_main_options[i]].name);
  22.665 +        fprintf(stderr, "\n");
  22.666 +    }
  22.667 +    else if ( nr_matches == 1 )
  22.668 +    {
  22.669 +        if ( !strcmp("set-scaling-governor", main_options[matches_main_options[0]].name) )
  22.670          {
  22.671 -            ret = xc_pm_get_max_px(xc_fd, i, &max_px_num);
  22.672 -            if ( ret )
  22.673 -            {
  22.674 -                if ( errno == ENODEV )
  22.675 -                {
  22.676 -                    printf("Xen cpufreq is not enabled!\n");
  22.677 -                    break;
  22.678 -                }
  22.679 -                else
  22.680 -                {
  22.681 -                    fprintf(stderr, "[CPU%d] failed to get max P-state\n", i);
  22.682 -                    continue;
  22.683 -                }
  22.684 -            }
  22.685 -
  22.686 -            pxstat->trans_pt = malloc(max_px_num * max_px_num *
  22.687 -                                      sizeof(uint64_t));
  22.688 -            if ( !pxstat->trans_pt )
  22.689 -            {
  22.690 -                fprintf(stderr, "failed to malloc for P-states "
  22.691 -                        "transition table\n");
  22.692 -                break;
  22.693 -            }
  22.694 -            pxstat->pt = malloc(max_px_num * sizeof(struct xc_px_val));
  22.695 -            if ( !pxstat->pt )
  22.696 +            char *name = strdup(argv[3]);
  22.697 +            ret = scaling_governor_func(xc_fd, cpuid, name);
  22.698 +            free(name);
  22.699 +        }
  22.700 +        else
  22.701 +        {
  22.702 +            if ( argc > 3 )
  22.703              {
  22.704 -                fprintf(stderr, "failed to malloc for P-states table\n");
  22.705 -                free(pxstat->trans_pt);
  22.706 -                break;
  22.707 -            }
  22.708 -
  22.709 -            ret = xc_pm_get_pxstat(xc_fd, i, pxstat);
  22.710 -            if( ret )
  22.711 -            {
  22.712 -                fprintf(stderr, "[CPU%d] failed to get P-states "
  22.713 -                        "statistics information\n", i);
  22.714 -                free(pxstat->trans_pt);
  22.715 -                free(pxstat->pt);
  22.716 -                continue;
  22.717 +                if ( sscanf(argv[3], "%d", &value) != 1 )
  22.718 +                    value = 0;
  22.719              }
  22.720 -
  22.721 -            printf("cpu id               : %d\n", i);
  22.722 -            printf("total P-states       : %d\n", pxstat->total);
  22.723 -            printf("usable P-states      : %d\n", pxstat->usable);
  22.724 -            printf("current frequency    : %"PRIu64" MHz\n",
  22.725 -                   pxstat->pt[pxstat->cur].freq);
  22.726 -            for ( j = 0; j < pxstat->total; j++ )
  22.727 -            {
  22.728 -                if ( pxstat->cur == j )
  22.729 -                    printf("*P%d", j);
  22.730 -                else
  22.731 -                    printf("P%d ", j);
  22.732 -                printf("                  : freq       [%04"PRIu64" MHz]\n",
  22.733 -                       pxstat->pt[j].freq);
  22.734 -                printf("                       transition [%020"PRIu64"]\n",
  22.735 -                       pxstat->pt[j].count);
  22.736 -                printf("                       residency  [%020"PRIu64" ms]\n",
  22.737 -                       pxstat->pt[j].residency/1000000UL);
  22.738 -            }
  22.739 -
  22.740 -            free(pxstat->trans_pt);
  22.741 -            free(pxstat->pt);
  22.742 -
  22.743 -            printf("\n");
  22.744 +            ret = main_options[matches_main_options[0]].function(xc_fd, cpuid, value);
  22.745          }
  22.746      }
  22.747 +    else
  22.748 +        show_help();
  22.749  
  22.750      xc_interface_close(xc_fd);
  22.751      return ret;
    23.1 --- a/tools/python/xen/lowlevel/acm/acm.c	Wed Dec 24 12:50:57 2008 +0900
    23.2 +++ b/tools/python/xen/lowlevel/acm/acm.c	Wed Dec 24 12:52:34 2008 +0900
    23.3 @@ -68,6 +68,8 @@ static void *__getssid(int domid, uint32
    23.4          goto out2;
    23.5      } else {
    23.6          *buflen = SSID_BUFFER_SIZE;
    23.7 +        free(buf);
    23.8 +        buf = NULL;
    23.9          goto out2;
   23.10      }
   23.11   out2:
    24.1 --- a/tools/python/xen/lowlevel/flask/flask.c	Wed Dec 24 12:50:57 2008 +0900
    24.2 +++ b/tools/python/xen/lowlevel/flask/flask.c	Wed Dec 24 12:52:34 2008 +0900
    24.3 @@ -55,6 +55,7 @@ static PyObject *pyflask_context_to_sid(
    24.4      xc_handle = xc_interface_open();
    24.5      if (xc_handle < 0) {
    24.6          errno = xc_handle;
    24.7 +        free(buf);
    24.8          return PyErr_SetFromErrno(xc_error_obj);
    24.9      }
   24.10      
    25.1 --- a/tools/python/xen/lowlevel/xc/xc.c	Wed Dec 24 12:50:57 2008 +0900
    25.2 +++ b/tools/python/xen/lowlevel/xc/xc.c	Wed Dec 24 12:52:34 2008 +0900
    25.3 @@ -678,19 +678,22 @@ static PyObject *pyxc_get_device_group(X
    25.4  
    25.5      if ( rc < 0 )
    25.6      {
    25.7 -      free(sdev_array); 
    25.8 -      return pyxc_error_to_exception();
    25.9 +        free(sdev_array); 
   25.10 +        return pyxc_error_to_exception();
   25.11      }
   25.12  
   25.13      if ( !num_sdevs )
   25.14      {
   25.15 -       free(sdev_array);
   25.16 -       return Py_BuildValue("s", "");
   25.17 +        free(sdev_array);
   25.18 +        return Py_BuildValue("s", "");
   25.19      }
   25.20  
   25.21      group_str = calloc(num_sdevs, sizeof(dev_str));
   25.22      if (group_str == NULL)
   25.23 +    {
   25.24 +        free(sdev_array);
   25.25          return PyErr_NoMemory();
   25.26 +    }
   25.27  
   25.28      for ( i = 0; i < num_sdevs; i++ )
   25.29      {
    26.1 --- a/tools/python/xen/lowlevel/xs/xs.c	Wed Dec 24 12:50:57 2008 +0900
    26.2 +++ b/tools/python/xen/lowlevel/xs/xs.c	Wed Dec 24 12:52:34 2008 +0900
    26.3 @@ -336,15 +336,19 @@ static PyObject *xspy_set_permissions(Xs
    26.4  	xs_set_error(EINVAL);
    26.5          goto exit;
    26.6      }
    26.7 +
    26.8      xsperms_n = PyList_Size(perms);
    26.9 -    xsperms = calloc(xsperms_n, sizeof(struct xs_permissions));
   26.10 +    /* NB. alloc +1 so we can change the owner if necessary. */
   26.11 +    xsperms = calloc(xsperms_n + 1, sizeof(struct xs_permissions));
   26.12      if (!xsperms) {
   26.13  	xs_set_error(ENOMEM);
   26.14          goto exit;
   26.15      }
   26.16 +
   26.17      tuple0 = PyTuple_New(0);
   26.18      if (!tuple0)
   26.19          goto exit;
   26.20 +
   26.21      for (i = 0; i < xsperms_n; i++) {
   26.22          /* Read/write perms. Set these. */
   26.23          int p_read = 0, p_write = 0;
   26.24 @@ -357,6 +361,17 @@ static PyObject *xspy_set_permissions(Xs
   26.25          if (p_write)
   26.26              xsperms[i].perms |= XS_PERM_WRITE;
   26.27      }
   26.28 +
   26.29 +    /*
   26.30 +     * Is the caller trying to restrict access to the first specified
   26.31 +     * domain? If so then it cannot be owner, so we force dom0 as owner.
   26.32 +     */
   26.33 +    if (xsperms_n && xsperms[0].perms && xsperms[0].id) {
   26.34 +        memmove(&xsperms[1], &xsperms[0], xsperms_n * sizeof(*xsperms));
   26.35 +        xsperms[0].id = xsperms[0].perms = 0;
   26.36 +        xsperms_n++;
   26.37 +    }
   26.38 +
   26.39      Py_BEGIN_ALLOW_THREADS
   26.40      result = xs_set_permissions(xh, th, path, xsperms, xsperms_n);
   26.41      Py_END_ALLOW_THREADS
    27.1 --- a/tools/python/xen/xend/XendCheckpoint.py	Wed Dec 24 12:50:57 2008 +0900
    27.2 +++ b/tools/python/xen/xend/XendCheckpoint.py	Wed Dec 24 12:52:34 2008 +0900
    27.3 @@ -253,7 +253,7 @@ def restore(xd, fd, dominfo = None, paus
    27.4          # set memory limit
    27.5          xc.domain_setmaxmem(dominfo.getDomid(), maxmem)
    27.6  
    27.7 -        balloon.free(memory + shadow)
    27.8 +        balloon.free(memory + shadow, dominfo)
    27.9  
   27.10          shadow_cur = xc.shadow_mem_control(dominfo.getDomid(), shadow / 1024)
   27.11          dominfo.info['shadow_memory'] = shadow_cur
    28.1 --- a/tools/python/xen/xend/XendConfig.py	Wed Dec 24 12:50:57 2008 +0900
    28.2 +++ b/tools/python/xen/xend/XendConfig.py	Wed Dec 24 12:52:34 2008 +0900
    28.3 @@ -1289,7 +1289,6 @@ class XendConfig(dict):
    28.4                      pass
    28.5  
    28.6              if dev_type == 'vbd':
    28.7 -                dev_info['bootable'] = 0
    28.8                  if dev_info.get('dev', '').startswith('ioemu:'):
    28.9                      dev_info['driver'] = 'ioemu'
   28.10                  else:
   28.11 @@ -1325,7 +1324,7 @@ class XendConfig(dict):
   28.12                  if param not in target:
   28.13                      target[param] = []
   28.14                  if dev_uuid not in target[param]:
   28.15 -                    if dev_type == 'vbd':
   28.16 +                    if dev_type == 'vbd' and 'bootable' not in dev_info:
   28.17                          # Compat hack -- mark first disk bootable
   28.18                          dev_info['bootable'] = int(not target[param])
   28.19                      target[param].append(dev_uuid)
   28.20 @@ -1333,8 +1332,9 @@ class XendConfig(dict):
   28.21                  if 'vbd_refs' not in target:
   28.22                      target['vbd_refs'] = []
   28.23                  if dev_uuid not in target['vbd_refs']:
   28.24 -                    # Compat hack -- mark first disk bootable
   28.25 -                    dev_info['bootable'] = int(not target['vbd_refs'])
   28.26 +                    if 'bootable' not in dev_info:
   28.27 +                        # Compat hack -- mark first disk bootable
   28.28 +                        dev_info['bootable'] = int(not target['vbd_refs'])
   28.29                      target['vbd_refs'].append(dev_uuid)
   28.30                      
   28.31              elif dev_type == 'vfb':
    29.1 --- a/tools/python/xen/xend/XendDomainInfo.py	Wed Dec 24 12:50:57 2008 +0900
    29.2 +++ b/tools/python/xen/xend/XendDomainInfo.py	Wed Dec 24 12:52:34 2008 +0900
    29.3 @@ -517,7 +517,8 @@ class XendDomainInfo:
    29.4          # HVM domain shuts itself down only if it has PV drivers
    29.5          if self.info.is_hvm():
    29.6              hvm_pvdrv = xc.hvm_get_param(self.domid, HVM_PARAM_CALLBACK_IRQ)
    29.7 -            if not hvm_pvdrv:
    29.8 +            hvm_s_state = xc.hvm_get_param(self.domid, HVM_PARAM_ACPI_S_STATE)
    29.9 +            if not hvm_pvdrv or hvm_s_state != 0:
   29.10                  code = REVERSE_DOMAIN_SHUTDOWN_REASONS[reason]
   29.11                  log.info("HVM save:remote shutdown dom %d!", self.domid)
   29.12                  xc.domain_shutdown(self.domid, code)
   29.13 @@ -2104,7 +2105,7 @@ class XendDomainInfo:
   29.14          # overhead is greater for some types of domain than others. For
   29.15          # example, an x86 HVM domain will have a default shadow-pagetable
   29.16          # allocation of 1MB. We free up 2MB here to be on the safe side.
   29.17 -        balloon.free(2*1024) # 2MB should be plenty
   29.18 +        balloon.free(2*1024, self) # 2MB should be plenty
   29.19  
   29.20          ssidref = 0
   29.21          if security.on() == xsconstants.XS_POLICY_USE:
   29.22 @@ -2298,7 +2299,7 @@ class XendDomainInfo:
   29.23              vtd_mem = ((vtd_mem + 1023) / 1024) * 1024
   29.24  
   29.25              # Make sure there's enough RAM available for the domain
   29.26 -            balloon.free(memory + shadow + vtd_mem)
   29.27 +            balloon.free(memory + shadow + vtd_mem, self)
   29.28  
   29.29              # Set up the shadow memory
   29.30              shadow_cur = xc.shadow_mem_control(self.domid, shadow / 1024)
   29.31 @@ -2715,7 +2716,7 @@ class XendDomainInfo:
   29.32              # The domain might already have some shadow memory
   29.33              overhead_kb -= xc.shadow_mem_control(self.domid) * 1024
   29.34          if overhead_kb > 0:
   29.35 -            balloon.free(overhead_kb)
   29.36 +            balloon.free(overhead_kb, self)
   29.37  
   29.38      def _unwatchVm(self):
   29.39          """Remove the watch on the VM path, if any.  Idempotent.  Nothrow
    30.1 --- a/tools/python/xen/xend/balloon.py	Wed Dec 24 12:50:57 2008 +0900
    30.2 +++ b/tools/python/xen/xend/balloon.py	Wed Dec 24 12:52:34 2008 +0900
    30.3 @@ -67,7 +67,7 @@ def get_dom0_target_alloc():
    30.4          raise VmError('Failed to query target memory allocation of dom0.')
    30.5      return kb
    30.6  
    30.7 -def free(need_mem):
    30.8 +def free(need_mem ,self):
    30.9      """Balloon out memory from the privileged domain so that there is the
   30.10      specified required amount (in KiB) free.
   30.11      """
   30.12 @@ -122,6 +122,40 @@ def free(need_mem):
   30.13          if need_mem >= max_free_mem:
   30.14              retries = rlimit
   30.15  
   30.16 +        # Check whether the current machine is a NUMA system and the newly
   30.17 +        # created HVM has all its vcpus in the same node. If both
   30.18 +        # conditions hold, we wait until all the pages
   30.19 +        # in the scrub list are freed (if the wait goes beyond 20s,
   30.20 +        # we stop waiting).
   30.21 +        if physinfo['nr_nodes'] > 1 and retries == 0:
   30.22 +            oldnode = -1
   30.23 +            waitscrub = 1
   30.24 +            vcpus = self.info['cpus'][0]
   30.25 +            for vcpu in vcpus:
   30.26 +                nodenum = 0
   30.27 +                for node in physinfo['node_to_cpu']:
   30.28 +                    for cpu in node:
   30.29 +                        if vcpu == cpu:
   30.30 +                            if oldnode == -1:
   30.31 +                                oldnode = nodenum
   30.32 +                            elif oldnode != nodenum:
   30.33 +                                waitscrub = 0
   30.34 +                    nodenum = nodenum + 1
   30.35 +
   30.36 +            if waitscrub == 1 and scrub_mem > 0:
   30.37 +                log.debug("wait for scrub %s", scrub_mem)
   30.38 +                while scrub_mem > 0 and retries < rlimit:
   30.39 +                    time.sleep(sleep_time)
   30.40 +                    physinfo = xc.physinfo()
   30.41 +                    free_mem = physinfo['free_memory']
   30.42 +                    scrub_mem = physinfo['scrub_memory']
   30.43 +                    retries += 1
   30.44 +                    sleep_time += SLEEP_TIME_GROWTH
   30.45 +                log.debug("scrub for %d times", retries)
   30.46 +
   30.47 +            retries = 0
   30.48 +            sleep_time = SLEEP_TIME_GROWTH
   30.49 +
   30.50          while retries < rlimit:
   30.51              physinfo = xc.physinfo()
   30.52              free_mem = physinfo['free_memory']
    31.1 --- a/tools/python/xen/xend/server/blkif.py	Wed Dec 24 12:50:57 2008 +0900
    31.2 +++ b/tools/python/xen/xend/server/blkif.py	Wed Dec 24 12:52:34 2008 +0900
    31.3 @@ -78,6 +78,10 @@ class BlkifController(DevController):
    31.4          if uuid:
    31.5              back['uuid'] = uuid
    31.6  
    31.7 +        bootable = config.get('bootable', None)
    31.8 +        if bootable != None:
    31.9 +            back['bootable'] = str(bootable)
   31.10 +
   31.11          if security.on() == xsconstants.XS_POLICY_USE:
   31.12              self.do_access_control(config, uname)
   31.13  
   31.14 @@ -143,11 +147,12 @@ class BlkifController(DevController):
   31.15          config = DevController.getDeviceConfiguration(self, devid, transaction)
   31.16          if transaction is None:
   31.17              devinfo = self.readBackend(devid, 'dev', 'type', 'params', 'mode',
   31.18 -                                       'uuid')
   31.19 +                                       'uuid', 'bootable')
   31.20          else:
   31.21              devinfo = self.readBackendTxn(transaction, devid,
   31.22 -                                          'dev', 'type', 'params', 'mode', 'uuid')
   31.23 -        dev, typ, params, mode, uuid = devinfo
   31.24 +                                          'dev', 'type', 'params', 'mode', 'uuid',
   31.25 +                                          'bootable')
   31.26 +        dev, typ, params, mode, uuid, bootable = devinfo
   31.27          
   31.28          if dev:
   31.29              if transaction is None:
   31.30 @@ -165,6 +170,8 @@ class BlkifController(DevController):
   31.31              config['mode'] = mode
   31.32          if uuid:
   31.33              config['uuid'] = uuid
   31.34 +        if bootable != None:
   31.35 +            config['bootable'] = int(bootable)
   31.36  
   31.37          proto = self.readFrontend(devid, 'protocol')
   31.38          if proto:
    32.1 --- a/tools/xcutils/xc_save.c	Wed Dec 24 12:50:57 2008 +0900
    32.2 +++ b/tools/xcutils/xc_save.c	Wed Dec 24 12:52:34 2008 +0900
    32.3 @@ -166,18 +166,12 @@ static int suspend(void)
    32.4  {
    32.5      unsigned long sx_state = 0;
    32.6  
    32.7 -    /* Nothing to do if the guest is in an ACPI sleep state. */
    32.8 +    /* Cannot notify guest to shut itself down if it's in ACPI sleep state. */
    32.9      if (si.flags & XCFLAGS_HVM)
   32.10          xc_get_hvm_param(si.xc_fd, si.domid,
   32.11                           HVM_PARAM_ACPI_S_STATE, &sx_state);
   32.12 -    if (sx_state != 0) {
   32.13 -        /* notify xend that it can do device migration */
   32.14 -        printf("suspended\n");
   32.15 -        fflush(stdout);
   32.16 -        return 1;
   32.17 -    }
   32.18  
   32.19 -    if (si.suspend_evtchn >= 0)
   32.20 +    if ((sx_state == 0) && (si.suspend_evtchn >= 0))
   32.21          return evtchn_suspend();
   32.22  
   32.23      return compat_suspend();
    33.1 --- a/tools/xenpmd/xenpmd.c	Wed Dec 24 12:50:57 2008 +0900
    33.2 +++ b/tools/xenpmd/xenpmd.c	Wed Dec 24 12:52:34 2008 +0900
    33.3 @@ -297,7 +297,6 @@ int get_next_battery_info_or_status(DIR 
    33.4      if  ( !info_or_status )
    33.5          return 0;
    33.6  
    33.7 -    memset(line_info, 0, 256);
    33.8      if (type == BIF) 
    33.9          memset(info_or_status, 0, sizeof(struct battery_info));
   33.10      else 
   33.11 @@ -307,11 +306,8 @@ int get_next_battery_info_or_status(DIR 
   33.12      if ( !file )
   33.13          return 0;
   33.14  
   33.15 -    while ( fgets(line_info, 1024, file) != NULL ) 
   33.16 -    {
   33.17 +    while ( fgets(line_info, sizeof(line_info), file) != NULL ) 
   33.18          parse_battery_info_or_status(line_info, type, info_or_status);
   33.19 -        memset(line_info, 0, 256);
   33.20 -    }
   33.21  
   33.22      fclose(file);
   33.23      return 1;
    34.1 --- a/tools/xenstat/xentop/xentop.c	Wed Dec 24 12:50:57 2008 +0900
    34.2 +++ b/tools/xenstat/xentop/xentop.c	Wed Dec 24 12:52:34 2008 +0900
    34.3 @@ -254,7 +254,7 @@ static void fail(const char *str)
    34.4  {
    34.5  	if(cwin != NULL && !isendwin())
    34.6  		endwin();
    34.7 -	fprintf(stderr, str);
    34.8 +	fprintf(stderr, "%s", str);
    34.9  	exit(1);
   34.10  }
   34.11  
    35.1 --- a/unmodified_drivers/linux-2.6/Makefile	Wed Dec 24 12:50:57 2008 +0900
    35.2 +++ b/unmodified_drivers/linux-2.6/Makefile	Wed Dec 24 12:52:34 2008 +0900
    35.3 @@ -4,3 +4,4 @@ obj-m += platform-pci/
    35.4  obj-m += balloon/
    35.5  obj-m += blkfront/
    35.6  obj-m += netfront/
    35.7 +obj-m += scsifront/
    36.1 --- a/unmodified_drivers/linux-2.6/compat-include/xen/platform-compat.h	Wed Dec 24 12:50:57 2008 +0900
    36.2 +++ b/unmodified_drivers/linux-2.6/compat-include/xen/platform-compat.h	Wed Dec 24 12:52:34 2008 +0900
    36.3 @@ -147,9 +147,11 @@ extern char *kasprintf(gfp_t gfp, const 
    36.4   *   RHEL_VERSION
    36.5   */
    36.6  #if !defined(RHEL_VERSION) || (RHEL_VERSION == 4 && RHEL_UPDATE < 5)
    36.7 +#if !defined(RHEL_MAJOR) || (RHEL_MAJOR == 4 && RHEL_MINOR < 5)
    36.8  typedef irqreturn_t (*irq_handler_t)(int, void *, struct pt_regs *);
    36.9  #endif
   36.10  #endif
   36.11 +#endif
   36.12  
   36.13  #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23)
   36.14  #define setup_xen_features xen_setup_features
    37.1 --- a/unmodified_drivers/linux-2.6/overrides.mk	Wed Dec 24 12:50:57 2008 +0900
    37.2 +++ b/unmodified_drivers/linux-2.6/overrides.mk	Wed Dec 24 12:52:34 2008 +0900
    37.3 @@ -15,3 +15,4 @@ endif
    37.4  
    37.5  EXTRA_CFLAGS += $(_XEN_CPPFLAGS)
    37.6  EXTRA_AFLAGS += $(_XEN_CPPFLAGS)
    37.7 +CPPFLAGS := -I$(M)/include $(CPPFLAGS)
    38.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    38.2 +++ b/unmodified_drivers/linux-2.6/scsifront/Kbuild	Wed Dec 24 12:52:34 2008 +0900
    38.3 @@ -0,0 +1,6 @@
    38.4 +include $(M)/overrides.mk
    38.5 +
    38.6 +obj-m += xen-scsi.o
    38.7 +
    38.8 +xen-scsi-objs := scsifront.o xenbus.o
    38.9 +
    39.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    39.2 +++ b/unmodified_drivers/linux-2.6/scsifront/Makefile	Wed Dec 24 12:52:34 2008 +0900
    39.3 @@ -0,0 +1,3 @@
    39.4 +ifneq ($(KERNELRELEASE),)
    39.5 +include $(src)/Kbuild
    39.6 +endif
    40.1 --- a/xen/arch/ia64/xen/cpufreq/cpufreq.c	Wed Dec 24 12:50:57 2008 +0900
    40.2 +++ b/xen/arch/ia64/xen/cpufreq/cpufreq.c	Wed Dec 24 12:52:34 2008 +0900
    40.3 @@ -275,6 +275,7 @@ acpi_cpufreq_cpu_exit (struct cpufreq_po
    40.4  }
    40.5  
    40.6  static struct cpufreq_driver acpi_cpufreq_driver = {
    40.7 +	.name       = "acpi-cpufreq",
    40.8  	.verify     = acpi_cpufreq_verify,
    40.9  	.target     = acpi_cpufreq_target,
   40.10  	.get        = acpi_cpufreq_get,
    41.1 --- a/xen/arch/x86/Makefile	Wed Dec 24 12:50:57 2008 +0900
    41.2 +++ b/xen/arch/x86/Makefile	Wed Dec 24 12:52:34 2008 +0900
    41.3 @@ -37,7 +37,6 @@ obj-y += nmi.o
    41.4  obj-y += numa.o
    41.5  obj-y += pci.o
    41.6  obj-y += physdev.o
    41.7 -obj-y += rwlock.o
    41.8  obj-y += setup.o
    41.9  obj-y += shutdown.o
   41.10  obj-y += smp.o
    42.1 --- a/xen/arch/x86/acpi/cpu_idle.c	Wed Dec 24 12:50:57 2008 +0900
    42.2 +++ b/xen/arch/x86/acpi/cpu_idle.c	Wed Dec 24 12:52:34 2008 +0900
    42.3 @@ -71,7 +71,8 @@ static struct acpi_processor_power *__re
    42.4  
    42.5  static void print_acpi_power(uint32_t cpu, struct acpi_processor_power *power)
    42.6  {
    42.7 -    uint32_t i;
    42.8 +    uint32_t i, idle_usage = 0;
    42.9 +    uint64_t res, idle_res = 0;
   42.10  
   42.11      printk("==cpu%d==\n", cpu);
   42.12      printk("active state:\t\tC%d\n",
   42.13 @@ -81,14 +82,21 @@ static void print_acpi_power(uint32_t cp
   42.14      
   42.15      for ( i = 1; i < power->count; i++ )
   42.16      {
   42.17 +        res = acpi_pm_tick_to_ns(power->states[i].time);
   42.18 +        idle_usage += power->states[i].usage;
   42.19 +        idle_res += res;
   42.20 +
   42.21          printk((power->last_state && power->last_state->idx == i) ?
   42.22                 "   *" : "    ");
   42.23          printk("C%d:\t", i);
   42.24          printk("type[C%d] ", power->states[i].type);
   42.25          printk("latency[%03d] ", power->states[i].latency);
   42.26          printk("usage[%08d] ", power->states[i].usage);
   42.27 -        printk("duration[%"PRId64"]\n", power->states[i].time);
   42.28 +        printk("duration[%"PRId64"]\n", res);
   42.29      }
   42.30 +    printk("    C0:\tusage[%08d] duration[%"PRId64"]\n",
   42.31 +           idle_usage, NOW() - idle_res);
   42.32 +
   42.33  }
   42.34  
   42.35  static void dump_cx(unsigned char key)
   42.36 @@ -317,8 +325,6 @@ static void acpi_processor_idle(void)
   42.37           * stopped by H/W. Without carefully handling of TSC/APIC stop issues,
   42.38           * deep C state can't work correctly.
   42.39           */
   42.40 -        /* preparing TSC stop */
   42.41 -        cstate_save_tsc();
   42.42          /* preparing APIC stop */
   42.43          lapic_timer_off();
   42.44  
   42.45 @@ -751,8 +757,7 @@ uint32_t pmstat_get_cx_nr(uint32_t cpuid
   42.46  int pmstat_get_cx_stat(uint32_t cpuid, struct pm_cx_stat *stat)
   42.47  {
   42.48      const struct acpi_processor_power *power = processor_powers[cpuid];
   42.49 -    struct vcpu *v = idle_vcpu[cpuid];
   42.50 -    uint64_t usage;
   42.51 +    uint64_t usage, res, idle_usage = 0, idle_res = 0;
   42.52      int i;
   42.53  
   42.54      if ( power == NULL )
   42.55 @@ -765,20 +770,26 @@ int pmstat_get_cx_stat(uint32_t cpuid, s
   42.56  
   42.57      stat->last = power->last_state ? power->last_state->idx : 0;
   42.58      stat->nr = power->count;
   42.59 -    stat->idle_time = v->runstate.time[RUNSTATE_running];
   42.60 -    if ( v->is_running )
   42.61 -        stat->idle_time += NOW() - v->runstate.state_entry_time;
   42.62 +    stat->idle_time = get_cpu_idle_time(cpuid);
   42.63  
   42.64 -    for ( i = 0; i < power->count; i++ )
   42.65 +    for ( i = power->count - 1; i >= 0; i-- )
   42.66      {
   42.67 -        usage = power->states[i].usage;
   42.68 -        if ( copy_to_guest_offset(stat->triggers, i, &usage, 1) )
   42.69 +        if ( i != 0 )
   42.70 +        {
   42.71 +            usage = power->states[i].usage;
   42.72 +            res = acpi_pm_tick_to_ns(power->states[i].time);
   42.73 +            idle_usage += usage;
   42.74 +            idle_res += res;
   42.75 +        }
   42.76 +        else
   42.77 +        {
   42.78 +            usage = idle_usage;
   42.79 +            res = NOW() - idle_res;
   42.80 +        }
   42.81 +        if ( copy_to_guest_offset(stat->triggers, i, &usage, 1) ||
   42.82 +             copy_to_guest_offset(stat->residencies, i, &res, 1) )
   42.83              return -EFAULT;
   42.84      }
   42.85 -    for ( i = 0; i < power->count; i++ )
   42.86 -        if ( copy_to_guest_offset(stat->residencies, i, 
   42.87 -                                  &power->states[i].time, 1) )
   42.88 -            return -EFAULT;
   42.89  
   42.90      return 0;
   42.91  }
    43.1 --- a/xen/arch/x86/acpi/cpufreq/cpufreq.c	Wed Dec 24 12:50:57 2008 +0900
    43.2 +++ b/xen/arch/x86/acpi/cpufreq/cpufreq.c	Wed Dec 24 12:52:34 2008 +0900
    43.3 @@ -131,10 +131,13 @@ struct drv_cmd {
    43.4      u32 val;
    43.5  };
    43.6  
    43.7 -static void do_drv_read(struct drv_cmd *cmd)
    43.8 +static void do_drv_read(void *drvcmd)
    43.9  {
   43.10 +    struct drv_cmd *cmd;
   43.11      u32 h;
   43.12  
   43.13 +    cmd = (struct drv_cmd *)drvcmd;
   43.14 +
   43.15      switch (cmd->type) {
   43.16      case SYSTEM_INTEL_MSR_CAPABLE:
   43.17          rdmsr(cmd->addr.msr.reg, cmd->val, h);
   43.18 @@ -174,7 +177,13 @@ static void drv_read(struct drv_cmd *cmd
   43.19  {
   43.20      cmd->val = 0;
   43.21  
   43.22 -    do_drv_read(cmd);
   43.23 +    ASSERT(cpus_weight(cmd->mask) == 1);
   43.24 +
   43.25 +    /* Skip the IPI when already running on the target CPU. */
   43.26 +    if (cpu_isset(smp_processor_id(), cmd->mask))
   43.27 +        do_drv_read((void *)cmd);
   43.28 +    else
   43.29 +        on_selected_cpus( cmd->mask, do_drv_read, (void *)cmd, 0, 1);
   43.30  }
   43.31  
   43.32  static void drv_write(struct drv_cmd *cmd)
   43.33 @@ -184,13 +193,21 @@ static void drv_write(struct drv_cmd *cm
   43.34  
   43.35  static u32 get_cur_val(cpumask_t mask)
   43.36  {
   43.37 +    struct cpufreq_policy *policy;
   43.38      struct processor_performance *perf;
   43.39      struct drv_cmd cmd;
   43.40 +    unsigned int cpu;
   43.41  
   43.42      if (unlikely(cpus_empty(mask)))
   43.43          return 0;
   43.44  
   43.45 -    switch (drv_data[first_cpu(mask)]->cpu_feature) {
   43.46 +    cpu = first_cpu(mask);
   43.47 +    policy = cpufreq_cpu_policy[cpu];
   43.48 +
   43.49 +    if (!policy)
   43.50 +        return 0;    
   43.51 +
   43.52 +    switch (drv_data[policy->cpu]->cpu_feature) {
   43.53      case SYSTEM_INTEL_MSR_CAPABLE:
   43.54          cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
   43.55          cmd.addr.msr.reg = MSR_IA32_PERF_STATUS;
   43.56 @@ -205,7 +222,7 @@ static u32 get_cur_val(cpumask_t mask)
   43.57          return 0;
   43.58      }
   43.59  
   43.60 -    cmd.mask = mask;
   43.61 +    cmd.mask = cpumask_of_cpu(cpu);
   43.62  
   43.63      drv_read(&cmd);
   43.64      return cmd.val;
   43.65 @@ -255,28 +272,43 @@ static void  __get_measured_perf(void *p
   43.66  
   43.67  static unsigned int get_measured_perf(unsigned int cpu)
   43.68  {
   43.69 -    unsigned int retval, perf_percent;
   43.70 +    struct cpufreq_policy *policy;
   43.71 +    unsigned int perf_percent;
   43.72      cpumask_t cpumask;
   43.73  
   43.74      if (!cpu_online(cpu))
   43.75          return 0;
   43.76  
   43.77 -    cpumask = cpumask_of_cpu(cpu);
   43.78 -    on_selected_cpus(cpumask, __get_measured_perf, (void *)&perf_percent,0,1);
   43.79 +    policy = cpufreq_cpu_policy[cpu];
   43.80 +    if (!policy)
   43.81 +        return 0;
   43.82  
   43.83 -    retval = drv_data[cpu]->max_freq * perf_percent / 100;
   43.84 -    return retval;
   43.85 +    /* Usually we take the short path (no IPI) for the sake of performance. */
   43.86 +    if (cpu == smp_processor_id()) {
   43.87 +        __get_measured_perf((void *)&perf_percent);
   43.88 +    } else {
   43.89 +        cpumask = cpumask_of_cpu(cpu);
   43.90 +        on_selected_cpus(cpumask, __get_measured_perf, 
   43.91 +                        (void *)&perf_percent,0,1);
   43.92 +    }
   43.93 +
   43.94 +    return drv_data[cpu]->max_freq * perf_percent / 100;
   43.95  }
   43.96  
   43.97  static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
   43.98  {
   43.99 -    struct acpi_cpufreq_data *data = drv_data[cpu];
  43.100 +    struct cpufreq_policy *policy;
  43.101 +    struct acpi_cpufreq_data *data;
  43.102      unsigned int freq;
  43.103  
  43.104 +    policy = cpufreq_cpu_policy[cpu];
  43.105 +    if (!policy)
  43.106 +        return 0;
  43.107 +
  43.108 +    data = drv_data[policy->cpu];
  43.109      if (unlikely(data == NULL ||
  43.110 -        data->acpi_data == NULL || data->freq_table == NULL)) {
  43.111 +        data->acpi_data == NULL || data->freq_table == NULL))
  43.112          return 0;
  43.113 -    }
  43.114  
  43.115      freq = extract_freq(get_cur_val(cpumask_of_cpu(cpu)), data);
  43.116      return freq;
  43.117 @@ -327,16 +359,10 @@ static int acpi_cpufreq_target(struct cp
  43.118  
  43.119      next_perf_state = data->freq_table[next_state].index;
  43.120      if (perf->state == next_perf_state) {
  43.121 -        if (unlikely(policy->resume)) {
  43.122 -            printk(KERN_INFO "Called after resume, resetting to P%d\n", 
  43.123 -                next_perf_state);
  43.124 +        if (unlikely(policy->resume))
  43.125              policy->resume = 0;
  43.126 -        }
  43.127 -        else {
  43.128 -            printk(KERN_DEBUG "Already at target state (P%d)\n", 
  43.129 -                next_perf_state);
  43.130 +        else
  43.131              return 0;
  43.132 -        }
  43.133      }
  43.134  
  43.135      switch (data->cpu_feature) {
  43.136 @@ -555,6 +581,7 @@ static int acpi_cpufreq_cpu_exit(struct 
  43.137  }
  43.138  
  43.139  static struct cpufreq_driver acpi_cpufreq_driver = {
  43.140 +    .name   = "acpi-cpufreq",
  43.141      .verify = acpi_cpufreq_verify,
  43.142      .target = acpi_cpufreq_target,
  43.143      .init   = acpi_cpufreq_cpu_init,
    44.1 --- a/xen/arch/x86/acpi/cpufreq/powernow.c	Wed Dec 24 12:50:57 2008 +0900
    44.2 +++ b/xen/arch/x86/acpi/cpufreq/powernow.c	Wed Dec 24 12:52:34 2008 +0900
    44.3 @@ -129,6 +129,16 @@ static int powernow_cpufreq_target(struc
    44.4      return result;
    44.5  }
    44.6  
    44.7 +static int powernow_cpufreq_verify(struct cpufreq_policy *policy)
    44.8 +{
    44.9 +    struct powernow_cpufreq_data *data;
   44.10 +
   44.11 +    if (!policy || !(data = drv_data[policy->cpu]))
   44.12 +        return -EINVAL;
   44.13 +
   44.14 +    return cpufreq_frequency_table_verify(policy, data->freq_table);
   44.15 +}
   44.16 +
   44.17  static int powernow_cpufreq_cpu_init(struct cpufreq_policy *policy)
   44.18  {
   44.19      unsigned int i;
   44.20 @@ -243,6 +253,7 @@ static int powernow_cpufreq_cpu_exit(str
   44.21  }
   44.22  
   44.23  static struct cpufreq_driver powernow_cpufreq_driver = {
   44.24 +    .verify = powernow_cpufreq_verify,
   44.25      .target = powernow_cpufreq_target,
   44.26      .init   = powernow_cpufreq_cpu_init,
   44.27      .exit   = powernow_cpufreq_cpu_exit
    45.1 --- a/xen/arch/x86/apic.c	Wed Dec 24 12:50:57 2008 +0900
    45.2 +++ b/xen/arch/x86/apic.c	Wed Dec 24 12:52:34 2008 +0900
    45.3 @@ -99,8 +99,11 @@ void __init apic_intr_init(void)
    45.4      /* Performance Counters Interrupt */
    45.5      set_intr_gate(PMU_APIC_VECTOR, pmu_apic_interrupt);
    45.6  
    45.7 -    /* thermal monitor LVT interrupt */
    45.8 -#ifdef CONFIG_X86_MCE_P4THERMAL
    45.9 +    /* CMCI Correctable Machine Check Interrupt */
   45.10 +    set_intr_gate(CMCI_APIC_VECTOR, cmci_interrupt);
   45.11 +
   45.12 +    /* Thermal monitor LVT interrupt, for P4 and newer Intel CPUs */
   45.13 +#ifdef CONFIG_X86_MCE_THERMAL
   45.14      set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
   45.15  #endif
   45.16  }
   45.17 @@ -172,12 +175,17 @@ void clear_local_APIC(void)
   45.18      }
   45.19  
   45.20  /* lets not touch this if we didn't frob it */
   45.21 -#ifdef CONFIG_X86_MCE_P4THERMAL
   45.22 +#ifdef CONFIG_X86_MCE_THERMAL
   45.23      if (maxlvt >= 5) {
   45.24          v = apic_read(APIC_LVTTHMR);
   45.25          apic_write_around(APIC_LVTTHMR, v | APIC_LVT_MASKED);
   45.26      }
   45.27  #endif
   45.28 +
   45.29 +    if (maxlvt >= 6) {
   45.30 +        v = apic_read(APIC_CMCI);
   45.31 +        apic_write_around(APIC_CMCI, v | APIC_LVT_MASKED);
   45.32 +    }
   45.33      /*
   45.34       * Clean APIC state for other OSs:
   45.35       */
   45.36 @@ -189,10 +197,13 @@ void clear_local_APIC(void)
   45.37      if (maxlvt >= 4)
   45.38          apic_write_around(APIC_LVTPC, APIC_LVT_MASKED);
   45.39  
   45.40 -#ifdef CONFIG_X86_MCE_P4THERMAL
   45.41 +#ifdef CONFIG_X86_MCE_THERMAL
   45.42      if (maxlvt >= 5)
   45.43          apic_write_around(APIC_LVTTHMR, APIC_LVT_MASKED);
   45.44  #endif
   45.45 +    if (maxlvt >= 6)
   45.46 +        apic_write_around(APIC_CMCI, APIC_LVT_MASKED);
   45.47 +
   45.48      v = GET_APIC_VERSION(apic_read(APIC_LVR));
   45.49      if (APIC_INTEGRATED(v)) {  /* !82489DX */
   45.50          if (maxlvt > 3)        /* Due to Pentium errata 3AP and 11AP. */
   45.51 @@ -597,6 +608,7 @@ static struct {
   45.52      unsigned int apic_spiv;
   45.53      unsigned int apic_lvtt;
   45.54      unsigned int apic_lvtpc;
   45.55 +    unsigned int apic_lvtcmci;
   45.56      unsigned int apic_lvt0;
   45.57      unsigned int apic_lvt1;
   45.58      unsigned int apic_lvterr;
   45.59 @@ -608,7 +620,7 @@ static struct {
   45.60  int lapic_suspend(void)
   45.61  {
   45.62      unsigned long flags;
   45.63 -
   45.64 +    int maxlvt = get_maxlvt();
   45.65      if (!apic_pm_state.active)
   45.66          return 0;
   45.67  
   45.68 @@ -620,6 +632,11 @@ int lapic_suspend(void)
   45.69      apic_pm_state.apic_spiv = apic_read(APIC_SPIV);
   45.70      apic_pm_state.apic_lvtt = apic_read(APIC_LVTT);
   45.71      apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC);
   45.72 +
   45.73 +    if (maxlvt >= 6) {
   45.74 +        apic_pm_state.apic_lvtcmci = apic_read(APIC_CMCI);
   45.75 +    }
   45.76 +
   45.77      apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0);
   45.78      apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1);
   45.79      apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
   45.80 @@ -637,6 +654,7 @@ int lapic_resume(void)
   45.81  {
   45.82      unsigned int l, h;
   45.83      unsigned long flags;
   45.84 +    int maxlvt = get_maxlvt();
   45.85  
   45.86      if (!apic_pm_state.active)
   45.87          return 0;
   45.88 @@ -669,6 +687,11 @@ int lapic_resume(void)
   45.89      apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
   45.90      apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
   45.91      apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
   45.92 +
   45.93 +    if (maxlvt >= 6) {
   45.94 +        apic_write(APIC_CMCI, apic_pm_state.apic_lvtcmci);
   45.95 +    }
   45.96 +
   45.97      apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc);
   45.98      apic_write(APIC_LVTT, apic_pm_state.apic_lvtt);
   45.99      apic_write(APIC_TDCR, apic_pm_state.apic_tdcr);
    46.1 --- a/xen/arch/x86/cpu/amd.c	Wed Dec 24 12:50:57 2008 +0900
    46.2 +++ b/xen/arch/x86/cpu/amd.c	Wed Dec 24 12:52:34 2008 +0900
    46.3 @@ -461,8 +461,10 @@ static void __devinit init_amd(struct cp
    46.4  
    46.5  	if (cpuid_eax(0x80000000) >= 0x80000007) {
    46.6  		c->x86_power = cpuid_edx(0x80000007);
    46.7 -		if (c->x86_power & (1<<8))
    46.8 +		if (c->x86_power & (1<<8)) {
    46.9  			set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
   46.10 +			set_bit(X86_FEATURE_NOSTOP_TSC, c->x86_capability);
   46.11 +		}
   46.12  	}
   46.13  
   46.14  #ifdef CONFIG_X86_HT
    47.1 --- a/xen/arch/x86/cpu/intel.c	Wed Dec 24 12:50:57 2008 +0900
    47.2 +++ b/xen/arch/x86/cpu/intel.c	Wed Dec 24 12:52:34 2008 +0900
    47.3 @@ -218,6 +218,10 @@ static void __devinit init_intel(struct 
    47.4  	if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
    47.5  		(c->x86 == 0x6 && c->x86_model >= 0x0e))
    47.6  		set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
    47.7 +	if (cpuid_edx(0x80000007) & (1u<<8)) {
    47.8 +		set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
    47.9 +		set_bit(X86_FEATURE_NOSTOP_TSC, c->x86_capability);
   47.10 +	}
   47.11  
   47.12  	start_vmx();
   47.13  }
    48.1 --- a/xen/arch/x86/cpu/mcheck/Makefile	Wed Dec 24 12:50:57 2008 +0900
    48.2 +++ b/xen/arch/x86/cpu/mcheck/Makefile	Wed Dec 24 12:52:34 2008 +0900
    48.3 @@ -3,8 +3,7 @@ obj-y += k7.o
    48.4  obj-y += amd_k8.o
    48.5  obj-y += amd_f10.o
    48.6  obj-y += mce.o
    48.7 +obj-y += mce_intel.o
    48.8  obj-y += non-fatal.o
    48.9 -obj-y += p4.o
   48.10  obj-$(x86_32) += p5.o
   48.11 -obj-$(x86_32) += p6.o
   48.12  obj-$(x86_32) += winchip.o
    49.1 --- a/xen/arch/x86/cpu/mcheck/amd_k8.c	Wed Dec 24 12:50:57 2008 +0900
    49.2 +++ b/xen/arch/x86/cpu/mcheck/amd_k8.c	Wed Dec 24 12:52:34 2008 +0900
    49.3 @@ -219,7 +219,7 @@ void k8_machine_check(struct cpu_user_re
    49.4  			show_execution_state(regs);
    49.5  		}
    49.6  		x86_mcinfo_dump(mc_data);
    49.7 -		panic("End of MCE. Use mcelog to decode above error codes.\n");
    49.8 +		mc_panic("End of MCE. Use mcelog to decode above error codes.\n");
    49.9  	}
   49.10  
   49.11  	/* If Dom0 registered a machine check handler, which is only possible
   49.12 @@ -248,7 +248,7 @@ void k8_machine_check(struct cpu_user_re
   49.13  			/* Dom0 is impacted. Since noone can't handle
   49.14  			 * this error, panic! */
   49.15  			x86_mcinfo_dump(mc_data);
   49.16 -			panic("MCE occured in Dom0, which it can't handle\n");
   49.17 +			mc_panic("MCE occured in Dom0, which it can't handle\n");
   49.18  
   49.19  			/* UNREACHED */
   49.20  		} else {
    50.1 --- a/xen/arch/x86/cpu/mcheck/k7.c	Wed Dec 24 12:50:57 2008 +0900
    50.2 +++ b/xen/arch/x86/cpu/mcheck/k7.c	Wed Dec 24 12:52:34 2008 +0900
    50.3 @@ -14,6 +14,7 @@
    50.4  #include <asm/msr.h>
    50.5  
    50.6  #include "mce.h"
    50.7 +#include "x86_mca.h"
    50.8  
    50.9  /* Machine Check Handler For AMD Athlon/Duron */
   50.10  static fastcall void k7_machine_check(struct cpu_user_regs * regs, long error_code)
   50.11 @@ -57,9 +58,9 @@ static fastcall void k7_machine_check(st
   50.12  	}
   50.13  
   50.14  	if (recover&2)
   50.15 -		panic ("CPU context corrupt");
   50.16 +		mc_panic ("CPU context corrupt");
   50.17  	if (recover&1)
   50.18 -		panic ("Unable to continue");
   50.19 +		mc_panic ("Unable to continue");
   50.20  	printk (KERN_EMERG "Attempting to continue.\n");
   50.21  	mcgstl &= ~(1<<2);
   50.22  	wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth);
    51.1 --- a/xen/arch/x86/cpu/mcheck/mce.c	Wed Dec 24 12:50:57 2008 +0900
    51.2 +++ b/xen/arch/x86/cpu/mcheck/mce.c	Wed Dec 24 12:52:34 2008 +0900
    51.3 @@ -9,6 +9,7 @@
    51.4  #include <xen/config.h>
    51.5  #include <xen/smp.h>
    51.6  #include <xen/errno.h>
    51.7 +#include <xen/console.h>
    51.8  
    51.9  #include <asm/processor.h> 
   51.10  #include <asm/system.h>
   51.11 @@ -26,7 +27,7 @@ EXPORT_SYMBOL_GPL(nr_mce_banks);	/* non-
   51.12   * to physical cpus present in the machine.
   51.13   * The more physical cpus are available, the more entries you need.
   51.14   */
   51.15 -#define MAX_MCINFO	10
   51.16 +#define MAX_MCINFO	20
   51.17  
   51.18  struct mc_machine_notify {
   51.19  	struct mc_info mc;
   51.20 @@ -109,6 +110,12 @@ static void amd_mcheck_init(struct cpuin
   51.21  	}
   51.22  }
   51.23  
   51.24 +/* Check whether the CPU supports Machine Check (both MCE and MCA) */
   51.25 +int mce_available(struct cpuinfo_x86 *c)
   51.26 +{
   51.27 +	return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA);
   51.28 +}
   51.29 +
   51.30  /* This has to be run for each processor */
   51.31  void mcheck_init(struct cpuinfo_x86 *c)
   51.32  {
   51.33 @@ -134,11 +141,13 @@ void mcheck_init(struct cpuinfo_x86 *c)
   51.34  #ifndef CONFIG_X86_64
   51.35  		if (c->x86==5)
   51.36  			intel_p5_mcheck_init(c);
   51.37 -		if (c->x86==6)
   51.38 -			intel_p6_mcheck_init(c);
   51.39  #endif
   51.40 -		if (c->x86==15)
   51.41 -			intel_p4_mcheck_init(c);
   51.42 +		/* P6 or P4 family, including the Core 2 Duo series */
   51.43 +		if (c->x86 == 6 || c->x86==15)
   51.44 +		{
   51.45 +			printk(KERN_DEBUG "MCE: Intel newly family MC Init\n");
   51.46 +			intel_mcheck_init(c);
   51.47 +		}
   51.48  		break;
   51.49  
   51.50  #ifndef CONFIG_X86_64
   51.51 @@ -412,7 +421,7 @@ void x86_mcinfo_dump(struct mc_info *mi)
   51.52  		if (mic == NULL)
   51.53  			return;
   51.54  		if (mic->type != MC_TYPE_BANK)
   51.55 -			continue;
   51.56 +			goto next;
   51.57  
   51.58  		mc_bank = (struct mcinfo_bank *)mic;
   51.59  	
   51.60 @@ -425,6 +434,7 @@ void x86_mcinfo_dump(struct mc_info *mi)
   51.61  			printk(" at %16"PRIx64, mc_bank->mc_addr);
   51.62  
   51.63  		printk("\n");
   51.64 +next:
   51.65  		mic = x86_mcinfo_next(mic); /* next entry */
   51.66  		if ((mic == NULL) || (mic->size == 0))
   51.67  			break;
   51.68 @@ -574,3 +584,15 @@ long do_mca(XEN_GUEST_HANDLE(xen_mc_t) u
   51.69  
   51.70  	return ret;
   51.71  }
   51.72 +
   51.73 +void mc_panic(char *s)
   51.74 +{
   51.75 +    console_start_sync();
   51.76 +    printk("Fatal machine check: %s\n", s);
   51.77 +    printk("\n"
   51.78 +           "****************************************\n"
   51.79 +           "\n"
   51.80 +           "   The processor has reported a hardware error which cannot\n"
   51.81 +           "   be recovered from.  Xen will now reboot the machine.\n");
   51.82 +    panic("HARDWARE ERROR");
   51.83 +}
    52.1 --- a/xen/arch/x86/cpu/mcheck/mce.h	Wed Dec 24 12:50:57 2008 +0900
    52.2 +++ b/xen/arch/x86/cpu/mcheck/mce.h	Wed Dec 24 12:52:34 2008 +0900
    52.3 @@ -1,14 +1,22 @@
    52.4  #include <xen/init.h>
    52.5 +#include <asm/types.h>
    52.6  #include <asm/traps.h>
    52.7 +#include <asm/atomic.h>
    52.8 +#include <asm/percpu.h>
    52.9 +
   52.10  
   52.11  /* Init functions */
   52.12  void amd_nonfatal_mcheck_init(struct cpuinfo_x86 *c);
   52.13  void amd_k7_mcheck_init(struct cpuinfo_x86 *c);
   52.14  void amd_k8_mcheck_init(struct cpuinfo_x86 *c);
   52.15  void amd_f10_mcheck_init(struct cpuinfo_x86 *c);
   52.16 -void intel_p4_mcheck_init(struct cpuinfo_x86 *c);
   52.17 +
   52.18 +
   52.19 +void intel_mcheck_timer(struct cpuinfo_x86 *c);
   52.20  void intel_p5_mcheck_init(struct cpuinfo_x86 *c);
   52.21 -void intel_p6_mcheck_init(struct cpuinfo_x86 *c);
   52.22 +void intel_mcheck_init(struct cpuinfo_x86 *c);
   52.23 +void mce_intel_feature_init(struct cpuinfo_x86 *c);
   52.24 +
   52.25  void winchip_mcheck_init(struct cpuinfo_x86 *c);
   52.26  
   52.27  /* Function pointer used in the handlers to collect additional information
   52.28 @@ -19,12 +27,11 @@ extern int (*mc_callback_bank_extended)(
   52.29  		uint16_t bank, uint64_t status);
   52.30  
   52.31  
   52.32 +int mce_available(struct cpuinfo_x86 *c);
   52.33  /* Helper functions used for collecting error telemetry */
   52.34  struct mc_info *x86_mcinfo_getptr(void);
   52.35  void x86_mcinfo_clear(struct mc_info *mi);
   52.36  int x86_mcinfo_add(struct mc_info *mi, void *mcinfo);
   52.37  void x86_mcinfo_dump(struct mc_info *mi);
   52.38 +void mc_panic(char *s);
   52.39  
   52.40 -/* Global variables */
   52.41 -extern int mce_disabled;
   52.42 -extern unsigned int nr_mce_banks;
    53.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    53.2 +++ b/xen/arch/x86/cpu/mcheck/mce_intel.c	Wed Dec 24 12:52:34 2008 +0900
    53.3 @@ -0,0 +1,632 @@
    53.4 +#include <xen/init.h>
    53.5 +#include <xen/types.h>
    53.6 +#include <xen/irq.h>
    53.7 +#include <xen/event.h>
    53.8 +#include <xen/kernel.h>
    53.9 +#include <xen/smp.h>
   53.10 +#include <asm/processor.h> 
   53.11 +#include <asm/system.h>
   53.12 +#include <asm/msr.h>
   53.13 +#include "mce.h"
   53.14 +#include "x86_mca.h"
   53.15 +
   53.16 +DEFINE_PER_CPU(cpu_banks_t, mce_banks_owned);
   53.17 +
   53.18 +static int nr_intel_ext_msrs = 0;
   53.19 +static int cmci_support = 0;
   53.20 +extern int firstbank;
   53.21 +
   53.22 +#ifdef CONFIG_X86_MCE_THERMAL
   53.23 +static void unexpected_thermal_interrupt(struct cpu_user_regs *regs)
   53.24 +{
   53.25 +    printk(KERN_ERR "Thermal: CPU%d: Unexpected LVT TMR interrupt!\n",
   53.26 +                smp_processor_id());
   53.27 +    add_taint(TAINT_MACHINE_CHECK);
   53.28 +}
   53.29 +
   53.30 +/* P4/Xeon Thermal transition interrupt handler */
   53.31 +static void intel_thermal_interrupt(struct cpu_user_regs *regs)
   53.32 +{
   53.33 +    u32 l, h;
   53.34 +    unsigned int cpu = smp_processor_id();
   53.35 +    static s_time_t next[NR_CPUS];
   53.36 +
   53.37 +    ack_APIC_irq();
   53.38 +    if (NOW() < next[cpu])
   53.39 +        return;
   53.40 +
   53.41 +    next[cpu] = NOW() + MILLISECS(5000);
   53.42 +    rdmsr(MSR_IA32_THERM_STATUS, l, h);
   53.43 +    if (l & 0x1) {
   53.44 +        printk(KERN_EMERG "CPU%d: Temperature above threshold\n", cpu);
   53.45 +        printk(KERN_EMERG "CPU%d: Running in modulated clock mode\n",
   53.46 +                cpu);
   53.47 +        add_taint(TAINT_MACHINE_CHECK);
   53.48 +    } else {
   53.49 +        printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu);
   53.50 +    }
   53.51 +}
   53.52 +
   53.53 +/* Active thermal handler; stays on the "unexpected" stub until intel_init_thermal() installs the real one */
   53.54 +static void (*vendor_thermal_interrupt)(struct cpu_user_regs *regs) 
   53.55 +        = unexpected_thermal_interrupt;
   53.56 +
   53.57 +fastcall void smp_thermal_interrupt(struct cpu_user_regs *regs) /* entry point: dispatches to the handler above */
   53.58 +{
   53.59 +    irq_enter();
   53.60 +    vendor_thermal_interrupt(regs);
   53.61 +    irq_exit();
   53.62 +}
   53.63 +
   53.64 +/* P4/Xeon thermal regulation detect and init: routes the thermal LVT to intel_thermal_interrupt */
   53.65 +static void intel_init_thermal(struct cpuinfo_x86 *c)
   53.66 +{
   53.67 +    u32 l, h;
   53.68 +    int tm2 = 0; /* only selects "TM2" vs "TM1" in the final message */
   53.69 +    unsigned int cpu = smp_processor_id();
   53.70 +
   53.71 +    /* Thermal monitoring */
   53.72 +    if (!cpu_has(c, X86_FEATURE_ACPI))
   53.73 +        return; /* -ENODEV */
   53.74 +
   53.75 +    /* Clock modulation */
   53.76 +    if (!cpu_has(c, X86_FEATURE_ACC))
   53.77 +        return; /* -ENODEV */
   53.78 +
   53.79 +    /* first check if its enabled already, in which case there might
   53.80 +     * be some SMM goo which handles it, so we can't even put a handler
   53.81 +     * since it might be delivered via SMI already -zwanem.
   53.82 +     */
   53.83 +    rdmsr (MSR_IA32_MISC_ENABLE, l, h);
   53.84 +    h = apic_read(APIC_LVTTHMR); /* h is reused: from here on it holds the thermal LVT entry */
   53.85 +    if ((l & (1<<3)) && (h & APIC_DM_SMI)) {
   53.86 +        printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n",cpu);
   53.87 +        return; /* -EBUSY */
   53.88 +    }
   53.89 +
   53.90 +    if (cpu_has(c, X86_FEATURE_TM2) && (l & (1 << 13))) /* l is still the MISC_ENABLE low word */
   53.91 +        tm2 = 1;
   53.92 +
   53.93 +    /* check whether a vector already exists, temporarily masked? */
   53.94 +    if (h & APIC_VECTOR_MASK) {
   53.95 +        printk(KERN_DEBUG "CPU%d: Thermal LVT vector (%#x) already installed\n",
   53.96 +                 cpu, (h & APIC_VECTOR_MASK));
   53.97 +        return; /* -EBUSY */
   53.98 +    }
   53.99 +
  53.100 +    /* The temperature transition interrupt handler setup */
  53.101 +    h = THERMAL_APIC_VECTOR;    /* our delivery vector */
  53.102 +    h |= (APIC_DM_FIXED | APIC_LVT_MASKED);  /* we'll mask till we're ready */
  53.103 +    apic_write_around(APIC_LVTTHMR, h);
  53.104 +
  53.105 +    rdmsr (MSR_IA32_THERM_INTERRUPT, l, h);
  53.106 +    wrmsr (MSR_IA32_THERM_INTERRUPT, l | 0x03 , h); /* sets the two low bits, keeps the rest */
  53.107 +
  53.108 +    /* ok we're good to go... */
  53.109 +    vendor_thermal_interrupt = intel_thermal_interrupt;
  53.110 +
  53.111 +    rdmsr (MSR_IA32_MISC_ENABLE, l, h);
  53.112 +    wrmsr (MSR_IA32_MISC_ENABLE, l | (1<<3), h); /* same bit 3 that was tested before the SMI check */
  53.113 +
  53.114 +    l = apic_read (APIC_LVTTHMR);
  53.115 +    apic_write_around (APIC_LVTTHMR, l & ~APIC_LVT_MASKED); /* unmask only after the handler is installed */
  53.116 +    printk (KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n", 
  53.117 +            cpu, tm2 ? "TM2" : "TM1");
  53.118 +    return;
  53.119 +}
  53.120 +#endif /* CONFIG_X86_MCE_THERMAL */
  53.121 +/* Fill *mc_ext with the ten IA32_MCG_* extended-state MSRs; no-op when none are reported */
  53.122 +static inline void intel_get_extended_msrs(struct mcinfo_extended *mc_ext)
  53.123 +{
  53.124 +    if (nr_intel_ext_msrs == 0)
  53.125 +        return;
  53.126 +
  53.127 +    /* Only reached when MCG_CAP reported MCG_EXT_P (see mce_cap_init). */
  53.128 +    memset(mc_ext, 0, sizeof(struct mcinfo_extended));
  53.129 +    mc_ext->common.type = MC_TYPE_EXTENDED;
  53.130 +    mc_ext->common.size = sizeof(*mc_ext); /* size of the record, not of the pointer */
  53.131 +    mc_ext->mc_msrs = 10; /* number of mc_msr[] slots filled below */
  53.132 +
  53.133 +    mc_ext->mc_msr[0].reg = MSR_IA32_MCG_EAX;
  53.134 +    rdmsrl(MSR_IA32_MCG_EAX, mc_ext->mc_msr[0].value);
  53.135 +    mc_ext->mc_msr[1].reg = MSR_IA32_MCG_EBX;
  53.136 +    rdmsrl(MSR_IA32_MCG_EBX, mc_ext->mc_msr[1].value);
  53.137 +    mc_ext->mc_msr[2].reg = MSR_IA32_MCG_ECX;
  53.138 +    rdmsrl(MSR_IA32_MCG_ECX, mc_ext->mc_msr[2].value);
  53.139 +
  53.140 +    mc_ext->mc_msr[3].reg = MSR_IA32_MCG_EDX;
  53.141 +    rdmsrl(MSR_IA32_MCG_EDX, mc_ext->mc_msr[3].value);
  53.142 +    mc_ext->mc_msr[4].reg = MSR_IA32_MCG_ESI;
  53.143 +    rdmsrl(MSR_IA32_MCG_ESI, mc_ext->mc_msr[4].value);
  53.144 +    mc_ext->mc_msr[5].reg = MSR_IA32_MCG_EDI;
  53.145 +    rdmsrl(MSR_IA32_MCG_EDI, mc_ext->mc_msr[5].value);
  53.146 +
  53.147 +    mc_ext->mc_msr[6].reg = MSR_IA32_MCG_EBP;
  53.148 +    rdmsrl(MSR_IA32_MCG_EBP, mc_ext->mc_msr[6].value);
  53.149 +    mc_ext->mc_msr[7].reg = MSR_IA32_MCG_ESP;
  53.150 +    rdmsrl(MSR_IA32_MCG_ESP, mc_ext->mc_msr[7].value);
  53.151 +    mc_ext->mc_msr[8].reg = MSR_IA32_MCG_EFLAGS;
  53.152 +    rdmsrl(MSR_IA32_MCG_EFLAGS, mc_ext->mc_msr[8].value);
  53.153 +    mc_ext->mc_msr[9].reg = MSR_IA32_MCG_EIP;
  53.154 +    rdmsrl(MSR_IA32_MCG_EIP, mc_ext->mc_msr[9].value);
  53.155 +}
  53.156 +
  53.157 +/* machine_check_poll might be called by following types:
  53.158 + * 1. called when do mcheck_init.
  53.159 + * 2. called in cmci interrupt handler
  53.160 + * 3. called in polling handler
  53.161 + * It will generate a new mc_info item if found CE/UC errors. DOM0 is the 
  53.162 + * consumer.
  53.163 +*/
  53.164 +static int machine_check_poll(struct mc_info *mi, int calltype)
  53.165 +{
  53.166 +    int exceptions = (read_cr4() & X86_CR4_MCE);
  53.167 +    int i, nr_unit = 0, uc = 0, pcc = 0;
  53.168 +    uint64_t status, addr;
  53.169 +    struct mcinfo_global mcg;
  53.170 +    struct mcinfo_extended mce;
  53.171 +    unsigned int cpu;
  53.172 +    struct domain *d;
  53.173 +
  53.174 +    cpu = smp_processor_id();
  53.175 +
  53.176 +    if (!mi) {
  53.177 +        printk(KERN_ERR "mcheck_poll: Failed to get mc_info entry\n");
  53.178 +        return 0;
  53.179 +    }
  53.180 +    x86_mcinfo_clear(mi);
  53.181 +
  53.182 +    memset(&mcg, 0, sizeof(mcg));
  53.183 +    mcg.common.type = MC_TYPE_GLOBAL;
  53.184 +    mcg.common.size = sizeof(mcg);
  53.185 +    /*If called from cpu-reset check, don't need to fill them.
  53.186 +     *If called from cmci context, we'll try to fill domid by memory addr
  53.187 +    */
  53.188 +    mcg.mc_domid = -1;
  53.189 +    mcg.mc_vcpuid = -1;
  53.190 +    if (calltype == MC_FLAG_POLLED || calltype == MC_FLAG_RESET)
  53.191 +        mcg.mc_flags = MC_FLAG_POLLED;
  53.192 +    else if (calltype == MC_FLAG_CMCI)
  53.193 +        mcg.mc_flags = MC_FLAG_CMCI;
  53.194 +    mcg.mc_socketid = phys_proc_id[cpu];
  53.195 +    mcg.mc_coreid = cpu_core_id[cpu];
  53.196 +    mcg.mc_apicid = cpu_physical_id(cpu);
  53.197 +    mcg.mc_core_threadid = mcg.mc_apicid & ( 1 << (smp_num_siblings - 1)); 
  53.198 +    rdmsrl(MSR_IA32_MCG_STATUS, mcg.mc_gstatus);
  53.199 +
  53.200 +    for ( i = 0; i < nr_mce_banks; i++ ) {
  53.201 +        struct mcinfo_bank mcb;
  53.202 +        /*For CMCI, only owners checks the owned MSRs*/
  53.203 +        if ( !test_bit(i, __get_cpu_var(mce_banks_owned)) &&
  53.204 +             (calltype & MC_FLAG_CMCI) )
  53.205 +            continue;
  53.206 +        rdmsrl(MSR_IA32_MC0_STATUS + 4 * i, status);
  53.207 +
  53.208 +        if (! (status & MCi_STATUS_VAL) )
  53.209 +            continue;
  53.210 +        /*
  53.211 +         * Uncorrected events are handled by the exception
  53.212 +         * handler when it is enabled. But when the exception
  53.213 +         * is disabled such as when mcheck_init, log everything.
  53.214 +         */
  53.215 +        if ((status & MCi_STATUS_UC) && exceptions)
  53.216 +            continue;
  53.217 +
  53.218 +        if (status & MCi_STATUS_UC)
  53.219 +            uc = 1;
  53.220 +        if (status & MCi_STATUS_PCC)
  53.221 +            pcc = 1;
  53.222 +
  53.223 +        memset(&mcb, 0, sizeof(mcb));
  53.224 +        mcb.common.type = MC_TYPE_BANK;
  53.225 +        mcb.common.size = sizeof(mcb);
  53.226 +        mcb.mc_bank = i;
  53.227 +        mcb.mc_status = status;
  53.228 +        if (status & MCi_STATUS_MISCV)
  53.229 +            rdmsrl(MSR_IA32_MC0_MISC + 4 * i, mcb.mc_misc);
  53.230 +        if (status & MCi_STATUS_ADDRV) {
  53.231 +            rdmsrl(MSR_IA32_MC0_ADDR + 4 * i, addr);
  53.232 +            d = maddr_get_owner(addr);
  53.233 +            if ( d && (calltype == MC_FLAG_CMCI || calltype == MC_FLAG_POLLED) )
  53.234 +                mcb.mc_domid = d->domain_id;
  53.235 +        }
  53.236 +        if (cmci_support)
  53.237 +            rdmsrl(MSR_IA32_MC0_CTL2 + i, mcb.mc_ctrl2);
  53.238 +        if (calltype == MC_FLAG_CMCI)
  53.239 +            rdtscll(mcb.mc_tsc);
  53.240 +        x86_mcinfo_add(mi, &mcb);
  53.241 +        nr_unit++;
  53.242 +        add_taint(TAINT_MACHINE_CHECK);
  53.243 +        /*Clear state for this bank */
  53.244 +        wrmsrl(MSR_IA32_MC0_STATUS + 4 * i, 0);
  53.245 +        printk(KERN_DEBUG "mcheck_poll: bank%i CPU%d status[%"PRIx64"]\n", 
  53.246 +                i, cpu, status);
  53.247 +        printk(KERN_DEBUG "mcheck_poll: CPU%d, SOCKET%d, CORE%d, APICID[%d], "
  53.248 +                "thread[%d]\n", cpu, mcg.mc_socketid, 
  53.249 +                mcg.mc_coreid, mcg.mc_apicid, mcg.mc_core_threadid);
  53.250 + 
  53.251 +    }
  53.252 +    /*if pcc = 1, uc must be 1*/
  53.253 +    if (pcc)
  53.254 +        mcg.mc_flags |= MC_FLAG_UNCORRECTABLE;
  53.255 +    else if (uc)
  53.256 +        mcg.mc_flags |= MC_FLAG_RECOVERABLE;
  53.257 +    else /*correctable*/
  53.258 +        mcg.mc_flags |= MC_FLAG_CORRECTABLE;
  53.259 +
  53.260 +    if (nr_unit && nr_intel_ext_msrs && 
  53.261 +                    (mcg.mc_gstatus & MCG_STATUS_EIPV)) {
  53.262 +        intel_get_extended_msrs(&mce);
  53.263 +        x86_mcinfo_add(mi, &mce);
  53.264 +    }
  53.265 +    if (nr_unit) 
  53.266 +        x86_mcinfo_add(mi, &mcg);
  53.267 +    /*Clear global state*/
  53.268 +    return nr_unit;
  53.269 +}
  53.270 +
  53.271 +static fastcall void intel_machine_check(struct cpu_user_regs * regs, long error_code)
  53.272 +{
  53.273 +    /* #MC exception handler (temporary: a copy of the previous
  53.274 +     * implementation, to be replaced by upcoming machine check patches).
  53.275 +     * Prints MCG_STATUS and every flagged bank, panics when it decides it
  53.276 +     * cannot continue, otherwise clears the flagged banks and returns. */
  53.277 +
  53.278 +    int recover=1; /* bitmask: bit 0 -> "Unable to continue", bit 1 -> "CPU context corrupt" */
  53.279 +    u32 alow, ahigh, high, low;
  53.280 +    u32 mcgstl, mcgsth;
  53.281 +    int i;
  53.282 +   
  53.283 +    rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
  53.284 +    if (mcgstl & (1<<0))       /* Recoverable ? */
  53.285 +        recover=0;
  53.286 +    
  53.287 +    printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
  53.288 +           smp_processor_id(), mcgsth, mcgstl);
  53.289 +    
  53.290 +    for (i=0; i<nr_mce_banks; i++) {
  53.291 +        rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high);
  53.292 +        if (high & (1<<31)) { /* only banks with the top status bit set are reported */
  53.293 +            if (high & (1<<29))
  53.294 +                recover |= 1;
  53.295 +            if (high & (1<<25))
  53.296 +                recover |= 2;
  53.297 +            printk (KERN_EMERG "Bank %d: %08x%08x", i, high, low);
  53.298 +            high &= ~(1<<31);
  53.299 +            if (high & (1<<27)) { /* print the bank's MISC register */
  53.300 +                rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh);
  53.301 +                printk ("[%08x%08x]", ahigh, alow);
  53.302 +            }
  53.303 +            if (high & (1<<26)) { /* print the bank's ADDR register */
  53.304 +                rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
  53.305 +                printk (" at %08x%08x", ahigh, alow);
  53.306 +            }
  53.307 +            printk ("\n");
  53.308 +        }
  53.309 +    }
  53.310 +    
  53.311 +    if (recover & 2)
  53.312 +        mc_panic ("CPU context corrupt");
  53.313 +    if (recover & 1)
  53.314 +        mc_panic ("Unable to continue");
  53.315 +    
  53.316 +    printk(KERN_EMERG "Attempting to continue.\n");
  53.317 +    /* 
  53.318 +     * Do not clear the MSR_IA32_MCi_STATUS if the error is not 
  53.319 +     * recoverable/continuable.This will allow BIOS to look at the MSRs
  53.320 +     * for errors if the OS could not log the error.
  53.321 +     */
  53.322 +    for (i=0; i<nr_mce_banks; i++) {
  53.323 +        u32 msr;
  53.324 +        msr = MSR_IA32_MC0_STATUS+i*4;
  53.325 +        rdmsr (msr, low, high);
  53.326 +        if (high&(1<<31)) {
  53.327 +            /* Clear it */
  53.328 +            wrmsr(msr, 0UL, 0UL);
  53.329 +            /* Serialize */
  53.330 +            wmb();
  53.331 +            add_taint(TAINT_MACHINE_CHECK);
  53.332 +        }
  53.333 +    }
  53.334 +    mcgstl &= ~(1<<2); /* NOTE(review): presumably the in-progress flag of MCG_STATUS; confirm against the SDM */
  53.335 +    wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth);
  53.336 +}
  53.337 +
  53.338 +static DEFINE_SPINLOCK(cmci_discover_lock); /* held by cmci_discover() around the per-bank probe loop */
  53.339 +static DEFINE_PER_CPU(cpu_banks_t, no_cmci_banks); /* per-CPU bitmap: banks where CMCI_EN did not stick */
  53.340 +
  53.341 +/* Probe CMCI on bank i using the bank-sharing discovery recommended in the
  53.342 + * SDM. Returns 0 when CMCI_EN does not stick (bank flagged in
  53.343 + * no_cmci_banks for the polling timer); returns 1 otherwise. */
  53.344 +static int do_cmci_discover(int i)
  53.345 +{
  53.346 +    unsigned msr = MSR_IA32_MC0_CTL2 + i;
  53.347 +    u64 val;
  53.348 +
  53.349 +    rdmsrl(msr, val);
  53.350 +    /* Some other CPU already owns this bank. */
  53.351 +    if (val & CMCI_EN) {
  53.352 +        clear_bit(i, __get_cpu_var(mce_banks_owned));
  53.353 +        goto out;
  53.354 +    }
  53.355 +    wrmsrl(msr, val | CMCI_EN | CMCI_THRESHOLD); /* try to enable, then read back to see if it stuck */
  53.356 +    rdmsrl(msr, val);
  53.357 +
  53.358 +    if (!(val & CMCI_EN)) {
  53.359 +        /* This bank does not support CMCI. Polling timer has to handle it. */
  53.360 +        set_bit(i, __get_cpu_var(no_cmci_banks));
  53.361 +        return 0;
  53.362 +    }
  53.363 +    set_bit(i, __get_cpu_var(mce_banks_owned)); /* this CPU enabled CMCI, so it becomes the owner */
  53.364 +out:
  53.365 +    clear_bit(i, __get_cpu_var(no_cmci_banks)); /* reached for both "owned here" and "owned elsewhere" */
  53.366 +    return 1;
  53.367 +}
  53.368 +/* Probe every bank this CPU does not yet own, under cmci_discover_lock, then log both bitmaps */
  53.369 +static void cmci_discover(void)
  53.370 +{
  53.371 +    unsigned long flags;
  53.372 +    int i;
  53.373 +
  53.374 +    printk(KERN_DEBUG "CMCI: find owner on CPU%d\n", smp_processor_id());
  53.375 +
  53.376 +    spin_lock_irqsave(&cmci_discover_lock, flags);
  53.377 +
  53.378 +    for (i = 0; i < nr_mce_banks; i++)
  53.379 +        if (!test_bit(i, __get_cpu_var(mce_banks_owned)))
  53.380 +            do_cmci_discover(i); /* return value is not used here */
  53.381 +
  53.382 +    spin_unlock_irqrestore(&cmci_discover_lock, flags);
  53.383 +
  53.384 +    printk(KERN_DEBUG "CMCI: CPU%d owner_map[%lx], no_cmci_map[%lx]\n", 
  53.385 +           smp_processor_id(), 
  53.386 +           *((unsigned long *)__get_cpu_var(mce_banks_owned)), /* prints the first word of each bitmap only */
  53.387 +           *((unsigned long *)__get_cpu_var(no_cmci_banks)));
  53.388 +}
  53.389 +
  53.390 +/*
  53.391 + * Pick one owning CPU per bank. A bank may be shared between CPUs, so
  53.392 + * a single owner is chosen to keep an event from being reported more
  53.393 + * than once.
  53.394 + *
  53.395 + * Ownership has to be recomputed when CPUs go offline and an owner
  53.396 + * disappears. Pollers run in parallel as well, so banks must be
  53.397 + * updated in a way that neither loses nor duplicates events.
  53.398 + * mce_set_owner() runs the discovery for the calling CPU.
  53.399 + */
  53.400 +
  53.401 +static void mce_set_owner(void)
  53.402 +{
  53.403 +    int discover = cmci_support && mce_disabled != 1;
  53.404 +
  53.405 +    if (discover)
  53.406 +        cmci_discover();
  53.407 +}
  53.408 +/* on_each_cpu() callback: rerun CMCI owner discovery on the executing CPU */
  53.409 +static void __cpu_mcheck_distribute_cmci(void *unused)
  53.410 +{
  53.411 +    cmci_discover();
  53.412 +}
  53.413 +/* Ask every CPU to rerun CMCI discovery; does nothing without CMCI support or with MCE disabled */
  53.414 +void cpu_mcheck_distribute_cmci(void)
  53.415 +{
  53.416 +    if (cmci_support && !mce_disabled)
  53.417 +        on_each_cpu(__cpu_mcheck_distribute_cmci, NULL, 0, 0);
  53.418 +}
  53.419 +/* Give up every bank this CPU owns: clear CMCI_EN and the threshold field, then drop the ownership bit */
  53.420 +static void clear_cmci(void)
  53.421 +{
  53.422 +    int i;
  53.423 +
  53.424 +    if (!cmci_support || mce_disabled == 1)
  53.425 +        return;
  53.426 +
  53.427 +    printk(KERN_DEBUG "CMCI: clear_cmci support on CPU%d\n", 
  53.428 +            smp_processor_id());
  53.429 +
  53.430 +    for (i = 0; i < nr_mce_banks; i++) {
  53.431 +        unsigned msr = MSR_IA32_MC0_CTL2 + i;
  53.432 +        u64 val;
  53.433 +        if (!test_bit(i, __get_cpu_var(mce_banks_owned))) /* banks owned elsewhere are left alone */
  53.434 +            continue;
  53.435 +        rdmsrl(msr, val);
  53.436 +        if (val & (CMCI_EN|CMCI_THRESHOLD_MASK)) /* skip the write when both fields are already clear */
  53.437 +            wrmsrl(msr, val & ~(CMCI_EN|CMCI_THRESHOLD_MASK));
  53.438 +        clear_bit(i, __get_cpu_var(mce_banks_owned));
  53.439 +    }
  53.440 +}
  53.441 +/* Turn machine checks off on this CPU: clear CR4.MCE, then release the CMCI banks it owns */
  53.442 +void cpu_mcheck_disable(void)
  53.443 +{
  53.444 +    clear_in_cr4(X86_CR4_MCE);
  53.445 +
  53.446 +    if (cmci_support && !mce_disabled) /* clear_cmci() checks cmci_support/mce_disabled again itself */
  53.447 +        clear_cmci();
  53.448 +}
  53.449 +/* Program the CMCI LVT entry with CMCI_APIC_VECTOR (masked first, then unmasked) on this CPU */
  53.450 +static void intel_init_cmci(struct cpuinfo_x86 *c)
  53.451 +{
  53.452 +    u32 l, apic;
  53.453 +    int cpu = smp_processor_id();
  53.454 +
  53.455 +    if (!mce_available(c) || !cmci_support) {
  53.456 +        printk(KERN_DEBUG "CMCI: CPU%d has no CMCI support\n", cpu);
  53.457 +        return;
  53.458 +    }
  53.459 +
  53.460 +    apic = apic_read(APIC_CMCI);
  53.461 +    if ( apic & APIC_VECTOR_MASK ) /* a vector is already programmed: leave it untouched */
  53.462 +    {
  53.463 +        printk(KERN_WARNING "CPU%d CMCI LVT vector (%#x) already installed\n",
  53.464 +            cpu, ( apic & APIC_VECTOR_MASK ));
  53.465 +        return;
  53.466 +    }
  53.467 +
  53.468 +    apic = CMCI_APIC_VECTOR;
  53.469 +    apic |= (APIC_DM_FIXED | APIC_LVT_MASKED); /* install masked first */
  53.470 +    apic_write_around(APIC_CMCI, apic);
  53.471 +
  53.472 +    l = apic_read(APIC_CMCI);
  53.473 +    apic_write_around(APIC_CMCI, l & ~APIC_LVT_MASKED); /* then unmask */
  53.474 +}
  53.475 +/* CMCI interrupt entry: poll the banks this CPU owns, dump findings and notify dom0 via VIRQ_MCA */
  53.476 +fastcall void smp_cmci_interrupt(struct cpu_user_regs *regs)
  53.477 +{
  53.478 +    int nr_unit;
  53.479 +    struct mc_info *mi =  x86_mcinfo_getptr();
  53.480 +    int cpu = smp_processor_id();
  53.481 +
  53.482 +    ack_APIC_irq();
  53.483 +    irq_enter();
  53.484 +    printk(KERN_DEBUG "CMCI: cmci_intr happen on CPU%d\n", cpu);
  53.485 +    nr_unit = machine_check_poll(mi, MC_FLAG_CMCI); /* returns 0 when mi is NULL or nothing was logged */
  53.486 +    if (nr_unit) {
  53.487 +        x86_mcinfo_dump(mi);
  53.488 +        if (dom0 && guest_enabled_event(dom0->vcpu[0], VIRQ_MCA)) /* only if dom0 exists and listens */
  53.489 +            send_guest_global_virq(dom0, VIRQ_MCA);
  53.490 +    }
  53.491 +    irq_exit();
  53.492 +}
  53.493 +/* Intel-specific extras: thermal monitoring (only with CONFIG_X86_MCE_THERMAL) and the CMCI LVT */
  53.494 +void mce_intel_feature_init(struct cpuinfo_x86 *c)
  53.495 +{
  53.496 +
  53.497 +#ifdef CONFIG_X86_MCE_THERMAL
  53.498 +    intel_init_thermal(c);
  53.499 +#endif
  53.500 +    intel_init_cmci(c);
  53.501 +}
  53.502 +/* Read MCG_CAP and derive cmci_support, nr_mce_banks, nr_intel_ext_msrs and firstbank from it */
  53.503 +static void mce_cap_init(struct cpuinfo_x86 *c)
  53.504 +{
  53.505 +    u32 l, h;
  53.506 +
  53.507 +    rdmsr (MSR_IA32_MCG_CAP, l, h);
  53.508 +    if ((l & MCG_CMCI_P) && cpu_has_apic)
  53.509 +        cmci_support = 1;
  53.510 +
  53.511 +    nr_mce_banks = l & 0xff; /* low byte of MCG_CAP */
  53.512 +    if (nr_mce_banks > MAX_NR_BANKS) /* NOTE(review): only warns, nr_mce_banks is not clamped */
  53.513 +        printk(KERN_WARNING "MCE: exceed max mce banks\n");
  53.514 +    if (l & MCG_EXT_P)
  53.515 +    {
  53.516 +        nr_intel_ext_msrs = (l >> MCG_EXT_CNT) & 0xff;
  53.517 +        printk (KERN_INFO "CPU%d: Intel Extended MCE MSRs (%d) available\n",
  53.518 +            smp_processor_id(), nr_intel_ext_msrs);
  53.519 +    }
  53.520 +    /* On Intel family 6 models below 0x1A, bank 0 is an alias of a BIOS
  53.521 +     * MSR, so start at bank 1; from model 0x1A on, bank 0 is usable. */
  53.522 +    if ( c->x86 == 6 && c->x86_vendor == X86_VENDOR_INTEL
  53.523 +            && c->x86_model < 0x1A)
  53.524 +        firstbank = 1;
  53.525 +    else
  53.526 +        firstbank = 0;
  53.527 +}
  53.528 +/* Log and clear errors left from before reset, then enable CR4.MCE, MCG_CTL and each uninitialised bank */
  53.529 +static void mce_init(void)
  53.530 +{
  53.531 +    u32 l, h;
  53.532 +    int i, nr_unit;
  53.533 +    struct mc_info *mi =  x86_mcinfo_getptr();
  53.534 +    clear_in_cr4(X86_CR4_MCE); /* with CR4.MCE off, the poll below also logs UC errors */
  53.535 +    /* log the machine checks left over from the previous reset.
  53.536 +     * This also clears all registers*/
  53.537 +
  53.538 +    nr_unit = machine_check_poll(mi, MC_FLAG_RESET);
  53.539 +    /* At boot nothing is injected into DOM0; the findings are only
  53.540 +     * printed. */
  53.541 +    if (nr_unit > 0)
  53.542 +        x86_mcinfo_dump(mi);
  53.543 +
  53.544 +    set_in_cr4(X86_CR4_MCE);
  53.545 +    rdmsr (MSR_IA32_MCG_CAP, l, h);
  53.546 +    if (l & MCG_CTL_P) /* Control register present ? */
  53.547 +        wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
  53.548 +
  53.549 +    for (i = firstbank; i < nr_mce_banks; i++)
  53.550 +    {
  53.551 +        /*Some banks are shared across cores, use MCi_CTRL to judge whether
  53.552 +         * this bank has been initialized by other cores already.*/
  53.553 +        rdmsr(MSR_IA32_MC0_CTL + 4*i, l, h);
  53.554 +        if (!l && !h) /* logical AND: both halves of MCi_CTL must be zero */
  53.555 +        {
  53.556 +            /*if ctl is 0, this bank is never initialized*/
  53.557 +            printk(KERN_DEBUG "mce_init: init bank%d\n", i);
  53.558 +            wrmsr (MSR_IA32_MC0_CTL + 4*i, 0xffffffff, 0xffffffff);
  53.559 +            wrmsr (MSR_IA32_MC0_STATUS + 4*i, 0x0, 0x0);
  53.560 +       }
  53.561 +    }
  53.562 +    if (firstbank) /*if cmci enabled, firstbank = 0*/
  53.563 +        wrmsr (MSR_IA32_MC0_STATUS, 0x0, 0x0); /* bank 0 was skipped by the loop: clear its status only */
  53.564 +}
  53.565 +
  53.566 +/* The P4 and P6 families share this MCA initialization sequence; called with the CPU's cpuinfo */
  53.567 +void intel_mcheck_init(struct cpuinfo_x86 *c)
  53.568 +{
  53.569 +    mce_cap_init(c); /* must run first: the later steps read nr_mce_banks, firstbank and cmci_support */
  53.570 +    printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
  53.571 +            smp_processor_id());
  53.572 +    /* machine check is available */
  53.573 +    machine_check_vector = intel_machine_check;
  53.574 +    mce_init();
  53.575 +    mce_intel_feature_init(c);
  53.576 +    mce_set_owner();
  53.577 +}
  53.578 +
  53.579 +/*
  53.580 + * Periodic polling timer for "silent" machine check errors. When a
  53.581 + * poll finds errors the interval is shortened; when it finds none the
  53.582 + * interval is doubled. Values below are passed through MILLISECS().
  53.583 +*/
  53.584 +static struct timer mce_timer;
  53.585 +
  53.586 +#define MCE_PERIOD 4000
  53.587 +#define MCE_MIN    2000
  53.588 +#define MCE_MAX    32000
  53.589 +
  53.590 +static u64 period = MCE_PERIOD; /* current interval, clamped to [MCE_MIN, MCE_MAX] by mce_intel_work_fn() */
  53.591 +static int adjust = 0; /* polls that found errors in the current round; reset by mce_intel_work_fn() */
  53.592 +/* Per-CPU poll step (run through on_each_cpu): log bank errors, bump adjust and notify dom0 on a hit */
  53.593 +static void mce_intel_checkregs(void *info)
  53.594 +{
  53.595 +    int nr_unit;
  53.596 +    struct mc_info *mi =  x86_mcinfo_getptr();
  53.597 +
  53.598 +    if( !mce_available(&current_cpu_data))
  53.599 +        return;
  53.600 +    nr_unit = machine_check_poll(mi, MC_FLAG_POLLED);
  53.601 +    if (nr_unit)
  53.602 +    {
  53.603 +        x86_mcinfo_dump(mi);
  53.604 +        adjust++; /* NOTE(review): plain int updated from every CPU; confirm a lost increment is acceptable */
  53.605 +        if (dom0 && guest_enabled_event(dom0->vcpu[0], VIRQ_MCA))
  53.606 +            send_guest_global_virq(dom0, VIRQ_MCA);
  53.607 +    }
  53.608 +}
  53.609 +/* Timer callback: poll all CPUs, retune the polling interval and re-arm mce_timer */
  53.610 +static void mce_intel_work_fn(void *data)
  53.611 +{
  53.612 +    on_each_cpu(mce_intel_checkregs, data, 1, 1);
  53.613 +    if (!adjust) {
  53.614 +        period *= 2; /* quiet round: back off */
  53.615 +    } else {
  53.616 +        period = period / (adjust + 1);
  53.617 +        printk(KERN_DEBUG "mcheck_poll: Find error, shorten interval "
  53.618 +               "to %"PRIu64"\n", period);
  53.619 +    }
  53.620 +    /* Keep the interval inside [MCE_MIN, MCE_MAX]. */
  53.621 +    if (period > MCE_MAX)
  53.622 +        period = MCE_MAX;
  53.623 +    else if (period < MCE_MIN)
  53.624 +        period = MCE_MIN;
  53.625 +    set_timer(&mce_timer, NOW() + MILLISECS(period));
  53.626 +    adjust = 0;
  53.627 +}
  53.628 +/* Set up mce_timer with mce_intel_work_fn and arm the first poll MCE_PERIOD ms from now; c is unused */
  53.629 +void intel_mcheck_timer(struct cpuinfo_x86 *c)
  53.630 +{
  53.631 +    printk(KERN_DEBUG "mcheck_poll: Init_mcheck_timer\n");
  53.632 +    init_timer(&mce_timer, mce_intel_work_fn, NULL, 0);
  53.633 +    set_timer(&mce_timer, NOW() + MILLISECS(MCE_PERIOD));
  53.634 +}
  53.635 +
    54.1 --- a/xen/arch/x86/cpu/mcheck/non-fatal.c	Wed Dec 24 12:50:57 2008 +0900
    54.2 +++ b/xen/arch/x86/cpu/mcheck/non-fatal.c	Wed Dec 24 12:52:34 2008 +0900
    54.3 @@ -19,8 +19,8 @@
    54.4  #include <asm/msr.h>
    54.5  
    54.6  #include "mce.h"
    54.7 -
    54.8 -static int firstbank;
    54.9 +#include "x86_mca.h"
   54.10 +int firstbank = 0;
   54.11  static struct timer mce_timer;
   54.12  
   54.13  #define MCE_PERIOD MILLISECS(15000)
   54.14 @@ -61,13 +61,8 @@ static int __init init_nonfatal_mce_chec
   54.15  	struct cpuinfo_x86 *c = &boot_cpu_data;
   54.16  
   54.17  	/* Check for MCE support */
   54.18 -	if (!cpu_has(c, X86_FEATURE_MCE))
   54.19 +	if (!mce_available(c))
   54.20  		return -ENODEV;
   54.21 -
   54.22 -	/* Check for PPro style MCA */
   54.23 -	if (!cpu_has(c, X86_FEATURE_MCA))
   54.24 -		return -ENODEV;
   54.25 -
   54.26  	/*
   54.27  	 * Check for non-fatal errors every MCE_RATE s
   54.28  	 */
   54.29 @@ -85,12 +80,20 @@ static int __init init_nonfatal_mce_chec
   54.30  		break;
   54.31  
   54.32  	case X86_VENDOR_INTEL:
   54.33 -		init_timer(&mce_timer, mce_work_fn, NULL, 0);
   54.34 -		set_timer(&mce_timer, NOW() + MCE_PERIOD);
   54.35 +		/* p5 family is different. P4/P6 and latest CPUs shares the
   54.36 +		 * same polling methods
   54.37 +		*/
   54.38 +		if ( c->x86 != 5 )
   54.39 +		{
   54.40 +			/* some CPUs or banks don't support cmci, we need to 
   54.41 +			 * enable this feature anyway
   54.42 +			 */
   54.43 +			intel_mcheck_timer(c);
   54.44 +		}
   54.45  		break;
   54.46  	}
   54.47  
   54.48 -	printk(KERN_INFO "MCA: Machine check polling timer started.\n");
   54.49 +	printk(KERN_INFO "mcheck_poll: Machine check polling timer started.\n");
   54.50  	return 0;
   54.51  }
   54.52  __initcall(init_nonfatal_mce_checker);
    55.1 --- a/xen/arch/x86/cpu/mcheck/p4.c	Wed Dec 24 12:50:57 2008 +0900
    55.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    55.3 @@ -1,270 +0,0 @@
    55.4 -/*
    55.5 - * P4 specific Machine Check Exception Reporting
    55.6 - */
    55.7 -
    55.8 -#include <xen/init.h>
    55.9 -#include <xen/types.h>
   55.10 -#include <xen/kernel.h>
   55.11 -#include <xen/config.h>
   55.12 -#include <xen/smp.h>
   55.13 -#include <xen/irq.h>
   55.14 -#include <xen/time.h>
   55.15 -#include <asm/processor.h> 
   55.16 -#include <asm/system.h>
   55.17 -#include <asm/msr.h>
   55.18 -#include <asm/apic.h>
   55.19 -
   55.20 -#include "mce.h"
   55.21 -
   55.22 -/* as supported by the P4/Xeon family */
   55.23 -struct intel_mce_extended_msrs {
   55.24 -	u32 eax;
   55.25 -	u32 ebx;
   55.26 -	u32 ecx;
   55.27 -	u32 edx;
   55.28 -	u32 esi;
   55.29 -	u32 edi;
   55.30 -	u32 ebp;
   55.31 -	u32 esp;
   55.32 -	u32 eflags;
   55.33 -	u32 eip;
   55.34 -	/* u32 *reserved[]; */
   55.35 -};
   55.36 -
   55.37 -static int mce_num_extended_msrs = 0;
   55.38 -
   55.39 -
   55.40 -#ifdef CONFIG_X86_MCE_P4THERMAL
   55.41 -static void unexpected_thermal_interrupt(struct cpu_user_regs *regs)
   55.42 -{	
   55.43 -	printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n",
   55.44 -			smp_processor_id());
   55.45 -	add_taint(TAINT_MACHINE_CHECK);
   55.46 -}
   55.47 -
   55.48 -/* P4/Xeon Thermal transition interrupt handler */
   55.49 -static void intel_thermal_interrupt(struct cpu_user_regs *regs)
   55.50 -{
   55.51 -	u32 l, h;
   55.52 -	unsigned int cpu = smp_processor_id();
   55.53 -	static s_time_t next[NR_CPUS];
   55.54 -
   55.55 -	ack_APIC_irq();
   55.56 -
   55.57 -	if (NOW() < next[cpu])
   55.58 -		return;
   55.59 -
   55.60 -	next[cpu] = NOW() + MILLISECS(5000);
   55.61 -	rdmsr(MSR_IA32_THERM_STATUS, l, h);
   55.62 -	if (l & 0x1) {
   55.63 -		printk(KERN_EMERG "CPU%d: Temperature above threshold\n", cpu);
   55.64 -		printk(KERN_EMERG "CPU%d: Running in modulated clock mode\n",
   55.65 -				cpu);
   55.66 -		add_taint(TAINT_MACHINE_CHECK);
   55.67 -	} else {
   55.68 -		printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu);
   55.69 -	}
   55.70 -}
   55.71 -
   55.72 -/* Thermal interrupt handler for this CPU setup */
   55.73 -static void (*vendor_thermal_interrupt)(struct cpu_user_regs *regs) = unexpected_thermal_interrupt;
   55.74 -
   55.75 -fastcall void smp_thermal_interrupt(struct cpu_user_regs *regs)
   55.76 -{
   55.77 -	irq_enter();
   55.78 -	vendor_thermal_interrupt(regs);
   55.79 -	irq_exit();
   55.80 -}
   55.81 -
   55.82 -/* P4/Xeon Thermal regulation detect and init */
   55.83 -static void intel_init_thermal(struct cpuinfo_x86 *c)
   55.84 -{
   55.85 -	u32 l, h;
   55.86 -	unsigned int cpu = smp_processor_id();
   55.87 -
   55.88 -	/* Thermal monitoring */
   55.89 -	if (!cpu_has(c, X86_FEATURE_ACPI))
   55.90 -		return;	/* -ENODEV */
   55.91 -
   55.92 -	/* Clock modulation */
   55.93 -	if (!cpu_has(c, X86_FEATURE_ACC))
   55.94 -		return;	/* -ENODEV */
   55.95 -
   55.96 -	/* first check if its enabled already, in which case there might
   55.97 -	 * be some SMM goo which handles it, so we can't even put a handler
   55.98 -	 * since it might be delivered via SMI already -zwanem.
   55.99 -	 */
  55.100 -	rdmsr (MSR_IA32_MISC_ENABLE, l, h);
  55.101 -	h = apic_read(APIC_LVTTHMR);
  55.102 -	if ((l & (1<<3)) && (h & APIC_DM_SMI)) {
  55.103 -		printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n",
  55.104 -				cpu);
  55.105 -		return; /* -EBUSY */
  55.106 -	}
  55.107 -
  55.108 -	/* check whether a vector already exists, temporarily masked? */	
  55.109 -	if (h & APIC_VECTOR_MASK) {
  55.110 -		printk(KERN_DEBUG "CPU%d: Thermal LVT vector (%#x) already "
  55.111 -				"installed\n",
  55.112 -			cpu, (h & APIC_VECTOR_MASK));
  55.113 -		return; /* -EBUSY */
  55.114 -	}
  55.115 -
  55.116 -	/* The temperature transition interrupt handler setup */
  55.117 -	h = THERMAL_APIC_VECTOR;		/* our delivery vector */
  55.118 -	h |= (APIC_DM_FIXED | APIC_LVT_MASKED);	/* we'll mask till we're ready */
  55.119 -	apic_write_around(APIC_LVTTHMR, h);
  55.120 -
  55.121 -	rdmsr (MSR_IA32_THERM_INTERRUPT, l, h);
  55.122 -	wrmsr (MSR_IA32_THERM_INTERRUPT, l | 0x03 , h);
  55.123 -
  55.124 -	/* ok we're good to go... */
  55.125 -	vendor_thermal_interrupt = intel_thermal_interrupt;
  55.126 -	
  55.127 -	rdmsr (MSR_IA32_MISC_ENABLE, l, h);
  55.128 -	wrmsr (MSR_IA32_MISC_ENABLE, l | (1<<3), h);
  55.129 -	
  55.130 -	l = apic_read (APIC_LVTTHMR);
  55.131 -	apic_write_around (APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
  55.132 -	printk (KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu);
  55.133 -	return;
  55.134 -}
  55.135 -#endif /* CONFIG_X86_MCE_P4THERMAL */
  55.136 -
  55.137 -
  55.138 -/* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */
  55.139 -static inline int intel_get_extended_msrs(struct intel_mce_extended_msrs *r)
  55.140 -{
  55.141 -	u32 h;
  55.142 -
  55.143 -	if (mce_num_extended_msrs == 0)
  55.144 -		goto done;
  55.145 -
  55.146 -	rdmsr (MSR_IA32_MCG_EAX, r->eax, h);
  55.147 -	rdmsr (MSR_IA32_MCG_EBX, r->ebx, h);
  55.148 -	rdmsr (MSR_IA32_MCG_ECX, r->ecx, h);
  55.149 -	rdmsr (MSR_IA32_MCG_EDX, r->edx, h);
  55.150 -	rdmsr (MSR_IA32_MCG_ESI, r->esi, h);
  55.151 -	rdmsr (MSR_IA32_MCG_EDI, r->edi, h);
  55.152 -	rdmsr (MSR_IA32_MCG_EBP, r->ebp, h);
  55.153 -	rdmsr (MSR_IA32_MCG_ESP, r->esp, h);
  55.154 -	rdmsr (MSR_IA32_MCG_EFLAGS, r->eflags, h);
  55.155 -	rdmsr (MSR_IA32_MCG_EIP, r->eip, h);
  55.156 -
  55.157 -	/* can we rely on kmalloc to do a dynamic
  55.158 -	 * allocation for the reserved registers?
  55.159 -	 */
  55.160 -done:
  55.161 -	return mce_num_extended_msrs;
  55.162 -}
  55.163 -
  55.164 -static fastcall void intel_machine_check(struct cpu_user_regs * regs, long error_code)
  55.165 -{
  55.166 -	int recover=1;
  55.167 -	u32 alow, ahigh, high, low;
  55.168 -	u32 mcgstl, mcgsth;
  55.169 -	int i;
  55.170 -	struct intel_mce_extended_msrs dbg;
  55.171 -
  55.172 -	rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
  55.173 -	if (mcgstl & (1<<0))	/* Recoverable ? */
  55.174 -		recover=0;
  55.175 -
  55.176 -	printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
  55.177 -		smp_processor_id(), mcgsth, mcgstl);
  55.178 -
  55.179 -	if (intel_get_extended_msrs(&dbg)) {
  55.180 -		printk (KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n",
  55.181 -			smp_processor_id(), dbg.eip, dbg.eflags);
  55.182 -		printk (KERN_DEBUG "\teax: %08x ebx: %08x ecx: %08x edx: %08x\n",
  55.183 -			dbg.eax, dbg.ebx, dbg.ecx, dbg.edx);
  55.184 -		printk (KERN_DEBUG "\tesi: %08x edi: %08x ebp: %08x esp: %08x\n",
  55.185 -			dbg.esi, dbg.edi, dbg.ebp, dbg.esp);
  55.186 -	}
  55.187 -
  55.188 -	for (i=0; i<nr_mce_banks; i++) {
  55.189 -		rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high);
  55.190 -		if (high & (1<<31)) {
  55.191 -			if (high & (1<<29))
  55.192 -				recover |= 1;
  55.193 -			if (high & (1<<25))
  55.194 -				recover |= 2;
  55.195 -			printk (KERN_EMERG "Bank %d: %08x%08x", i, high, low);
  55.196 -			high &= ~(1<<31);
  55.197 -			if (high & (1<<27)) {
  55.198 -				rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh);
  55.199 -				printk ("[%08x%08x]", ahigh, alow);
  55.200 -			}
  55.201 -			if (high & (1<<26)) {
  55.202 -				rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
  55.203 -				printk (" at %08x%08x", ahigh, alow);
  55.204 -			}
  55.205 -			printk ("\n");
  55.206 -		}
  55.207 -	}
  55.208 -
  55.209 -	if (recover & 2)
  55.210 -		panic ("CPU context corrupt");
  55.211 -	if (recover & 1)
  55.212 -		panic ("Unable to continue");
  55.213 -
  55.214 -	printk(KERN_EMERG "Attempting to continue.\n");
  55.215 -	/* 
  55.216 -	 * Do not clear the MSR_IA32_MCi_STATUS if the error is not 
  55.217 -	 * recoverable/continuable.This will allow BIOS to look at the MSRs
  55.218 -	 * for errors if the OS could not log the error.
  55.219 -	 */
  55.220 -	for (i=0; i<nr_mce_banks; i++) {
  55.221 -		u32 msr;
  55.222 -		msr = MSR_IA32_MC0_STATUS+i*4;
  55.223 -		rdmsr (msr, low, high);
  55.224 -		if (high&(1<<31)) {
  55.225 -			/* Clear it */
  55.226 -			wrmsr(msr, 0UL, 0UL);
  55.227 -			/* Serialize */
  55.228 -			wmb();
  55.229 -			add_taint(TAINT_MACHINE_CHECK);
  55.230 -		}
  55.231 -	}
  55.232 -	mcgstl &= ~(1<<2);
  55.233 -	wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth);
  55.234 -}
  55.235 -
  55.236 -
  55.237 -void intel_p4_mcheck_init(struct cpuinfo_x86 *c)
  55.238 -{
  55.239 -	u32 l, h;
  55.240 -	int i;
  55.241 -	
  55.242 -	machine_check_vector = intel_machine_check;
  55.243 -	wmb();
  55.244 -
  55.245 -	printk (KERN_INFO "Intel machine check architecture supported.\n");
  55.246 -	rdmsr (MSR_IA32_MCG_CAP, l, h);
  55.247 -	if (l & (1<<8))	/* Control register present ? */
  55.248 -		wrmsr (MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
  55.249 -	nr_mce_banks = l & 0xff;
  55.250 -
  55.251 -	for (i=0; i<nr_mce_banks; i++) {
  55.252 -		wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
  55.253 -		wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
  55.254 -	}
  55.255 -
  55.256 -	set_in_cr4 (X86_CR4_MCE);
  55.257 -	printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
  55.258 -		smp_processor_id());
  55.259 -
  55.260 -	/* Check for P4/Xeon extended MCE MSRs */
  55.261 -	rdmsr (MSR_IA32_MCG_CAP, l, h);
  55.262 -	if (l & (1<<9))	{/* MCG_EXT_P */
  55.263 -		mce_num_extended_msrs = (l >> 16) & 0xff;
  55.264 -		printk (KERN_INFO "CPU%d: Intel P4/Xeon Extended MCE MSRs (%d)"
  55.265 -				" available\n",
  55.266 -			smp_processor_id(), mce_num_extended_msrs);
  55.267 -
  55.268 -#ifdef CONFIG_X86_MCE_P4THERMAL
  55.269 -		/* Check for P4/Xeon Thermal monitor */
  55.270 -		intel_init_thermal(c);
  55.271 -#endif
  55.272 -	}
  55.273 -}
    56.1 --- a/xen/arch/x86/cpu/mcheck/p5.c	Wed Dec 24 12:50:57 2008 +0900
    56.2 +++ b/xen/arch/x86/cpu/mcheck/p5.c	Wed Dec 24 12:52:34 2008 +0900
    56.3 @@ -13,6 +13,7 @@
    56.4  #include <asm/msr.h>
    56.5  
    56.6  #include "mce.h"
    56.7 +#include "x86_mca.h"
    56.8  
    56.9  /* Machine check handler for Pentium class Intel */
   56.10  static fastcall void pentium_machine_check(struct cpu_user_regs * regs, long error_code)
    57.1 --- a/xen/arch/x86/cpu/mcheck/p6.c	Wed Dec 24 12:50:57 2008 +0900
    57.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    57.3 @@ -1,118 +0,0 @@
    57.4 -/*
    57.5 - * P6 specific Machine Check Exception Reporting
    57.6 - * (C) Copyright 2002 Alan Cox <alan@redhat.com>
    57.7 - */
    57.8 -
    57.9 -#include <xen/init.h>
   57.10 -#include <xen/types.h>
   57.11 -#include <xen/kernel.h>
   57.12 -#include <xen/smp.h>
   57.13 -
   57.14 -#include <asm/processor.h> 
   57.15 -#include <asm/system.h>
   57.16 -#include <asm/msr.h>
   57.17 -
   57.18 -#include "mce.h"
   57.19 -
   57.20 -/* Machine Check Handler For PII/PIII */
   57.21 -static fastcall void intel_machine_check(struct cpu_user_regs * regs, long error_code)
   57.22 -{
   57.23 -	int recover=1;
   57.24 -	u32 alow, ahigh, high, low;
   57.25 -	u32 mcgstl, mcgsth;
   57.26 -	int i;
   57.27 -
   57.28 -	rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
   57.29 -	if (mcgstl & (1<<0))	/* Recoverable ? */
   57.30 -		recover=0;
   57.31 -
   57.32 -	printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
   57.33 -		smp_processor_id(), mcgsth, mcgstl);
   57.34 -
   57.35 -	for (i=0; i<nr_mce_banks; i++) {
   57.36 -		rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high);
   57.37 -		if (high & (1<<31)) {
   57.38 -			if (high & (1<<29))
   57.39 -				recover |= 1;
   57.40 -			if (high & (1<<25))
   57.41 -				recover |= 2;
   57.42 -			printk (KERN_EMERG "Bank %d: %08x%08x", i, high, low);
   57.43 -			high &= ~(1<<31);
   57.44 -			if (high & (1<<27)) {
   57.45 -				rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh);
   57.46 -				printk ("[%08x%08x]", ahigh, alow);
   57.47 -			}
   57.48 -			if (high & (1<<26)) {
   57.49 -				rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
   57.50 -				printk (" at %08x%08x", ahigh, alow);
   57.51 -			}
   57.52 -			printk ("\n");
   57.53 -		}
   57.54 -	}
   57.55 -
   57.56 -	if (recover & 2)
   57.57 -		panic ("CPU context corrupt");
   57.58 -	if (recover & 1)
   57.59 -		panic ("Unable to continue");
   57.60 -
   57.61 -	printk (KERN_EMERG "Attempting to continue.\n");
   57.62 -	/* 
   57.63 -	 * Do not clear the MSR_IA32_MCi_STATUS if the error is not 
   57.64 -	 * recoverable/continuable.This will allow BIOS to look at the MSRs
   57.65 -	 * for errors if the OS could not log the error.
   57.66 -	 */
   57.67 -	for (i=0; i<nr_mce_banks; i++) {
   57.68 -		unsigned int msr;
   57.69 -		msr = MSR_IA32_MC0_STATUS+i*4;
   57.70 -		rdmsr (msr,low, high);
   57.71 -		if (high & (1<<31)) {
   57.72 -			/* Clear it */
   57.73 -			wrmsr (msr, 0UL, 0UL);
   57.74 -			/* Serialize */
   57.75 -			wmb();
   57.76 -			add_taint(TAINT_MACHINE_CHECK);
   57.77 -		}
   57.78 -	}
   57.79 -	mcgstl &= ~(1<<2);
   57.80 -	wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth);
   57.81 -}
   57.82 -
   57.83 -/* Set up machine check reporting for processors with Intel style MCE */
   57.84 -void intel_p6_mcheck_init(struct cpuinfo_x86 *c)
   57.85 -{
   57.86 -	u32 l, h;
   57.87 -	int i;
   57.88 -	
   57.89 -	/* Check for MCE support */
   57.90 -	if (!cpu_has(c, X86_FEATURE_MCE))
   57.91 -		return;
   57.92 -
   57.93 -	/* Check for PPro style MCA */
   57.94 - 	if (!cpu_has(c, X86_FEATURE_MCA))
   57.95 -		return;
   57.96 -
   57.97 -	/* Ok machine check is available */
   57.98 -	machine_check_vector = intel_machine_check;
   57.99 -	wmb();
  57.100 -
  57.101 -	printk (KERN_INFO "Intel machine check architecture supported.\n");
  57.102 -	rdmsr (MSR_IA32_MCG_CAP, l, h);
  57.103 -	if (l & (1<<8))	/* Control register present ? */
  57.104 -		wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
  57.105 -	nr_mce_banks = l & 0xff;
  57.106 -
  57.107 -	/*
  57.108 -	 * Following the example in IA-32 SDM Vol 3:
  57.109 -	 * - MC0_CTL should not be written
  57.110 -	 * - Status registers on all banks should be cleared on reset
  57.111 -	 */
  57.112 -	for (i=1; i<nr_mce_banks; i++)
  57.113 -		wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
  57.114 -
  57.115 -	for (i=0; i<nr_mce_banks; i++)
  57.116 -		wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
  57.117 -
  57.118 -	set_in_cr4 (X86_CR4_MCE);
  57.119 -	printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
  57.120 -		smp_processor_id());
  57.121 -}
    58.1 --- a/xen/arch/x86/cpu/mcheck/x86_mca.h	Wed Dec 24 12:50:57 2008 +0900
    58.2 +++ b/xen/arch/x86/cpu/mcheck/x86_mca.h	Wed Dec 24 12:52:34 2008 +0900
    58.3 @@ -28,7 +28,10 @@
    58.4  /* Bitfield of the MSR_IA32_MCG_CAP register */
    58.5  #define MCG_CAP_COUNT           0x00000000000000ffULL
    58.6  #define MCG_CTL_P               0x0000000000000100ULL
    58.7 -/* Bits 9-63 are reserved */
    58.8 +#define MCG_EXT_P		(1UL<<9)
    58.9 +#define MCG_EXT_CNT		(16)
   58.10 +#define MCG_CMCI_P		(1UL<<10)
   58.11 +/* Other bits are reserved */
   58.12  
   58.13  /* Bitfield of the MSR_IA32_MCG_STATUS register */
   58.14  #define MCG_STATUS_RIPV         0x0000000000000001ULL
   58.15 @@ -70,3 +73,17 @@
   58.16  /* reserved bits */
   58.17  #define MCi_STATUS_OTHER_RESERVED2      0x0180000000000000ULL
   58.18  
   58.19 +/*Intel Specific bitfield*/
   58.20 +#define CMCI_THRESHOLD			0x2
   58.21 +
   58.22 +
   58.23 +#define MAX_NR_BANKS 128
   58.24 +
   58.25 +typedef DECLARE_BITMAP(cpu_banks_t, MAX_NR_BANKS);
   58.26 +DECLARE_PER_CPU(cpu_banks_t, mce_banks_owned);
   58.27 +
   58.28 +/* Global variables */
   58.29 +extern int mce_disabled;
   58.30 +extern unsigned int nr_mce_banks;
   58.31 +extern int firstbank;
   58.32 +
    59.1 --- a/xen/arch/x86/domctl.c	Wed Dec 24 12:50:57 2008 +0900
    59.2 +++ b/xen/arch/x86/domctl.c	Wed Dec 24 12:52:34 2008 +0900
    59.3 @@ -665,14 +665,6 @@ long arch_do_domctl(
    59.4          }
    59.5  
    59.6          ret = -EINVAL;
    59.7 -        if ( device_assigned(bus, devfn) )
    59.8 -        {
    59.9 -            gdprintk(XENLOG_ERR, "XEN_DOMCTL_assign_device: "
   59.10 -                     "%x:%x:%x already assigned, or non-existent\n",
   59.11 -                     bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
   59.12 -            put_domain(d);
   59.13 -            break;
   59.14 -        }
   59.15  
   59.16          ret = assign_device(d, bus, devfn);
   59.17          if ( ret )
   59.18 @@ -715,15 +707,10 @@ long arch_do_domctl(
   59.19              put_domain(d);
   59.20              break;
   59.21          }
   59.22 -
   59.23 -        if ( !device_assigned(bus, devfn) )
   59.24 -        {
   59.25 -            put_domain(d);
   59.26 -            break;
   59.27 -        }
   59.28 -
   59.29          ret = 0;
   59.30 -        deassign_device(d, bus, devfn);
   59.31 +        spin_lock(&pcidevs_lock);
   59.32 +        ret = deassign_device(d, bus, devfn);
   59.33 +        spin_unlock(&pcidevs_lock);
   59.34          gdprintk(XENLOG_INFO, "XEN_DOMCTL_deassign_device: bdf = %x:%x:%x\n",
   59.35              bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
   59.36  
   59.37 @@ -1037,6 +1024,32 @@ long arch_do_domctl(
   59.38      }
   59.39      break;
   59.40  
   59.41 +    case XEN_DOMCTL_debug_op:
   59.42 +    {
   59.43 +        struct domain *d;
   59.44 +        struct vcpu *v;
   59.45 +
   59.46 +        ret = -ESRCH;
   59.47 +        d = rcu_lock_domain_by_id(domctl->domain);
   59.48 +        if ( d == NULL )
   59.49 +            break;
   59.50 +
   59.51 +        ret = -EINVAL;
   59.52 +        if ( (domctl->u.debug_op.vcpu >= MAX_VIRT_CPUS) ||
   59.53 +             ((v = d->vcpu[domctl->u.debug_op.vcpu]) == NULL) )
   59.54 +            goto debug_op_out;
   59.55 +
   59.56 +        ret = -EINVAL;
   59.57 +        if ( !is_hvm_domain(d))
   59.58 +            goto debug_op_out;
   59.59 +
   59.60 +        ret = hvm_debug_op(v, domctl->u.debug_op.op);
   59.61 +
   59.62 +    debug_op_out:
   59.63 +        rcu_unlock_domain(d);
   59.64 +    }
   59.65 +    break;
   59.66 +
   59.67      default:
   59.68          ret = -ENOSYS;
   59.69          break;
    60.1 --- a/xen/arch/x86/hvm/hvm.c	Wed Dec 24 12:50:57 2008 +0900
    60.2 +++ b/xen/arch/x86/hvm/hvm.c	Wed Dec 24 12:52:34 2008 +0900
    60.3 @@ -2700,6 +2700,32 @@ long do_hvm_op(unsigned long op, XEN_GUE
    60.4      return rc;
    60.5  }
    60.6  
    60.7 +int hvm_debug_op(struct vcpu *v, int32_t op)
    60.8 +{
    60.9 +    int rc;
   60.10 +
   60.11 +    switch ( op )
   60.12 +    {
   60.13 +        case XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_ON:
   60.14 +        case XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_OFF:
   60.15 +            rc = -ENOSYS;
   60.16 +            if ( !cpu_has_monitor_trap_flag )
   60.17 +                break;
   60.18 +            rc = 0;
   60.19 +            vcpu_pause(v);
   60.20 +            v->arch.hvm_vcpu.single_step =
   60.21 +                (op == XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_ON);
   60.22 +            vcpu_unpause(v); /* guest will latch new state */
   60.23 +            break;
   60.24 +        default:
   60.25 +            rc = -ENOSYS;
   60.26 +            break;
   60.27 +    }
   60.28 +
   60.29 +    return rc;
   60.30 +}
   60.31 +
   60.32 +
   60.33  /*
   60.34   * Local variables:
   60.35   * mode: C
    61.1 --- a/xen/arch/x86/hvm/svm/intr.c	Wed Dec 24 12:50:57 2008 +0900
    61.2 +++ b/xen/arch/x86/hvm/svm/intr.c	Wed Dec 24 12:52:34 2008 +0900
    61.3 @@ -80,7 +80,8 @@ static void enable_intr_window(struct vc
    61.4  
    61.5      ASSERT(intack.source != hvm_intsrc_none);
    61.6  
    61.7 -    HVMTRACE_2D(INJ_VIRQ, 0x0, /*fake=*/ 1);
    61.8 +    HVMTRACE_3D(INTR_WINDOW, intack.vector, intack.source,
    61.9 +                vmcb->eventinj.fields.v?vmcb->eventinj.fields.vector:-1);
   61.10  
   61.11      /*
   61.12       * Create a dummy virtual interrupt to intercept as soon as the
    62.1 --- a/xen/arch/x86/hvm/vmx/entry.S	Wed Dec 24 12:50:57 2008 +0900
    62.2 +++ b/xen/arch/x86/hvm/vmx/entry.S	Wed Dec 24 12:52:34 2008 +0900
    62.3 @@ -133,9 +133,15 @@ vmx_asm_do_vmentry:
    62.4          cmpl $0,(r(dx),r(ax),1)
    62.5          jnz  .Lvmx_process_softirqs
    62.6  
    62.7 -        testb $0xff,VCPU_vmx_emul(r(bx))
    62.8 -        jnz  .Lvmx_goto_realmode
    62.9 +        testb $0xff,VCPU_vmx_emulate(r(bx))
   62.10 +        jnz .Lvmx_goto_emulator
   62.11 +        testb $0xff,VCPU_vmx_realmode(r(bx))
   62.12 +        jz .Lvmx_not_realmode
   62.13 +        cmpw $0,VCPU_vm86_seg_mask(r(bx))
   62.14 +        jnz .Lvmx_goto_emulator
   62.15 +        call_with_regs(vmx_enter_realmode) 
   62.16  
   62.17 +.Lvmx_not_realmode:
   62.18          mov  VCPU_hvm_guest_cr2(r(bx)),r(ax)
   62.19          mov  r(ax),%cr2
   62.20          call vmx_trace_vmentry
   62.21 @@ -189,7 +195,7 @@ vmx_asm_do_vmentry:
   62.22          call vm_launch_fail
   62.23          ud2
   62.24  
   62.25 -.Lvmx_goto_realmode:
   62.26 +.Lvmx_goto_emulator:
   62.27          sti
   62.28          call_with_regs(vmx_realmode)
   62.29          jmp  vmx_asm_do_vmentry
    63.1 --- a/xen/arch/x86/hvm/vmx/intr.c	Wed Dec 24 12:50:57 2008 +0900
    63.2 +++ b/xen/arch/x86/hvm/vmx/intr.c	Wed Dec 24 12:52:34 2008 +0900
    63.3 @@ -74,6 +74,13 @@ static void enable_intr_window(struct vc
    63.4  
    63.5      ASSERT(intack.source != hvm_intsrc_none);
    63.6  
    63.7 +    if ( unlikely(tb_init_done) )
    63.8 +    {
    63.9 +        unsigned int intr = __vmread(VM_ENTRY_INTR_INFO);
   63.10 +        HVMTRACE_3D(INTR_WINDOW, intack.vector, intack.source,
   63.11 +                    (intr & INTR_INFO_VALID_MASK) ? intr & 0xff : -1);
   63.12 +    }
   63.13 +
   63.14      if ( (intack.source == hvm_intsrc_nmi) && cpu_has_vmx_vnmi )
   63.15      {
   63.16          /*
   63.17 @@ -110,6 +117,14 @@ asmlinkage void vmx_intr_assist(void)
   63.18      unsigned int tpr_threshold = 0;
   63.19      enum hvm_intblk intblk;
   63.20  
   63.21 +    /* Block event injection when single step with MTF. */
   63.22 +    if ( unlikely(v->arch.hvm_vcpu.single_step) )
   63.23 +    {
   63.24 +        v->arch.hvm_vmx.exec_control |= CPU_BASED_MONITOR_TRAP_FLAG;
   63.25 +        __vmwrite(CPU_BASED_VM_EXEC_CONTROL, v->arch.hvm_vmx.exec_control);
   63.26 +        return;
   63.27 +    }
   63.28 +
   63.29      /* Crank the handle on interrupt state. */
   63.30      pt_update_irq(v);
   63.31      hvm_dirq_assist(v);
    64.1 --- a/xen/arch/x86/hvm/vmx/realmode.c	Wed Dec 24 12:50:57 2008 +0900
    64.2 +++ b/xen/arch/x86/hvm/vmx/realmode.c	Wed Dec 24 12:52:34 2008 +0900
    64.3 @@ -103,31 +103,13 @@ static void realmode_deliver_exception(
    64.4  static void realmode_emulate_one(struct hvm_emulate_ctxt *hvmemul_ctxt)
    64.5  {
    64.6      struct vcpu *curr = current;
    64.7 -    unsigned long seg_reg_dirty;
    64.8      uint32_t intr_info;
    64.9      int rc;
   64.10  
   64.11 -    seg_reg_dirty = hvmemul_ctxt->seg_reg_dirty;
   64.12 -    hvmemul_ctxt->seg_reg_dirty = 0;
   64.13 +    perfc_incr(realmode_emulations);
   64.14  
   64.15      rc = hvm_emulate_one(hvmemul_ctxt);
   64.16  
   64.17 -    if ( test_bit(x86_seg_cs, &hvmemul_ctxt->seg_reg_dirty) )
   64.18 -    {
   64.19 -        curr->arch.hvm_vmx.vmxemul &= ~VMXEMUL_BAD_CS;
   64.20 -        if ( hvmemul_get_seg_reg(x86_seg_cs, hvmemul_ctxt)->sel & 3 )
   64.21 -            curr->arch.hvm_vmx.vmxemul |= VMXEMUL_BAD_CS;
   64.22 -    }
   64.23 -
   64.24 -    if ( test_bit(x86_seg_ss, &hvmemul_ctxt->seg_reg_dirty) )
   64.25 -    {
   64.26 -        curr->arch.hvm_vmx.vmxemul &= ~VMXEMUL_BAD_SS;
   64.27 -        if ( hvmemul_get_seg_reg(x86_seg_ss, hvmemul_ctxt)->sel & 3 )
   64.28 -            curr->arch.hvm_vmx.vmxemul |= VMXEMUL_BAD_SS;
   64.29 -    }
   64.30 -
   64.31 -    hvmemul_ctxt->seg_reg_dirty |= seg_reg_dirty;
   64.32 -
   64.33      if ( rc == X86EMUL_UNHANDLEABLE )
   64.34      {
   64.35          gdprintk(XENLOG_ERR, "Failed to emulate insn.\n");
   64.36 @@ -210,7 +192,8 @@ void vmx_realmode(struct cpu_user_regs *
   64.37          intr_info = 0;
   64.38      }
   64.39  
   64.40 -    while ( curr->arch.hvm_vmx.vmxemul &&
   64.41 +    curr->arch.hvm_vmx.vmx_emulate = 1;
   64.42 +    while ( curr->arch.hvm_vmx.vmx_emulate &&
   64.43              !softirq_pending(smp_processor_id()) &&
   64.44              (curr->arch.hvm_vcpu.io_state == HVMIO_none) )
   64.45      {
   64.46 @@ -220,13 +203,27 @@ void vmx_realmode(struct cpu_user_regs *
   64.47           * in real mode, because we don't emulate protected-mode IDT vectoring.
   64.48           */
   64.49          if ( unlikely(!(++emulations & 15)) &&
   64.50 -             !(curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE) &&
   64.51 +             curr->arch.hvm_vmx.vmx_realmode && 
   64.52               hvm_local_events_need_delivery(curr) )
   64.53              break;
   64.54 +
   64.55          realmode_emulate_one(&hvmemul_ctxt);
   64.56 +
   64.57 +        /* Stop emulating unless our segment state is not safe */
   64.58 +        if ( curr->arch.hvm_vmx.vmx_realmode )
   64.59 +            curr->arch.hvm_vmx.vmx_emulate = 
   64.60 +                (curr->arch.hvm_vmx.vm86_segment_mask != 0);
   64.61 +        else
   64.62 +            curr->arch.hvm_vmx.vmx_emulate = 
   64.63 +                 ((hvmemul_ctxt.seg_reg[x86_seg_cs].sel & 3)
   64.64 +                  || (hvmemul_ctxt.seg_reg[x86_seg_ss].sel & 3));
   64.65      }
   64.66  
   64.67 -    if ( !curr->arch.hvm_vmx.vmxemul )
   64.68 +    /* Need to emulate next time if we've started an IO operation */
   64.69 +    if ( curr->arch.hvm_vcpu.io_state != HVMIO_none )
   64.70 +        curr->arch.hvm_vmx.vmx_emulate = 1;
   64.71 +
   64.72 +    if ( !curr->arch.hvm_vmx.vmx_emulate && !curr->arch.hvm_vmx.vmx_realmode )
   64.73      {
   64.74          /*
   64.75           * Cannot enter protected mode with bogus selector RPLs and DPLs.
    65.1 --- a/xen/arch/x86/hvm/vmx/vmcs.c	Wed Dec 24 12:50:57 2008 +0900
    65.2 +++ b/xen/arch/x86/hvm/vmx/vmcs.c	Wed Dec 24 12:52:34 2008 +0900
    65.3 @@ -99,6 +99,7 @@ static void vmx_init_vmcs_config(void)
    65.4             (opt_softtsc ? CPU_BASED_RDTSC_EXITING : 0));
    65.5      opt = (CPU_BASED_ACTIVATE_MSR_BITMAP |
    65.6             CPU_BASED_TPR_SHADOW |
    65.7 +           CPU_BASED_MONITOR_TRAP_FLAG |
    65.8             CPU_BASED_ACTIVATE_SECONDARY_CONTROLS);
    65.9      _vmx_cpu_based_exec_control = adjust_vmx_controls(
   65.10          min, opt, MSR_IA32_VMX_PROCBASED_CTLS);
   65.11 @@ -515,6 +516,9 @@ static int construct_vmcs(struct vcpu *v
   65.12          v->arch.hvm_vmx.secondary_exec_control &= ~SECONDARY_EXEC_ENABLE_EPT;
   65.13      }
   65.14  
   65.15 +    /* Do not enable Monitor Trap Flag unless start single step debug */
   65.16 +    v->arch.hvm_vmx.exec_control &= ~CPU_BASED_MONITOR_TRAP_FLAG;
   65.17 +
   65.18      __vmwrite(CPU_BASED_VM_EXEC_CONTROL, v->arch.hvm_vmx.exec_control);
   65.19      if ( cpu_has_vmx_secondary_exec_control )
   65.20          __vmwrite(SECONDARY_VM_EXEC_CONTROL,
   65.21 @@ -867,7 +871,11 @@ void vmx_do_resume(struct vcpu *v)
   65.22      if ( unlikely(v->arch.hvm_vcpu.debug_state_latch != debug_state) )
   65.23      {
   65.24          unsigned long intercepts = __vmread(EXCEPTION_BITMAP);
   65.25 -        unsigned long mask = (1U << TRAP_debug) | (1U << TRAP_int3);
   65.26 +        unsigned long mask = 1u << TRAP_int3;
   65.27 +
   65.28 +        if ( !cpu_has_monitor_trap_flag )
   65.29 +            mask |= 1u << TRAP_debug;
   65.30 +
   65.31          v->arch.hvm_vcpu.debug_state_latch = debug_state;
   65.32          if ( debug_state )
   65.33              intercepts |= mask;
   65.34 @@ -880,15 +888,6 @@ void vmx_do_resume(struct vcpu *v)
   65.35      reset_stack_and_jump(vmx_asm_do_vmentry);
   65.36  }
   65.37  
   65.38 -static void vmx_dump_sel(char *name, enum x86_segment seg)
   65.39 -{
   65.40 -    struct segment_register sreg;
   65.41 -    hvm_get_segment_register(current, seg, &sreg);
   65.42 -    printk("%s: sel=0x%04x, attr=0x%05x, limit=0x%08x, base=0x%016llx\n", 
   65.43 -           name, sreg.sel, sreg.attr.bytes, sreg.limit,
   65.44 -           (unsigned long long)sreg.base);
   65.45 -}
   65.46 -
   65.47  static unsigned long vmr(unsigned long field)
   65.48  {
   65.49      int rc;
   65.50 @@ -897,6 +896,28 @@ static unsigned long vmr(unsigned long f
   65.51      return rc ? 0 : val;
   65.52  }
   65.53  
   65.54 +static void vmx_dump_sel(char *name, uint32_t selector)
   65.55 +{
   65.56 +    uint32_t sel, attr, limit;
   65.57 +    uint64_t base;
   65.58 +    sel = vmr(selector);
   65.59 +    attr = vmr(selector + (GUEST_ES_AR_BYTES - GUEST_ES_SELECTOR));
   65.60 +    limit = vmr(selector + (GUEST_ES_LIMIT - GUEST_ES_SELECTOR));
   65.61 +    base = vmr(selector + (GUEST_ES_BASE - GUEST_ES_SELECTOR));
   65.62 +    printk("%s: sel=0x%04x, attr=0x%05x, limit=0x%08x, base=0x%016"PRIx64"\n",
   65.63 +           name, sel, attr, limit, base);
   65.64 +}
   65.65 +
   65.66 +static void vmx_dump_sel2(char *name, uint32_t lim)
   65.67 +{
   65.68 +    uint32_t limit;
   65.69 +    uint64_t base;
   65.70 +    limit = vmr(lim);
   65.71 +    base = vmr(lim + (GUEST_GDTR_BASE - GUEST_GDTR_LIMIT));
   65.72 +    printk("%s:                           limit=0x%08x, base=0x%016"PRIx64"\n",
   65.73 +           name, limit, base);
   65.74 +}
   65.75 +
   65.76  void vmcs_dump_vcpu(struct vcpu *v)
   65.77  {
   65.78      struct cpu_user_regs *regs = &v->arch.guest_context.user_regs;
   65.79 @@ -938,16 +959,16 @@ void vmcs_dump_vcpu(struct vcpu *v)
   65.80             (unsigned long long)vmr(GUEST_SYSENTER_ESP),
   65.81             (int)vmr(GUEST_SYSENTER_CS),
   65.82             (unsigned long long)vmr(GUEST_SYSENTER_EIP));
   65.83 -    vmx_dump_sel("CS", x86_seg_cs);
   65.84 -    vmx_dump_sel("DS", x86_seg_ds);
   65.85 -    vmx_dump_sel("SS", x86_seg_ss);
   65.86 -    vmx_dump_sel("ES", x86_seg_es);
   65.87 -    vmx_dump_sel("FS", x86_seg_fs);
   65.88 -    vmx_dump_sel("GS", x86_seg_gs);
   65.89 -    vmx_dump_sel("GDTR", x86_seg_gdtr);
   65.90 -    vmx_dump_sel("LDTR", x86_seg_ldtr);
   65.91 -    vmx_dump_sel("IDTR", x86_seg_idtr);
   65.92 -    vmx_dump_sel("TR", x86_seg_tr);
   65.93 +    vmx_dump_sel("CS", GUEST_CS_SELECTOR);
   65.94 +    vmx_dump_sel("DS", GUEST_DS_SELECTOR);
   65.95 +    vmx_dump_sel("SS", GUEST_SS_SELECTOR);
   65.96 +    vmx_dump_sel("ES", GUEST_ES_SELECTOR);
   65.97 +    vmx_dump_sel("FS", GUEST_FS_SELECTOR);
   65.98 +    vmx_dump_sel("GS", GUEST_GS_SELECTOR);
   65.99 +    vmx_dump_sel2("GDTR", GUEST_GDTR_LIMIT);
  65.100 +    vmx_dump_sel("LDTR", GUEST_LDTR_SELECTOR);
  65.101 +    vmx_dump_sel2("IDTR", GUEST_IDTR_LIMIT);
  65.102 +    vmx_dump_sel("TR", GUEST_TR_SELECTOR);
  65.103      x  = (unsigned long long)vmr(TSC_OFFSET_HIGH) << 32;
  65.104      x |= (uint32_t)vmr(TSC_OFFSET);
  65.105      printk("TSC Offset = %016llx\n", x);
    66.1 --- a/xen/arch/x86/hvm/vmx/vmx.c	Wed Dec 24 12:50:57 2008 +0900
    66.2 +++ b/xen/arch/x86/hvm/vmx/vmx.c	Wed Dec 24 12:52:34 2008 +0900
    66.3 @@ -306,9 +306,6 @@ static void vmx_restore_host_msrs(void)
    66.4          wrmsrl(msr_index[i], host_msr_state->msrs[i]);
    66.5          clear_bit(i, &host_msr_state->flags);
    66.6      }
    66.7 -
    66.8 -    if ( cpu_has_nx && !(read_efer() & EFER_NX) )
    66.9 -        write_efer(read_efer() | EFER_NX);
   66.10  }
   66.11  
   66.12  static void vmx_save_guest_msrs(struct vcpu *v)
   66.13 @@ -342,39 +339,23 @@ static void vmx_restore_guest_msrs(struc
   66.14          clear_bit(i, &guest_flags);
   66.15      }
   66.16  
   66.17 -    if ( (v->arch.hvm_vcpu.guest_efer ^ read_efer()) & (EFER_NX | EFER_SCE) )
   66.18 +    if ( (v->arch.hvm_vcpu.guest_efer ^ read_efer()) & EFER_SCE )
   66.19      {
   66.20          HVM_DBG_LOG(DBG_LEVEL_2,
   66.21                      "restore guest's EFER with value %lx",
   66.22                      v->arch.hvm_vcpu.guest_efer);
   66.23 -        write_efer((read_efer() & ~(EFER_NX | EFER_SCE)) |
   66.24 -                   (v->arch.hvm_vcpu.guest_efer & (EFER_NX | EFER_SCE)));
   66.25 +        write_efer((read_efer() & ~EFER_SCE) |
   66.26 +                   (v->arch.hvm_vcpu.guest_efer & EFER_SCE));
   66.27      }
   66.28  }
   66.29  
   66.30  #else  /* __i386__ */
   66.31  
   66.32  #define vmx_save_host_msrs()        ((void)0)
   66.33 -
   66.34 -static void vmx_restore_host_msrs(void)
   66.35 -{
   66.36 -    if ( cpu_has_nx && !(read_efer() & EFER_NX) )
   66.37 -        write_efer(read_efer() | EFER_NX);
   66.38 -}
   66.39 +#define vmx_restore_host_msrs()     ((void)0)
   66.40  
   66.41  #define vmx_save_guest_msrs(v)      ((void)0)
   66.42 -
   66.43 -static void vmx_restore_guest_msrs(struct vcpu *v)
   66.44 -{
   66.45 -    if ( (v->arch.hvm_vcpu.guest_efer ^ read_efer()) & EFER_NX )
   66.46 -    {
   66.47 -        HVM_DBG_LOG(DBG_LEVEL_2,
   66.48 -                    "restore guest's EFER with value %lx",
   66.49 -                    v->arch.hvm_vcpu.guest_efer);
   66.50 -        write_efer((read_efer() & ~EFER_NX) |
   66.51 -                   (v->arch.hvm_vcpu.guest_efer & EFER_NX));
   66.52 -    }
   66.53 -}
   66.54 +#define vmx_restore_guest_msrs(v)   ((void)0)
   66.55  
   66.56  static enum handler_return long_mode_do_msr_read(struct cpu_user_regs *regs)
   66.57  {
   66.58 @@ -704,6 +685,26 @@ static void vmx_ctxt_switch_to(struct vc
   66.59      vpmu_load(v);
   66.60  }
   66.61  
   66.62 +
   66.63 +/* SDM volume 3b section 22.3.1.2: we can only enter virtual 8086 mode
   66.64 + * if all of CS, SS, DS, ES, FS and GS are 16bit ring-3 data segments.
   66.65 + * The guest thinks it's got ring-0 segments, so we need to fudge
   66.66 + * things.  We store the ring-3 version in the VMCS to avoid lots of
   66.67 + * shuffling on vmenter and vmexit, and translate in these accessors. */
   66.68 +
   66.69 +#define rm_cs_attr (((union segment_attributes) {                       \
   66.70 +        .fields = { .type = 0xb, .s = 1, .dpl = 0, .p = 1, .avl = 0,    \
   66.71 +                    .l = 0, .db = 0, .g = 0, .pad = 0 } }).bytes)
   66.72 +#define rm_ds_attr (((union segment_attributes) {                       \
   66.73 +        .fields = { .type = 0x3, .s = 1, .dpl = 0, .p = 1, .avl = 0,    \
   66.74 +                    .l = 0, .db = 0, .g = 0, .pad = 0 } }).bytes)
   66.75 +#define vm86_ds_attr (((union segment_attributes) {                     \
   66.76 +        .fields = { .type = 0x3, .s = 1, .dpl = 3, .p = 1, .avl = 0,    \
   66.77 +                    .l = 0, .db = 0, .g = 0, .pad = 0 } }).bytes)
   66.78 +#define vm86_tr_attr (((union segment_attributes) {                     \
   66.79 +        .fields = { .type = 0xb, .s = 0, .dpl = 0, .p = 1, .avl = 0,    \
   66.80 +                    .l = 0, .db = 0, .g = 0, .pad = 0 } }).bytes)
   66.81 +
   66.82  static void vmx_get_segment_register(struct vcpu *v, enum x86_segment seg,
   66.83                                       struct segment_register *reg)
   66.84  {
   66.85 @@ -779,14 +780,85 @@ static void vmx_get_segment_register(str
   66.86      /* Unusable flag is folded into Present flag. */
   66.87      if ( attr & (1u<<16) )
   66.88          reg->attr.fields.p = 0;
   66.89 +
   66.90 +    /* Adjust for virtual 8086 mode */
   66.91 +    if ( v->arch.hvm_vmx.vmx_realmode && seg <= x86_seg_tr 
   66.92 +         && !(v->arch.hvm_vmx.vm86_segment_mask & (1u << seg)) )
   66.93 +    {
   66.94 +        struct segment_register *sreg = &v->arch.hvm_vmx.vm86_saved_seg[seg];
   66.95 +        if ( seg == x86_seg_tr ) 
   66.96 +            *reg = *sreg;
   66.97 +        else if ( reg->base != sreg->base || seg == x86_seg_ss )
   66.98 +        {
   66.99 +            /* If the guest's reloaded the segment, remember the new version.
  66.100 +             * We can't tell if the guest reloaded the segment with another 
  66.101 +             * one that has the same base.  By default we assume it hasn't,
  66.102 +             * since we don't want to lose big-real-mode segment attributes,
  66.103 +             * but for SS we assume it has: the Ubuntu graphical bootloader
  66.104 +             * does this and gets badly confused if we leave the old SS in 
  66.105 +             * place. */
  66.106 +            reg->attr.bytes = (seg == x86_seg_cs ? rm_cs_attr : rm_ds_attr);
  66.107 +            *sreg = *reg;
  66.108 +        }
  66.109 +        else 
  66.110 +        {
  66.111 +            /* Always give realmode guests a selector that matches the base
  66.112 +             * but keep the attr and limit from before */
  66.113 +            *reg = *sreg;
  66.114 +            reg->sel = reg->base >> 4;
  66.115 +        }
  66.116 +    }
  66.117  }
  66.118  
  66.119  static void vmx_set_segment_register(struct vcpu *v, enum x86_segment seg,
  66.120                                       struct segment_register *reg)
  66.121  {
  66.122 -    uint32_t attr;
  66.123 +    uint32_t attr, sel, limit;
  66.124 +    uint64_t base;
  66.125 +
  66.126 +    sel = reg->sel;
  66.127 +    attr = reg->attr.bytes;
  66.128 +    limit = reg->limit;
  66.129 +    base = reg->base;
  66.130  
  66.131 -    attr = reg->attr.bytes;
  66.132 +    /* Adjust CS/SS/DS/ES/FS/GS/TR for virtual 8086 mode */
  66.133 +    if ( v->arch.hvm_vmx.vmx_realmode && seg <= x86_seg_tr )
  66.134 +    {
  66.135 +        /* Remember the proper contents */
  66.136 +        v->arch.hvm_vmx.vm86_saved_seg[seg] = *reg;
  66.137 +        
  66.138 +        if ( seg == x86_seg_tr ) 
  66.139 +        {
  66.140 +            if ( v->domain->arch.hvm_domain.params[HVM_PARAM_VM86_TSS] )
  66.141 +            {
  66.142 +                sel = 0;
  66.143 +                attr = vm86_tr_attr;
  66.144 +                limit = 0xff;
  66.145 +                base = v->domain->arch.hvm_domain.params[HVM_PARAM_VM86_TSS];
  66.146 +                v->arch.hvm_vmx.vm86_segment_mask &= ~(1u << seg);
  66.147 +            }
  66.148 +            else
  66.149 +                v->arch.hvm_vmx.vm86_segment_mask |= (1u << seg);
  66.150 +        }
  66.151 +        else
  66.152 +        {
  66.153 +            /* Try to fake it out as a 16bit data segment.  This could
  66.154 +             * cause confusion for the guest if it reads the selector,
  66.155 +             * but otherwise we have to emulate if *any* segment hasn't
  66.156 +             * been reloaded. */
  66.157 +            if ( base < 0x100000 && !(base & 0xf) && limit >= 0xffff
  66.158 +                 && reg->attr.fields.p )
  66.159 +            {
  66.160 +                sel = base >> 4;
  66.161 +                attr = vm86_ds_attr;
  66.162 +                limit = 0xffff;
  66.163 +                v->arch.hvm_vmx.vm86_segment_mask &= ~(1u << seg);
  66.164 +            }
  66.165 +            else 
  66.166 +                v->arch.hvm_vmx.vm86_segment_mask |= (1u << seg);
  66.167 +        }
  66.168 +    }
  66.169 +
  66.170      attr = ((attr & 0xf00) << 4) | (attr & 0xff);
  66.171  
  66.172      /* Not-present must mean unusable. */
  66.173 @@ -794,67 +866,67 @@ static void vmx_set_segment_register(str
  66.174          attr |= (1u << 16);
  66.175  
  66.176      /* VMX has strict consistency requirement for flag G. */
  66.177 -    attr |= !!(reg->limit >> 20) << 15;
  66.178 +    attr |= !!(limit >> 20) << 15;
  66.179  
  66.180      vmx_vmcs_enter(v);
  66.181  
  66.182      switch ( seg )
  66.183      {
  66.184      case x86_seg_cs:
  66.185 -        __vmwrite(GUEST_CS_SELECTOR, reg->sel);
  66.186 -        __vmwrite(GUEST_CS_LIMIT, reg->limit);
  66.187 -        __vmwrite(GUEST_CS_BASE, reg->base);
  66.188 +        __vmwrite(GUEST_CS_SELECTOR, sel);
  66.189 +        __vmwrite(GUEST_CS_LIMIT, limit);
  66.190 +        __vmwrite(GUEST_CS_BASE, base);
  66.191          __vmwrite(GUEST_CS_AR_BYTES, attr);
  66.192          break;
  66.193      case x86_seg_ds:
  66.194 -        __vmwrite(GUEST_DS_SELECTOR, reg->sel);
  66.195 -        __vmwrite(GUEST_DS_LIMIT, reg->limit);
  66.196 -        __vmwrite(GUEST_DS_BASE, reg->base);
  66.197 +        __vmwrite(GUEST_DS_SELECTOR, sel);
  66.198 +        __vmwrite(GUEST_DS_LIMIT, limit);
  66.199 +        __vmwrite(GUEST_DS_BASE, base);
  66.200          __vmwrite(GUEST_DS_AR_BYTES, attr);
  66.201          break;
  66.202      case x86_seg_es:
  66.203 -        __vmwrite(GUEST_ES_SELECTOR, reg->sel);
  66.204 -        __vmwrite(GUEST_ES_LIMIT, reg->limit);
  66.205 -        __vmwrite(GUEST_ES_BASE, reg->base);
  66.206 +        __vmwrite(GUEST_ES_SELECTOR, sel);
  66.207 +        __vmwrite(GUEST_ES_LIMIT, limit);
  66.208 +        __vmwrite(GUEST_ES_BASE, base);
  66.209          __vmwrite(GUEST_ES_AR_BYTES, attr);
  66.210          break;
  66.211      case x86_seg_fs:
  66.212 -        __vmwrite(GUEST_FS_SELECTOR, reg->sel);
  66.213 -        __vmwrite(GUEST_FS_LIMIT, reg->limit);
  66.214 -        __vmwrite(GUEST_FS_BASE, reg->base);
  66.215 +        __vmwrite(GUEST_FS_SELECTOR, sel);
  66.216 +        __vmwrite(GUEST_FS_LIMIT, limit);
  66.217 +        __vmwrite(GUEST_FS_BASE, base);
  66.218          __vmwrite(GUEST_FS_AR_BYTES, attr);
  66.219          break;
  66.220      case x86_seg_gs:
  66.221 -        __vmwrite(GUEST_GS_SELECTOR, reg->sel);
  66.222 -        __vmwrite(GUEST_GS_LIMIT, reg->limit);
  66.223 -        __vmwrite(GUEST_GS_BASE, reg->base);
  66.224 +        __vmwrite(GUEST_GS_SELECTOR, sel);
  66.225 +        __vmwrite(GUEST_GS_LIMIT, limit);
  66.226 +        __vmwrite(GUEST_GS_BASE, base);
  66.227          __vmwrite(GUEST_GS_AR_BYTES, attr);
  66.228          break;
  66.229      case x86_seg_ss:
  66.230 -        __vmwrite(GUEST_SS_SELECTOR, reg->sel);
  66.231 -        __vmwrite(GUEST_SS_LIMIT, reg->limit);
  66.232 -        __vmwrite(GUEST_SS_BASE, reg->base);
  66.233 +        __vmwrite(GUEST_SS_SELECTOR, sel);
  66.234 +        __vmwrite(GUEST_SS_LIMIT, limit);
  66.235 +        __vmwrite(GUEST_SS_BASE, base);
  66.236          __vmwrite(GUEST_SS_AR_BYTES, attr);
  66.237          break;
  66.238      case x86_seg_tr:
  66.239 -        __vmwrite(GUEST_TR_SELECTOR, reg->sel);
  66.240 -        __vmwrite(GUEST_TR_LIMIT, reg->limit);
  66.241 -        __vmwrite(GUEST_TR_BASE, reg->base);
  66.242 +        __vmwrite(GUEST_TR_SELECTOR, sel);
  66.243 +        __vmwrite(GUEST_TR_LIMIT, limit);
  66.244 +        __vmwrite(GUEST_TR_BASE, base);
  66.245          /* VMX checks that the the busy flag (bit 1) is set. */
  66.246          __vmwrite(GUEST_TR_AR_BYTES, attr | 2);
  66.247          break;
  66.248      case x86_seg_gdtr:
  66.249 -        __vmwrite(GUEST_GDTR_LIMIT, reg->limit);
  66.250 -        __vmwrite(GUEST_GDTR_BASE, reg->base);
  66.251 +        __vmwrite(GUEST_GDTR_LIMIT, limit);
  66.252 +        __vmwrite(GUEST_GDTR_BASE, base);
  66.253          break;
  66.254      case x86_seg_idtr:
  66.255 -        __vmwrite(GUEST_IDTR_LIMIT, reg->limit);
  66.256 -        __vmwrite(GUEST_IDTR_BASE, reg->base);
  66.257 +        __vmwrite(GUEST_IDTR_LIMIT, limit);
  66.258 +        __vmwrite(GUEST_IDTR_BASE, base);
  66.259          break;
  66.260      case x86_seg_ldtr:
  66.261 -        __vmwrite(GUEST_LDTR_SELECTOR, reg->sel);
  66.262 -        __vmwrite(GUEST_LDTR_LIMIT, reg->limit);
  66.263 -        __vmwrite(GUEST_LDTR_BASE, reg->base);
  66.264 +        __vmwrite(GUEST_LDTR_SELECTOR, sel);
  66.265 +        __vmwrite(GUEST_LDTR_LIMIT, limit);
  66.266 +        __vmwrite(GUEST_LDTR_BASE, base);
  66.267          __vmwrite(GUEST_LDTR_AR_BYTES, attr);
  66.268          break;
  66.269      default:
  66.270 @@ -970,6 +1042,7 @@ static void vmx_update_guest_cr(struct v
  66.271      switch ( cr )
  66.272      {
  66.273      case 0: {
  66.274 +        int realmode;
  66.275          unsigned long hw_cr0_mask =
  66.276              X86_CR0_NE | X86_CR0_PG | X86_CR0_PE;
  66.277  
  66.278 @@ -998,9 +1071,44 @@ static void vmx_update_guest_cr(struct v
  66.279                  vmx_fpu_enter(v);
  66.280          }
  66.281  
  66.282 -        v->arch.hvm_vmx.vmxemul &= ~VMXEMUL_REALMODE;
  66.283 -        if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE) )
  66.284 -            v->arch.hvm_vmx.vmxemul |= VMXEMUL_REALMODE;
  66.285 +        realmode = !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE); 
  66.286 +        if ( realmode != v->arch.hvm_vmx.vmx_realmode )
  66.287 +        {
  66.288 +            enum x86_segment s; 
  66.289 +            struct segment_register reg[x86_seg_tr + 1];
  66.290 +
  66.291 +            /* Entering or leaving real mode: adjust the segment registers.
  66.292 +             * Need to read them all either way, as realmode reads can update
  66.293 +             * the saved values we'll use when returning to prot mode. */
  66.294 +            for ( s = x86_seg_cs ; s <= x86_seg_tr ; s++ )
  66.295 +                vmx_get_segment_register(v, s, &reg[s]);
  66.296 +            v->arch.hvm_vmx.vmx_realmode = realmode;
  66.297 +            
  66.298 +            if ( realmode )
  66.299 +            {
  66.300 +                for ( s = x86_seg_cs ; s <= x86_seg_tr ; s++ )
  66.301 +                    vmx_set_segment_register(v, s, &reg[s]);
  66.302 +                v->arch.hvm_vcpu.hw_cr[4] |= X86_CR4_VME;
  66.303 +                __vmwrite(GUEST_CR4, v->arch.hvm_vcpu.hw_cr[4]);
  66.304 +                __vmwrite(EXCEPTION_BITMAP, 0xffffffff);
  66.305 +            }
  66.306 +            else 
  66.307 +            {
  66.308 +                for ( s = x86_seg_cs ; s <= x86_seg_tr ; s++ ) 
  66.309 +                    if ( !(v->arch.hvm_vmx.vm86_segment_mask & (1<<s)) )
  66.310 +                        vmx_set_segment_register(
  66.311 +                            v, s, &v->arch.hvm_vmx.vm86_saved_seg[s]);
  66.312 +                v->arch.hvm_vcpu.hw_cr[4] =
  66.313 +                    ((v->arch.hvm_vcpu.hw_cr[4] & ~X86_CR4_VME)
  66.314 +                     |(v->arch.hvm_vcpu.guest_cr[4] & X86_CR4_VME));
  66.315 +                __vmwrite(GUEST_CR4, v->arch.hvm_vcpu.hw_cr[4]);
  66.316 +                __vmwrite(EXCEPTION_BITMAP, 
  66.317 +                          HVM_TRAP_MASK
  66.318 +                          | (paging_mode_hap(v->domain) ?
  66.319 +                             0 : (1U << TRAP_page_fault))
  66.320 +                          | (1U << TRAP_no_device));
  66.321 +            }
  66.322 +        }
  66.323  
  66.324          v->arch.hvm_vcpu.hw_cr[0] =
  66.325              v->arch.hvm_vcpu.guest_cr[0] | hw_cr0_mask;
  66.326 @@ -1028,6 +1136,8 @@ static void vmx_update_guest_cr(struct v
  66.327          if ( paging_mode_hap(v->domain) )
  66.328              v->arch.hvm_vcpu.hw_cr[4] &= ~X86_CR4_PAE;
  66.329          v->arch.hvm_vcpu.hw_cr[4] |= v->arch.hvm_vcpu.guest_cr[4];
  66.330 +        if ( v->arch.hvm_vmx.vmx_realmode ) 
  66.331 +            v->arch.hvm_vcpu.hw_cr[4] |= X86_CR4_VME;
  66.332          if ( paging_mode_hap(v->domain) && !hvm_paging_enabled(v) )
  66.333          {
  66.334              v->arch.hvm_vcpu.hw_cr[4] |= X86_CR4_PSE;
  66.335 @@ -1061,8 +1171,8 @@ static void vmx_update_guest_efer(struct
  66.336  #endif
  66.337  
  66.338      if ( v == current )
  66.339 -        write_efer((read_efer() & ~(EFER_NX|EFER_SCE)) |
  66.340 -                   (v->arch.hvm_vcpu.guest_efer & (EFER_NX|EFER_SCE)));
  66.341 +        write_efer((read_efer() & ~EFER_SCE) |
  66.342 +                   (v->arch.hvm_vcpu.guest_efer & EFER_SCE));
  66.343  }
  66.344  
  66.345  static void vmx_flush_guest_tlbs(void)
  66.346 @@ -1097,6 +1207,7 @@ void ept_sync_domain(struct domain *d)
  66.347  static void __vmx_inject_exception(int trap, int type, int error_code)
  66.348  {
  66.349      unsigned long intr_fields;
  66.350 +    struct vcpu *curr = current;
  66.351  
  66.352      /*
  66.353       * NB. Callers do not need to worry about clearing STI/MOV-SS blocking:
  66.354 @@ -1113,6 +1224,11 @@ static void __vmx_inject_exception(int t
  66.355      }
  66.356  
  66.357      __vmwrite(VM_ENTRY_INTR_INFO, intr_fields);
  66.358 +
  66.359 +    /* Can't inject exceptions in virtual 8086 mode because they would 
  66.360 +     * use the protected-mode IDT.  Emulate at the next vmenter instead. */
  66.361 +    if ( curr->arch.hvm_vmx.vmx_realmode ) 
  66.362 +        curr->arch.hvm_vmx.vmx_emulate = 1;
  66.363  }
  66.364  
  66.365  void vmx_inject_hw_exception(int trap, int error_code)
  66.366 @@ -1128,6 +1244,8 @@ void vmx_inject_hw_exception(int trap, i
  66.367              __restore_debug_registers(curr);
  66.368              write_debugreg(6, read_debugreg(6) | 0x4000);
  66.369          }
  66.370 +        if ( cpu_has_monitor_trap_flag )
  66.371 +            break;
  66.372      case TRAP_int3:
  66.373          if ( curr->domain->debugger_attached )
  66.374          {
  66.375 @@ -1912,7 +2030,8 @@ static void vmx_do_extint(struct cpu_use
  66.376      fastcall void smp_spurious_interrupt(struct cpu_user_regs *regs);
  66.377      fastcall void smp_error_interrupt(struct cpu_user_regs *regs);
  66.378      fastcall void smp_pmu_apic_interrupt(struct cpu_user_regs *regs);
  66.379 -#ifdef CONFIG_X86_MCE_P4THERMAL
  66.380 +    fastcall void smp_cmci_interrupt(struct cpu_user_regs *regs);
  66.381 +#ifdef CONFIG_X86_MCE_THERMAL
  66.382      fastcall void smp_thermal_interrupt(struct cpu_user_regs *regs);
  66.383  #endif
  66.384  
  66.385 @@ -1942,10 +2061,13 @@ static void vmx_do_extint(struct cpu_use
  66.386      case ERROR_APIC_VECTOR:
  66.387          smp_error_interrupt(regs);
  66.388          break;
  66.389 +    case CMCI_APIC_VECTOR:
  66.390 +        smp_cmci_interrupt(regs);
  66.391 +        break;
  66.392      case PMU_APIC_VECTOR:
  66.393          smp_pmu_apic_interrupt(regs);
  66.394          break;
  66.395 -#ifdef CONFIG_X86_MCE_P4THERMAL
  66.396 +#ifdef CONFIG_X86_MCE_THERMAL
  66.397      case THERMAL_APIC_VECTOR:
  66.398          smp_thermal_interrupt(regs);
  66.399          break;
  66.400 @@ -2072,6 +2194,17 @@ static void vmx_failed_vmentry(unsigned 
  66.401      domain_crash(curr->domain);
  66.402  }
  66.403  
  66.404 +asmlinkage void vmx_enter_realmode(struct cpu_user_regs *regs)
  66.405 +{
  66.406 +    struct vcpu *v = current;
  66.407 +
  66.408 +    /* Adjust RFLAGS to enter virtual 8086 mode with IOPL == 3.  Since
  66.409 +     * we have CR4.VME == 1 and our own TSS with an empty interrupt
  66.410 +     * redirection bitmap, all software INTs will be handled by vm86 */
  66.411 +    v->arch.hvm_vmx.vm86_saved_eflags = regs->eflags;
  66.412 +    regs->eflags |= (X86_EFLAGS_VM | X86_EFLAGS_IOPL);
  66.413 +}
  66.414 +
  66.415  asmlinkage void vmx_vmexit_handler(struct cpu_user_regs *regs)
  66.416  {
  66.417      unsigned int exit_reason, idtv_info;
  66.418 @@ -2100,6 +2233,42 @@ asmlinkage void vmx_vmexit_handler(struc
  66.419      if ( unlikely(exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) )
  66.420          return vmx_failed_vmentry(exit_reason, regs);
  66.421  
  66.422 +    if ( v->arch.hvm_vmx.vmx_realmode )
  66.423 +    {
  66.424 +        unsigned int vector;
  66.425 +
  66.426 +        /* Put RFLAGS back the way the guest wants it */
  66.427 +        regs->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IOPL);
  66.428 +        regs->eflags |= (v->arch.hvm_vmx.vm86_saved_eflags & X86_EFLAGS_IOPL);
  66.429 +
  66.430 +        /* Unless this exit was for an interrupt, we've hit something
  66.431 +         * vm86 can't handle.  Try again, using the emulator. */
  66.432 +        switch ( exit_reason )
  66.433 +        {
  66.434 +        case EXIT_REASON_EXCEPTION_NMI:
  66.435 +            vector = __vmread(VM_EXIT_INTR_INFO) & INTR_INFO_VECTOR_MASK;;
  66.436 +            if ( vector != TRAP_page_fault
  66.437 +                 && vector != TRAP_nmi 
  66.438 +                 && vector != TRAP_machine_check ) 
  66.439 +            {
  66.440 +                perfc_incr(realmode_exits);
  66.441 +                v->arch.hvm_vmx.vmx_emulate = 1;
  66.442 +                return;
  66.443 +            }
  66.444 +        case EXIT_REASON_EXTERNAL_INTERRUPT:
  66.445 +        case EXIT_REASON_INIT:
  66.446 +        case EXIT_REASON_SIPI:
  66.447 +        case EXIT_REASON_PENDING_VIRT_INTR:
  66.448 +        case EXIT_REASON_PENDING_VIRT_NMI:
  66.449 +        case EXIT_REASON_MACHINE_CHECK:
  66.450 +            break;
  66.451 +        default:
  66.452 +            v->arch.hvm_vmx.vmx_emulate = 1;
  66.453 +            perfc_incr(realmode_exits);
  66.454 +            return;
  66.455 +        }
  66.456 +    }
  66.457 +
  66.458      hvm_maybe_deassert_evtchn_irq();
  66.459  
  66.460      /* Event delivery caused this intercept? Queue for redelivery. */
  66.461 @@ -2166,7 +2335,7 @@ asmlinkage void vmx_vmexit_handler(struc
  66.462               */
  66.463              exit_qualification = __vmread(EXIT_QUALIFICATION);
  66.464              write_debugreg(6, exit_qualification | 0xffff0ff0);
  66.465 -            if ( !v->domain->debugger_attached )
  66.466 +            if ( !v->domain->debugger_attached || cpu_has_monitor_trap_flag )
  66.467                  goto exit_and_crash;
  66.468              domain_pause_for_debugger();
  66.469              break;
  66.470 @@ -2356,6 +2525,15 @@ asmlinkage void vmx_vmexit_handler(struc
  66.471          break;
  66.472      }
  66.473  
  66.474 +    case EXIT_REASON_MONITOR_TRAP_FLAG:
  66.475