direct-io.hg

changeset 12376:f516774cbb27

[IOEMU] Fix Linux smp guest hangs with complaint "BUG: soft lock detected on CPU#0"

The hang occurs when console=ttyS0 is set in the guest grub configuration
and serial='pty' is set in the vmx configuration file. The root cause of
this bug is a characteristic of the PTY emulator in Qemu. A PTY has a
write end and a read end with a buffer between them. Once that buffer is
full, writes to it fail until the read end reads data out of the buffer.

Prior to changeset 12026, a write to the serial port would fail quietly
when the pty buffer was full. With changeset 12026, a failed write to the
serial port is retried 3 times over 300ms, without ever notifying the
guest via the serial irq. An SMP guest will then hang, waiting for an
interrupt. In an SMP guest, a watchdog thread must be executed
periodically, otherwise a soft lockup is detected. With this patch, an
upper threshold on the total number of consecutive retries is added, and
the serial interrupt is sent after 3 retries for each write request,
even if the write failed.

Signed-off-by: Xinmei Huang <xinmei.huang@intel.com>
Signed-off-by: Keir Fraser <keir@xensource.com>
author kfraser@localhost.localdomain
date Mon Nov 13 09:46:05 2006 +0000 (2006-11-13)
parents b1d436f094fa
children 0b385df5f236
files tools/ioemu/hw/serial.c
line diff
     1.1 --- a/tools/ioemu/hw/serial.c	Sat Nov 11 01:40:16 2006 +0000
     1.2 +++ b/tools/ioemu/hw/serial.c	Mon Nov 13 09:46:05 2006 +0000
     1.3 @@ -73,6 +73,11 @@
     1.4  #define UART_LSR_OE	0x02	/* Overrun error indicator */
     1.5  #define UART_LSR_DR	0x01	/* Receiver data ready */
     1.6  
     1.7 +/* Maximum retries for a single byte transmit. */
     1.8 +#define WRITE_MAX_SINGLE_RETRIES 3
     1.9 +/* Maximum retries for a sequence of back-to-back unsuccessful transmits. */
    1.10 +#define WRITE_MAX_TOTAL_RETRIES 10
    1.11 +
    1.12  struct SerialState {
    1.13      uint8_t divider;
    1.14      uint8_t rbr; /* receive register */
    1.15 @@ -98,8 +103,12 @@ struct SerialState {
    1.16       * If a character transmitted via UART cannot be written to its
    1.17       * destination immediately we remember it here and retry a few times via
    1.18       * a polling timer.
    1.19 +     *  - write_single_retries: Number of write retries for current byte.
    1.20 +     *  - write_total_retries:  Number of write retries for back-to-back
    1.21 +     *                          unsuccessful transmits.
    1.22       */
    1.23 -    int write_retries;
    1.24 +    int write_single_retries;
    1.25 +    int write_total_retries;
    1.26      char write_chr;
    1.27      QEMUTimer *write_retry_timer;
    1.28  };
    1.29 @@ -217,16 +226,21 @@ static void serial_chr_write(void *opaqu
    1.30  {
    1.31      SerialState *s = opaque;
    1.32  
    1.33 +    /* Cancel any outstanding retry if this is a new byte. */
    1.34      qemu_del_timer(s->write_retry_timer);
    1.35  
    1.36      /* Retry every 100ms for 300ms total. */
    1.37      if (qemu_chr_write(s->chr, &s->write_chr, 1) == -1) {
    1.38 -        if (s->write_retries++ >= 3)
    1.39 -            printf("serial: write error\n");
    1.40 -        else
    1.41 +        s->write_total_retries++; 
    1.42 +        if (s->write_single_retries++ >= WRITE_MAX_SINGLE_RETRIES)
    1.43 +            fprintf(stderr, "serial: write error\n");
    1.44 +        else if (s->write_total_retries <= WRITE_MAX_TOTAL_RETRIES) {
    1.45              qemu_mod_timer(s->write_retry_timer,
    1.46                             qemu_get_clock(vm_clock) + ticks_per_sec / 10);
    1.47 -        return;
    1.48 +            return;
    1.49 +        }
    1.50 +    } else {
    1.51 +        s->write_total_retries = 0;  /* if successful then reset counter */
    1.52      }
    1.53  
    1.54      /* Success: Notify guest that THR is empty. */
    1.55 @@ -255,7 +269,7 @@ static void serial_ioport_write(void *op
    1.56              s->lsr &= ~UART_LSR_THRE;
    1.57              serial_update_irq(s);
    1.58              s->write_chr = val;
    1.59 -            s->write_retries = 0;
    1.60 +            s->write_single_retries = 0;
    1.61              serial_chr_write(s);
    1.62          }
    1.63          break;