ia64/linux-2.6.18-xen.hg

view Documentation/block/ioprio.txt @ 897:329ea0ccb344

balloon: try harder to balloon up under memory pressure.

Currently if the balloon driver is unable to increase the guest's
reservation it assumes the failure was due to reaching its full
allocation, gives up on the ballooning operation and records the limit
it reached as the "hard limit". The driver will not try again until
the target is set again (even to the same value).

However it is possible that ballooning has in fact failed due to
memory pressure in the host and therefore it is desirable to keep
attempting to reach the target in case memory becomes available. The
most likely scenario is that some guests are ballooning down while
others are ballooning up and therefore there is temporary memory
pressure while things stabilise. You would not expect a well behaved
toolstack to ask a domain to balloon to more than its allocation nor
would you expect it to deliberately over-commit memory by setting
balloon targets which exceed the total host memory.

This patch drops the concept of a hard limit and causes the balloon
driver to retry increasing the reservation on a timer in the same
manner as when decreasing the reservation.

Also if we partially succeed in increasing the reservation
(i.e. receive less pages than we asked for) then we may as well keep
those pages rather than returning them to Xen.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Jun 05 14:01:20 2009 +0100 (2009-06-05)
parents 831230e53067
children
line source
1 Block io priorities
2 ===================
5 Intro
6 -----
8 With the introduction of cfq v3 (aka cfq-ts or time sliced cfq), basic io
9 priorities are supported for reads on files. This enables users to io nice
10 processes or process groups, similar to what has been possible with cpu
11 scheduling for ages. This document mainly details the current possibilities
12 with cfq; other io schedulers do not support io priorities so far.
14 Scheduling classes
15 ------------------
17 CFQ implements three generic scheduling classes that determine how io is
18 served for a process.
20 IOPRIO_CLASS_RT: This is the realtime io class. This scheduling class is given
21 higher priority than any other in the system; processes from this class are
22 given first access to the disk every time. Thus it needs to be used with some
23 care: one io RT process can starve the entire system. Within the RT class,
24 there are 8 levels of class data that determine exactly how much time this
25 process needs the disk for on each service. In the future this might change
26 to be more directly mappable to performance, by passing in a wanted data
27 rate instead.
29 IOPRIO_CLASS_BE: This is the best-effort scheduling class, which is the default
30 for any process that hasn't set a specific io priority. The class data
31 determines how much io bandwidth the process will get; it's directly mappable
32 to the cpu nice levels, just more coarsely implemented. 0 is the highest
33 BE prio level, 7 is the lowest. The mapping between cpu nice level and io
34 nice level is determined as: io_nice = (cpu_nice + 20) / 5.
36 IOPRIO_CLASS_IDLE: This is the idle scheduling class; processes running at this
37 level only get io time when no one else needs the disk. The idle class has no
38 class data, since it doesn't really apply here.
40 Tools
41 -----
43 See below for a sample ionice tool. Usage:
45 # ionice -c<class> -n<level> -p<pid>
47 If pid isn't given, the current process is assumed. IO priority settings
48 are inherited on fork, so you can use ionice to start the process at a given
49 level:
51 # ionice -c2 -n0 /bin/ls
53 will run ls at the best-effort scheduling class at the highest priority.
54 For a running process, you can give the pid instead:
56 # ionice -c1 -n2 -p100
58 will change pid 100 to run at the realtime scheduling class, at priority 2.
60 ---> snip ionice.c tool <---
62 #include <stdio.h>
63 #include <stdlib.h>
64 #include <errno.h>
65 #include <getopt.h>
66 #include <unistd.h>
67 #include <sys/ptrace.h>
68 #include <asm/unistd.h>
70 extern int sys_ioprio_set(int, int, int);
71 extern int sys_ioprio_get(int, int);
73 #if defined(__i386__)
74 #define __NR_ioprio_set 289
75 #define __NR_ioprio_get 290
76 #elif defined(__ppc__)
77 #define __NR_ioprio_set 273
78 #define __NR_ioprio_get 274
79 #elif defined(__x86_64__)
80 #define __NR_ioprio_set 251
81 #define __NR_ioprio_get 252
82 #elif defined(__ia64__)
83 #define __NR_ioprio_set 1274
84 #define __NR_ioprio_get 1275
85 #else
86 #error "Unsupported arch"
87 #endif
89 _syscall3(int, ioprio_set, int, which, int, who, int, ioprio);
90 _syscall2(int, ioprio_get, int, which, int, who);
92 enum {
93 IOPRIO_CLASS_NONE,
94 IOPRIO_CLASS_RT,
95 IOPRIO_CLASS_BE,
96 IOPRIO_CLASS_IDLE,
97 };
99 enum {
100 IOPRIO_WHO_PROCESS = 1,
101 IOPRIO_WHO_PGRP,
102 IOPRIO_WHO_USER,
103 };
105 #define IOPRIO_CLASS_SHIFT 13
107 const char *to_prio[] = { "none", "realtime", "best-effort", "idle", };
109 int main(int argc, char *argv[])
110 {
111 int ioprio = 4, set = 0, ioprio_class = IOPRIO_CLASS_BE;
112 int c, pid = 0;
114 while ((c = getopt(argc, argv, "+n:c:p:")) != EOF) {
115 switch (c) {
116 case 'n':
117 ioprio = strtol(optarg, NULL, 10);
118 set = 1;
119 break;
120 case 'c':
121 ioprio_class = strtol(optarg, NULL, 10);
122 set = 1;
123 break;
124 case 'p':
125 pid = strtol(optarg, NULL, 10);
126 break;
127 }
128 }
130 switch (ioprio_class) {
131 case IOPRIO_CLASS_NONE:
132 ioprio_class = IOPRIO_CLASS_BE;
133 break;
134 case IOPRIO_CLASS_RT:
135 case IOPRIO_CLASS_BE:
136 break;
137 case IOPRIO_CLASS_IDLE:
138 ioprio = 7;
139 break;
140 default:
141 printf("bad prio class %d\n", ioprio_class);
142 return 1;
143 }
145 if (!set) {
146 if (!pid && argv[optind])
147 pid = strtol(argv[optind], NULL, 10);
149 ioprio = ioprio_get(IOPRIO_WHO_PROCESS, pid);
151 printf("pid=%d, %d\n", pid, ioprio);
153 if (ioprio == -1)
154 perror("ioprio_get");
155 else {
156 ioprio_class = ioprio >> IOPRIO_CLASS_SHIFT;
157 ioprio = ioprio & 0xff;
158 printf("%s: prio %d\n", to_prio[ioprio_class], ioprio);
159 }
160 } else {
161 if (ioprio_set(IOPRIO_WHO_PROCESS, pid, ioprio | ioprio_class << IOPRIO_CLASS_SHIFT) == -1) {
162 perror("ioprio_set");
163 return 1;
164 }
166 if (argv[optind])
167 execvp(argv[optind], &argv[optind]);
168 }
170 return 0;
171 }
173 ---> snip ionice.c tool <---
176 March 11 2005, Jens Axboe <axboe@suse.de>