From a44781d7bc09297ab96377fe20a87d1fe3a616d8 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 13 May 2011 13:42:50 -0400 Subject: [PATCH] irqbalance-0.56.tar.bz2 --- root_image/irqbalance/AUTHORS | 3 + root_image/irqbalance/COPYING | 340 +++++++++++++++++++++ root_image/irqbalance/ChangeLog | 3 + root_image/irqbalance/Makefile | 17 ++ root_image/irqbalance/Makefile.am | 39 +++ root_image/irqbalance/NEWS | 1 + root_image/irqbalance/README | 0 root_image/irqbalance/activate.c | 61 ++++ root_image/irqbalance/autogen.sh | 4 + root_image/irqbalance/bitmap.c | 366 ++++++++++++++++++++++ root_image/irqbalance/bitmap.h | 356 ++++++++++++++++++++++ root_image/irqbalance/cap-ng.m4 | 40 +++ root_image/irqbalance/classify.c | 135 +++++++++ root_image/irqbalance/configure.ac | 74 +++++ root_image/irqbalance/constants.h | 30 ++ root_image/irqbalance/cpumask.h | 400 +++++++++++++++++++++++++ root_image/irqbalance/cputree.c | 379 +++++++++++++++++++++++ root_image/irqbalance/irqbalance.1 | 64 ++++ root_image/irqbalance/irqbalance.c | 147 +++++++++ root_image/irqbalance/irqbalance.h | 50 ++++ root_image/irqbalance/irqlist.c | 304 +++++++++++++++++++ root_image/irqbalance/network.c | 206 +++++++++++++ root_image/irqbalance/non-atomic.h | 115 +++++++ root_image/irqbalance/numa.c | 102 +++++++ root_image/irqbalance/placement.c | 359 ++++++++++++++++++++++ root_image/irqbalance/powermode.c | 80 +++++ root_image/irqbalance/procinterrupts.c | 88 ++++++ root_image/irqbalance/types.h | 90 ++++++ 28 files changed, 3853 insertions(+) create mode 100644 root_image/irqbalance/AUTHORS create mode 100644 root_image/irqbalance/COPYING create mode 100644 root_image/irqbalance/ChangeLog create mode 100644 root_image/irqbalance/Makefile create mode 100644 root_image/irqbalance/Makefile.am create mode 100644 root_image/irqbalance/NEWS create mode 100644 root_image/irqbalance/README create mode 100644 root_image/irqbalance/activate.c create mode 100755 root_image/irqbalance/autogen.sh 
create mode 100644 root_image/irqbalance/bitmap.c create mode 100644 root_image/irqbalance/bitmap.h create mode 100644 root_image/irqbalance/cap-ng.m4 create mode 100644 root_image/irqbalance/classify.c create mode 100644 root_image/irqbalance/configure.ac create mode 100644 root_image/irqbalance/constants.h create mode 100644 root_image/irqbalance/cpumask.h create mode 100644 root_image/irqbalance/cputree.c create mode 100755 root_image/irqbalance/irqbalance.1 create mode 100644 root_image/irqbalance/irqbalance.c create mode 100644 root_image/irqbalance/irqbalance.h create mode 100644 root_image/irqbalance/irqlist.c create mode 100644 root_image/irqbalance/network.c create mode 100644 root_image/irqbalance/non-atomic.h create mode 100644 root_image/irqbalance/numa.c create mode 100644 root_image/irqbalance/placement.c create mode 100644 root_image/irqbalance/powermode.c create mode 100644 root_image/irqbalance/procinterrupts.c create mode 100644 root_image/irqbalance/types.h diff --git a/root_image/irqbalance/AUTHORS b/root_image/irqbalance/AUTHORS new file mode 100644 index 0000000..3cbb8a0 --- /dev/null +++ b/root_image/irqbalance/AUTHORS @@ -0,0 +1,3 @@ +Arjen Van De Ven +Neil Horman + diff --git a/root_image/irqbalance/COPYING b/root_image/irqbalance/COPYING new file mode 100644 index 0000000..d60c31a --- /dev/null +++ b/root_image/irqbalance/COPYING @@ -0,0 +1,340 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. 
This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. 
We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. 
+ +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. 
But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) 
+ +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. 
Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. 
Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. 
Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. 
It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. 
Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General +Public License instead of this License. diff --git a/root_image/irqbalance/ChangeLog b/root_image/irqbalance/ChangeLog new file mode 100644 index 0000000..f5e9428 --- /dev/null +++ b/root_image/irqbalance/ChangeLog @@ -0,0 +1,3 @@ +This is all tracked in the SVN repo. This file is just here to keep the +autotools from complaining + diff --git a/root_image/irqbalance/Makefile b/root_image/irqbalance/Makefile new file mode 100644 index 0000000..2bb41b1 --- /dev/null +++ b/root_image/irqbalance/Makefile @@ -0,0 +1,17 @@ +CFLAGS+=-g -Os -D_FORTIFY_SOURCE=2 -Wall -W `pkg-config --cflags glib-2.0` + +all: irqbalance + +LIBS=bitmap.o irqbalance.o cputree.o procinterrupts.o irqlist.o placement.o activate.o network.o powermode.o numa.o classify.o + +irqbalance: .depend $(LIBS) + gcc -g -O2 -D_FORTIFY_SOURCE=2 -Wall `pkg-config --libs glib-2.0` $(LIBS) -o irqbalance + +clean: + rm -f irqbalance *~ *.o .depend + +# rule for building dependency lists, and writing them to a file +# named ".depend". +.depend: + rm -f .depend + gccmakedep -f- -- $(CFLAGS) -- *.c > .depend diff --git a/root_image/irqbalance/Makefile.am b/root_image/irqbalance/Makefile.am new file mode 100644 index 0000000..d4f256f --- /dev/null +++ b/root_image/irqbalance/Makefile.am @@ -0,0 +1,39 @@ +# Makefile.am -- +# Copyright 2009 Red Hat Inc., Durham, North Carolina. +# All Rights Reserved. 
+# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# Authors: +# Steve Grubb +# + +AUTOMAKE_OPTIONS = no-dependencies +EXTRA_DIST = README INSTALL COPYING autogen.sh m4/cap-ng.m4 + +INCLUDES = -I${top_srcdir} +LIBS = $(CAPNG_LDADD) $(GLIB_LIBS) +AM_CFLAGS = -g -Os -W -Wall -Wshadow -Wformat -Wundef $(GLIB_CFLAGS) -D_GNU_SOURCE +noinst_HEADERS = bitmap.h constants.h cpumask.h irqbalance.h non-atomic.h \ + types.h +sbin_PROGRAMS = irqbalance +irqbalance_SOURCES = activate.c bitmap.c classify.c cputree.c irqbalance.c \ + irqlist.c network.c numa.c placement.c powermode.c procinterrupts.c + +CONFIG_CLEAN_FILES = debug*.list config/* +clean-generic: + rm -rf autom4te*.cache + rm -f *.rej *.orig *~ + diff --git a/root_image/irqbalance/NEWS b/root_image/irqbalance/NEWS new file mode 100644 index 0000000..7cc0277 --- /dev/null +++ b/root_image/irqbalance/NEWS @@ -0,0 +1 @@ +No news currently diff --git a/root_image/irqbalance/README b/root_image/irqbalance/README new file mode 100644 index 0000000..e69de29 diff --git a/root_image/irqbalance/activate.c b/root_image/irqbalance/activate.c new file mode 100644 index 0000000..68c142c --- /dev/null +++ b/root_image/irqbalance/activate.c @@ -0,0 +1,61 @@ +/* + * Copyright (C) 2006, Intel Corporation + * + * This file is part of irqbalance + * + * This 
program file is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program in a file named COPYING; if not, write to the + * Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301 USA + */ + +/* + * This file contains the code to communicate a selected distribution / mapping + * of interrupts to the kernel. + */ +#include "config.h" +#include +#include +#include +#include + +#include "irqbalance.h" + + +void activate_mapping(void) +{ + struct interrupt *irq; + GList *iter; + + iter = g_list_first(interrupts); + while (iter) { + irq = iter->data; + iter = g_list_next(iter); + + /* don't set the level if it's a NONE irq, or if there is + * no change */ + if (irq->balance_level != BALANCE_NONE && + !cpus_equal(irq->mask, irq->old_mask)) { + char buf[PATH_MAX]; + FILE *file; + sprintf(buf, "/proc/irq/%i/smp_affinity", irq->number); + file = fopen(buf, "w"); + if (!file) + continue; + cpumask_scnprintf(buf, PATH_MAX, irq->mask); + fprintf(file,"%s", buf); + fclose(file); + irq->old_mask = irq->mask; + } + } +} diff --git a/root_image/irqbalance/autogen.sh b/root_image/irqbalance/autogen.sh new file mode 100755 index 0000000..5ad9f14 --- /dev/null +++ b/root_image/irqbalance/autogen.sh @@ -0,0 +1,4 @@ +#! 
/bin/sh +set -x -e +# --no-recursive is available only in recent autoconf versions +autoreconf -fv --install diff --git a/root_image/irqbalance/bitmap.c b/root_image/irqbalance/bitmap.c new file mode 100644 index 0000000..6a1ceae --- /dev/null +++ b/root_image/irqbalance/bitmap.c @@ -0,0 +1,366 @@ +/* + +This file is taken from the Linux kernel and minimally adapted for use in userspace + +*/ + +/* + * lib/bitmap.c + * Helper functions for bitmap.h. + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ +#include "config.h" +#include +#include +#include +#include +#include +#include "bitmap.h" +#include "non-atomic.h" + +/* + * bitmaps provide an array of bits, implemented using an + * array of unsigned longs. The number of valid bits in a + * given bitmap does _not_ need to be an exact multiple of + * BITS_PER_LONG. + * + * The possible unused bits in the last, partially used word + * of a bitmap are 'don't care'. The implementation makes + * no particular effort to keep them zero. It ensures that + * their value will not affect the results of any operation. + * The bitmap operations that return Boolean (bitmap_empty, + * for example) or scalar (bitmap_weight, for example) results + * carefully filter out these unused bits from impacting their + * results. + * + * These operations actually hold to a slightly stronger rule: + * if you don't input any bitmaps to these ops that have some + * unused bits set, then they won't output any set unused bits + * in output bitmaps. + * + * The byte ordering of bitmaps is more natural on little + * endian architectures. See the big-endian headers + * include/asm-ppc64/bitops.h and include/asm-s390/bitops.h + * for the best explanations of this ordering. 
+ */ + +int __bitmap_empty(const unsigned long *bitmap, int bits) +{ + int k, lim = bits/BITS_PER_LONG; + for (k = 0; k < lim; ++k) + if (bitmap[k]) + return 0; + + if (bits % BITS_PER_LONG) + if (bitmap[k] & BITMAP_LAST_WORD_MASK(bits)) + return 0; + + return 1; +} + +int __bitmap_full(const unsigned long *bitmap, int bits) +{ + int k, lim = bits/BITS_PER_LONG; + for (k = 0; k < lim; ++k) + if (~bitmap[k]) + return 0; + + if (bits % BITS_PER_LONG) + if (~bitmap[k] & BITMAP_LAST_WORD_MASK(bits)) + return 0; + + return 1; +} + +int __bitmap_equal(const unsigned long *bitmap1, + const unsigned long *bitmap2, int bits) +{ + int k, lim = bits/BITS_PER_LONG; + for (k = 0; k < lim; ++k) + if (bitmap1[k] != bitmap2[k]) + return 0; + + if (bits % BITS_PER_LONG) + if ((bitmap1[k] ^ bitmap2[k]) & BITMAP_LAST_WORD_MASK(bits)) + return 0; + + return 1; +} + +void __bitmap_complement(unsigned long *dst, const unsigned long *src, int bits) +{ + int k, lim = bits/BITS_PER_LONG; + for (k = 0; k < lim; ++k) + dst[k] = ~src[k]; + + if (bits % BITS_PER_LONG) + dst[k] = ~src[k] & BITMAP_LAST_WORD_MASK(bits); +} + +/* + * __bitmap_shift_right - logical right shift of the bits in a bitmap + * @dst - destination bitmap + * @src - source bitmap + * @shift - shift by this many bits + * @bits - bitmap size, in bits + * + * Shifting right (dividing) means moving bits in the MS -> LS bit + * direction. Zeros are fed into the vacated MS positions and the + * LS bits shifted off the bottom are lost. + */ +void __bitmap_shift_right(unsigned long *dst, + const unsigned long *src, int shift, int bits) +{ + int k, lim = BITS_TO_LONGS(bits), left = bits % BITS_PER_LONG; + int off = shift/BITS_PER_LONG, rem = shift % BITS_PER_LONG; + unsigned long mask = (1UL << left) - 1; + for (k = 0; off + k < lim; ++k) { + unsigned long upper, lower; + + /* + * If shift is not word aligned, take lower rem bits of + * word above and make them the top rem bits of result. 
+ */ + if (!rem || off + k + 1 >= lim) + upper = 0; + else { + upper = src[off + k + 1]; + if (off + k + 1 == lim - 1 && left) + upper &= mask; + } + lower = src[off + k]; + if (left && off + k == lim - 1) + lower &= mask; + dst[k] = upper << (BITS_PER_LONG - rem) | lower >> rem; + if (left && k == lim - 1) + dst[k] &= mask; + } + if (off) + memset(&dst[lim - off], 0, off*sizeof(unsigned long)); +} + + +/* + * __bitmap_shift_left - logical left shift of the bits in a bitmap + * @dst - destination bitmap + * @src - source bitmap + * @shift - shift by this many bits + * @bits - bitmap size, in bits + * + * Shifting left (multiplying) means moving bits in the LS -> MS + * direction. Zeros are fed into the vacated LS bit positions + * and those MS bits shifted off the top are lost. + */ + +void __bitmap_shift_left(unsigned long *dst, + const unsigned long *src, int shift, int bits) +{ + int k, lim = BITS_TO_LONGS(bits), left = bits % BITS_PER_LONG; + int off = shift/BITS_PER_LONG, rem = shift % BITS_PER_LONG; + for (k = lim - off - 1; k >= 0; --k) { + unsigned long upper, lower; + + /* + * If shift is not word aligned, take upper rem bits of + * word below and make them the bottom rem bits of result. 
+ */ + if (rem && k > 0) + lower = src[k - 1]; + else + lower = 0; + upper = src[k]; + if (left && k == lim - 1) + upper &= (1UL << left) - 1; + dst[k + off] = lower >> (BITS_PER_LONG - rem) | upper << rem; + if (left && k + off == lim - 1) + dst[k + off] &= (1UL << left) - 1; + } + if (off) + memset(dst, 0, off*sizeof(unsigned long)); +} + +void __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, + const unsigned long *bitmap2, int bits) +{ + int k; + int nr = BITS_TO_LONGS(bits); + + for (k = 0; k < nr; k++) + dst[k] = bitmap1[k] & bitmap2[k]; +} + +void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1, + const unsigned long *bitmap2, int bits) +{ + int k; + int nr = BITS_TO_LONGS(bits); + + for (k = 0; k < nr; k++) + dst[k] = bitmap1[k] | bitmap2[k]; +} + +void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1, + const unsigned long *bitmap2, int bits) +{ + int k; + int nr = BITS_TO_LONGS(bits); + + for (k = 0; k < nr; k++) + dst[k] = bitmap1[k] ^ bitmap2[k]; +} + +void __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1, + const unsigned long *bitmap2, int bits) +{ + int k; + int nr = BITS_TO_LONGS(bits); + + for (k = 0; k < nr; k++) + dst[k] = bitmap1[k] & ~bitmap2[k]; +} + +int __bitmap_intersects(const unsigned long *bitmap1, + const unsigned long *bitmap2, int bits) +{ + int k, lim = bits/BITS_PER_LONG; + for (k = 0; k < lim; ++k) + if (bitmap1[k] & bitmap2[k]) + return 1; + + if (bits % BITS_PER_LONG) + if ((bitmap1[k] & bitmap2[k]) & BITMAP_LAST_WORD_MASK(bits)) + return 1; + return 0; +} + +/* + * Bitmap printing & parsing functions: first version by Bill Irwin, + * second version by Paul Jackson, third by Joe Korty. + */ + +#define CHUNKSZ 32 +#define nbits_to_hold_value(val) fls(val) +#define unhex(c) (isdigit(c) ? (c - '0') : (toupper(c) - 'A' + 10)) +#define BASEDEC 10 /* fancier cpuset lists input in decimal */ + +/** + * bitmap_scnprintf - convert bitmap to an ASCII hex string. 
+ * @buf: byte buffer into which string is placed + * @buflen: reserved size of @buf, in bytes + * @maskp: pointer to bitmap to convert + * @nmaskbits: size of bitmap, in bits + * + * Exactly @nmaskbits bits are displayed. Hex digits are grouped into + * comma-separated sets of eight digits per set. + */ +int bitmap_scnprintf(char *buf, unsigned int buflen, + const unsigned long *maskp, int nmaskbits) +{ + int i, word, bit, len = 0; + unsigned long val; + const char *sep = ""; + int chunksz; + uint32_t chunkmask; + int first = 1; + + chunksz = nmaskbits & (CHUNKSZ - 1); + if (chunksz == 0) + chunksz = CHUNKSZ; + + i = ALIGN(nmaskbits, CHUNKSZ) - CHUNKSZ; + for (; i >= 0; i -= CHUNKSZ) { + chunkmask = ((1ULL << chunksz) - 1); + word = i / BITS_PER_LONG; + bit = i % BITS_PER_LONG; + val = (maskp[word] >> bit) & chunkmask; + if (val!=0 || !first || i==0) { + len += snprintf(buf+len, buflen-len, "%s%0*lx", sep, + (chunksz+3)/4, val); + chunksz = CHUNKSZ; + sep = ","; + first = 0; + } + } + return len; +} + +/** + * __bitmap_parse - convert an ASCII hex string into a bitmap. + * @buf: pointer to buffer containing string. + * @buflen: buffer size in bytes. If string is smaller than this + * then it must be terminated with a \0. + * @is_user: location of buffer, 0 indicates kernel space + * @maskp: pointer to bitmap array that will contain result. + * @nmaskbits: size of bitmap, in bits. + * + * Commas group hex digits into chunks. Each chunk defines exactly 32 + * bits of the resultant bitmask. No chunk may specify a value larger + * than 32 bits (%-EOVERFLOW), and if a chunk specifies a smaller value + * then leading 0-bits are prepended. %-EINVAL is returned for illegal + * characters and for grouping errors such as "1,,5", ",44", "," and "". + * Leading and trailing whitespace accepted, but not embedded whitespace. 
+ */ +int __bitmap_parse(const char *buf, unsigned int buflen, + int is_user __attribute((unused)), unsigned long *maskp, + int nmaskbits) +{ + int c, old_c, totaldigits, ndigits, nchunks, nbits; + uint32_t chunk; + + bitmap_zero(maskp, nmaskbits); + + nchunks = nbits = totaldigits = c = 0; + do { + chunk = ndigits = 0; + + /* Get the next chunk of the bitmap */ + while (buflen) { + old_c = c; + c = *buf++; + buflen--; + if (isspace(c)) + continue; + + /* + * If the last character was a space and the current + * character isn't '\0', we've got embedded whitespace. + * This is a no-no, so throw an error. + */ + if (totaldigits && c && isspace(old_c)) + return 0; + + /* A '\0' or a ',' signal the end of the chunk */ + if (c == '\0' || c == ',') + break; + + if (!isxdigit(c)) + return -EINVAL; + + /* + * Make sure there are at least 4 free bits in 'chunk'. + * If not, this hexdigit will overflow 'chunk', so + * throw an error. + */ + if (chunk & ~((1UL << (CHUNKSZ - 4)) - 1)) + return -EOVERFLOW; + + chunk = (chunk << 4) | unhex(c); + ndigits++; totaldigits++; + } + if (ndigits == 0) + return -EINVAL; + if (nchunks == 0 && chunk == 0) + continue; + + __bitmap_shift_left(maskp, maskp, CHUNKSZ, nmaskbits); + *maskp |= chunk; + nchunks++; + nbits += (nchunks == 1) ? 
nbits_to_hold_value(chunk) : CHUNKSZ; + if (nbits > nmaskbits) + return -EOVERFLOW; + } while (buflen && c == ','); + + return 0; +} diff --git a/root_image/irqbalance/bitmap.h b/root_image/irqbalance/bitmap.h new file mode 100644 index 0000000..91ed499 --- /dev/null +++ b/root_image/irqbalance/bitmap.h @@ -0,0 +1,356 @@ +#ifndef __LINUX_BITMAP_H +#define __LINUX_BITMAP_H + +#ifndef __ASSEMBLY__ + +#include +#include +#include + + +#define BITS_PER_LONG ((int)sizeof(unsigned long)*8) + +#define BITS_TO_LONGS(bits) \ + (((bits)+BITS_PER_LONG-1)/BITS_PER_LONG) +#define DECLARE_BITMAP(name,bits) \ + unsigned long name[BITS_TO_LONGS(bits)] +#define ALIGN(x,a) (((x)+(a)-1UL)&~((a)-1UL)) + + +#include "non-atomic.h" + +static inline unsigned int hweight32(unsigned int w) +{ + unsigned int res = w - ((w >> 1) & 0x55555555); + res = (res & 0x33333333) + ((res >> 2) & 0x33333333); + res = (res + (res >> 4)) & 0x0F0F0F0F; + res = res + (res >> 8); + return (res + (res >> 16)) & 0x000000FF; +} + +static inline unsigned long hweight64(uint64_t w) +{ + if (BITS_PER_LONG == 32) + return hweight32((unsigned int)(w >> 32)) + hweight32((unsigned int)w); + + w -= (w >> 1) & 0x5555555555555555ull; + w = (w & 0x3333333333333333ull) + ((w >> 2) & 0x3333333333333333ull); + w = (w + (w >> 4)) & 0x0f0f0f0f0f0f0f0full; + return (w * 0x0101010101010101ull) >> 56; +} + + +static inline int fls(int x) +{ + int r = 32; + + if (!x) + return 0; + if (!(x & 0xffff0000u)) { + x <<= 16; + r -= 16; + } + if (!(x & 0xff000000u)) { + x <<= 8; + r -= 8; + } + if (!(x & 0xf0000000u)) { + x <<= 4; + r -= 4; + } + if (!(x & 0xc0000000u)) { + x <<= 2; + r -= 2; + } + if (!(x & 0x80000000u)) { + x <<= 1; + r -= 1; + } + return r; +} + +static inline unsigned long hweight_long(unsigned long w) +{ + return sizeof(w) == 4 ? hweight32(w) : hweight64(w); +} + +#define min(x,y) ({ \ + typeof(x) _x = (x); \ + typeof(y) _y = (y); \ + (void) (&_x == &_y); \ + _x < _y ? 
_x : _y; }) + + +/* + * bitmaps provide bit arrays that consume one or more unsigned + * longs. The bitmap interface and available operations are listed + * here, in bitmap.h + * + * Function implementations generic to all architectures are in + * lib/bitmap.c. Functions implementations that are architecture + * specific are in various include/asm-/bitops.h headers + * and other arch/ specific files. + * + * See lib/bitmap.c for more details. + */ + +/* + * The available bitmap operations and their rough meaning in the + * case that the bitmap is a single unsigned long are thus: + * + * Note that nbits should be always a compile time evaluable constant. + * Otherwise many inlines will generate horrible code. + * + * bitmap_zero(dst, nbits) *dst = 0UL + * bitmap_fill(dst, nbits) *dst = ~0UL + * bitmap_copy(dst, src, nbits) *dst = *src + * bitmap_and(dst, src1, src2, nbits) *dst = *src1 & *src2 + * bitmap_or(dst, src1, src2, nbits) *dst = *src1 | *src2 + * bitmap_xor(dst, src1, src2, nbits) *dst = *src1 ^ *src2 + * bitmap_andnot(dst, src1, src2, nbits) *dst = *src1 & ~(*src2) + * bitmap_complement(dst, src, nbits) *dst = ~(*src) + * bitmap_equal(src1, src2, nbits) Are *src1 and *src2 equal? + * bitmap_intersects(src1, src2, nbits) Do *src1 and *src2 overlap? + * bitmap_subset(src1, src2, nbits) Is *src1 a subset of *src2? + * bitmap_empty(src, nbits) Are all bits zero in *src? + * bitmap_full(src, nbits) Are all bits set in *src? 
+ * bitmap_weight(src, nbits) Hamming Weight: number set bits + * bitmap_shift_right(dst, src, n, nbits) *dst = *src >> n + * bitmap_shift_left(dst, src, n, nbits) *dst = *src << n + * bitmap_remap(dst, src, old, new, nbits) *dst = map(old, new)(src) + * bitmap_bitremap(oldbit, old, new, nbits) newbit = map(old, new)(oldbit) + * bitmap_scnprintf(buf, len, src, nbits) Print bitmap src to buf + * bitmap_parse(buf, buflen, dst, nbits) Parse bitmap dst from kernel buf + * bitmap_parse_user(ubuf, ulen, dst, nbits) Parse bitmap dst from user buf + * bitmap_scnlistprintf(buf, len, src, nbits) Print bitmap src as list to buf + * bitmap_parselist(buf, dst, nbits) Parse bitmap dst from list + * bitmap_find_free_region(bitmap, bits, order) Find and allocate bit region + * bitmap_release_region(bitmap, pos, order) Free specified bit region + * bitmap_allocate_region(bitmap, pos, order) Allocate specified bit region + */ + +/* + * Also the following operations in asm/bitops.h apply to bitmaps. + * + * set_bit(bit, addr) *addr |= bit + * clear_bit(bit, addr) *addr &= ~bit + * change_bit(bit, addr) *addr ^= bit + * test_bit(bit, addr) Is bit set in *addr? + * test_and_set_bit(bit, addr) Set bit and return old value + * test_and_clear_bit(bit, addr) Clear bit and return old value + * test_and_change_bit(bit, addr) Change bit and return old value + * find_first_zero_bit(addr, nbits) Position first zero bit in *addr + * find_first_bit(addr, nbits) Position first set bit in *addr + * find_next_zero_bit(addr, nbits, bit) Position next zero bit in *addr >= bit + * find_next_bit(addr, nbits, bit) Position next set bit in *addr >= bit + */ + +/* + * The DECLARE_BITMAP(name,bits) macro, in linux/types.h, can be used + * to declare an array named 'name' of just enough unsigned longs to + * contain all bit positions from 0 to 'bits' - 1. 
+ */ + +/* + * lib/bitmap.c provides these functions: + */ + +extern int __bitmap_empty(const unsigned long *bitmap, int bits); +extern int __bitmap_full(const unsigned long *bitmap, int bits); +extern int __bitmap_equal(const unsigned long *bitmap1, + const unsigned long *bitmap2, int bits); +extern void __bitmap_complement(unsigned long *dst, const unsigned long *src, + int bits); +extern void __bitmap_shift_right(unsigned long *dst, + const unsigned long *src, int shift, int bits); +extern void __bitmap_shift_left(unsigned long *dst, + const unsigned long *src, int shift, int bits); +extern void __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, + const unsigned long *bitmap2, int bits); +extern void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1, + const unsigned long *bitmap2, int bits); +extern void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1, + const unsigned long *bitmap2, int bits); +extern void __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1, + const unsigned long *bitmap2, int bits); +extern int __bitmap_intersects(const unsigned long *bitmap1, + const unsigned long *bitmap2, int bits); +extern int __bitmap_subset(const unsigned long *bitmap1, + const unsigned long *bitmap2, int bits); +extern int __bitmap_weight(const unsigned long *bitmap, int bits); + +extern int bitmap_scnprintf(char *buf, unsigned int len, + const unsigned long *src, int nbits); +extern int __bitmap_parse(const char *buf, unsigned int buflen, int is_user, + unsigned long *dst, int nbits); +extern int bitmap_scnlistprintf(char *buf, unsigned int len, + const unsigned long *src, int nbits); +extern int bitmap_parselist(const char *buf, unsigned long *maskp, + int nmaskbits); +extern void bitmap_remap(unsigned long *dst, const unsigned long *src, + const unsigned long *old, const unsigned long *new, int bits); +extern int bitmap_bitremap(int oldbit, + const unsigned long *old, const unsigned long *new, int bits); +extern int 
/*
 * bitmap_fill - set the low @nbits bits of the bitmap at @dst.
 *
 * All words except the last are set to all-ones; the last word gets
 * BITMAP_LAST_WORD_MASK(nbits), so bits at or above @nbits in it are
 * written as zero.  Does nothing when @nbits is not positive.
 */
static inline void bitmap_fill(unsigned long *dst, int nbits)
{
	size_t nlongs;

	/*
	 * An empty bitmap has no last word: nlongs would be 0 and
	 * dst[nlongs - 1] would index outside the array.
	 */
	if (nbits <= 0)
		return;

	nlongs = BITS_TO_LONGS(nbits);
	if (nlongs > 1) {
		size_t len = (nlongs - 1) * sizeof(unsigned long);
		memset(dst, 0xff, len);
	}
	dst[nlongs - 1] = BITMAP_LAST_WORD_MASK(nbits);
}
void bitmap_complement(unsigned long *dst, const unsigned long *src, + int nbits) +{ + if (nbits <= BITS_PER_LONG) + *dst = ~(*src) & BITMAP_LAST_WORD_MASK(nbits); + else + __bitmap_complement(dst, src, nbits); +} + +static inline int bitmap_equal(const unsigned long *src1, + const unsigned long *src2, int nbits) +{ + if (nbits <= BITS_PER_LONG) + return ! ((*src1 ^ *src2) & BITMAP_LAST_WORD_MASK(nbits)); + else + return __bitmap_equal(src1, src2, nbits); +} + +static inline int bitmap_intersects(const unsigned long *src1, + const unsigned long *src2, int nbits) +{ + if (nbits <= BITS_PER_LONG) + return ((*src1 & *src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0; + else + return __bitmap_intersects(src1, src2, nbits); +} + +static inline int bitmap_subset(const unsigned long *src1, + const unsigned long *src2, int nbits) +{ + if (nbits <= BITS_PER_LONG) + return ! ((*src1 & ~(*src2)) & BITMAP_LAST_WORD_MASK(nbits)); + else + return __bitmap_subset(src1, src2, nbits); +} + +static inline int bitmap_empty(const unsigned long *src, int nbits) +{ + if (nbits <= BITS_PER_LONG) + return ! (*src & BITMAP_LAST_WORD_MASK(nbits)); + else + return __bitmap_empty(src, nbits); +} + +static inline int bitmap_full(const unsigned long *src, int nbits) +{ + if (nbits <= BITS_PER_LONG) + return ! 
(~(*src) & BITMAP_LAST_WORD_MASK(nbits)); + else + return __bitmap_full(src, nbits); +} + +static inline int bitmap_weight(const unsigned long *src, int nbits) +{ + if (nbits <= BITS_PER_LONG) + return hweight_long(*src & BITMAP_LAST_WORD_MASK(nbits)); + return __bitmap_weight(src, nbits); +} + +static inline void bitmap_shift_right(unsigned long *dst, + const unsigned long *src, int n, int nbits) +{ + if (nbits <= BITS_PER_LONG) + *dst = *src >> n; + else + __bitmap_shift_right(dst, src, n, nbits); +} + +static inline void bitmap_shift_left(unsigned long *dst, + const unsigned long *src, int n, int nbits) +{ + if (nbits <= BITS_PER_LONG) + *dst = (*src << n) & BITMAP_LAST_WORD_MASK(nbits); + else + __bitmap_shift_left(dst, src, n, nbits); +} + +static inline int bitmap_parse(const char *buf, unsigned int buflen, + unsigned long *maskp, int nmaskbits) +{ + return __bitmap_parse(buf, buflen, 0, maskp, nmaskbits); +} + +#endif /* __ASSEMBLY__ */ + +#endif /* __LINUX_BITMAP_H */ diff --git a/root_image/irqbalance/cap-ng.m4 b/root_image/irqbalance/cap-ng.m4 new file mode 100644 index 0000000..0024edc --- /dev/null +++ b/root_image/irqbalance/cap-ng.m4 @@ -0,0 +1,40 @@ +# libcap-ng.m4 - Checks for the libcap-ng support +# Copyright (c) 2009 Steve Grubb sgrubb@redhat.com +# +AC_DEFUN([LIBCAP_NG_PATH], +[ + AC_ARG_WITH(libcap-ng, + [ --with-libcap-ng=[auto/yes/no] Add Libcap-ng support [default=auto]],, + with_libcap_ng=auto) + + # Check for Libcap-ng API + # + # libcap-ng detection + + if test x$with_libcap_ng = xno ; then + have_libcap_ng=no; + else + # Start by checking for header file + AC_CHECK_HEADER(cap-ng.h, capng_headers=yes, capng_headers=no) + + # See if we have libcap-ng library + AC_CHECK_LIB(cap-ng, capng_clear, + CAPNG_LDADD=-lcap-ng,) + + # Check results are usable + if test x$with_libcap_ng = xyes -a x$CAPNG_LDADD = x ; then + AC_MSG_ERROR(libcap-ng support was requested and the library was not found) + fi + if test x$CAPNG_LDADD != x -a $capng_headers = 
no ; then + AC_MSG_ERROR(libcap-ng libraries found but headers are missing) + fi + fi + AC_SUBST(CAPNG_LDADD) + AC_MSG_CHECKING(whether to use libcap-ng) + if test x$CAPNG_LDADD != x ; then + AC_DEFINE(HAVE_LIBCAP_NG,1,[libcap-ng support]) + AC_MSG_RESULT(yes) + else + AC_MSG_RESULT(no) + fi +]) diff --git a/root_image/irqbalance/classify.c b/root_image/irqbalance/classify.c new file mode 100644 index 0000000..8c8d18b --- /dev/null +++ b/root_image/irqbalance/classify.c @@ -0,0 +1,135 @@ +#include "config.h" +#include +#include +#include + +#include "irqbalance.h" +#include "types.h" + + +char *classes[] = { + "other", + "legacy", + "storage", + "timer", + "ethernet", + "gbit-ethernet", + "10gbit-ethernet", + 0 +}; + +int map_class_to_level[7] = +{ BALANCE_PACKAGE, BALANCE_CACHE, BALANCE_CACHE, BALANCE_NONE, BALANCE_CORE, BALANCE_CORE, BALANCE_CORE }; + + +int class_counts[7]; + +/* + +NOTE NOTE although that this file has a hard-coded list of modules, something missing is not + a big deal; the types are also set based on PCI class information when available. + +*/ + +/* + + Based on the original irqbalance code which is: + + Copyright (C) 2003 Red Hat, Inc. All rights reserved. + + Usage and distribution of this file are subject to the Gnu General Public License Version 2 + that can be found at http://www.gnu.org/licenses/gpl.txt and the COPYING file as + distributed together with this file is included herein by reference. 
+ + Author: Arjan van de Ven + +*/ + +static char *legacy_modules[] = { + "PS/2", + "serial", + "i8042", + "acpi", + "floppy", + "parport", + "keyboard", + "usb-ohci", + "usb-uhci", + "uhci_hcd", + "ohci_hcd", + "ehci_hcd", + "EMU10K1", + 0 +}; + +static char *timer_modules[] = { + "rtc", + "timer", + 0 +}; + +static char *storage_modules[] = { + "aic7xxx", + "aic79xx", + "ide", + "cciss", + "cpqarray", + "qla2", + "megaraid", + "fusion", + "libata", + "ohci1394", + "sym53c8xx", + 0 +}; + +static char *ethernet_modules[] = { + "eth", + "e100", + "eepro100", + "orinoco_cs", + "wvlan_cs", + "3c5", + "HiSax", + "skge", + "sky2", + 0 +}; + + +int find_class(struct interrupt *irq, char *moduletext) +{ + int guess = IRQ_OTHER; + int i; + + if (moduletext == NULL) + return guess; + + for (i=0; legacy_modules[i]; i++) + if (strstr(moduletext, legacy_modules[i])) + guess = IRQ_LEGACY; + + for (i=0; storage_modules[i]; i++) + if (strstr(moduletext, storage_modules[i])) + guess = IRQ_SCSI; + + for (i=0; timer_modules[i]; i++) + if (strstr(moduletext, timer_modules[i])) + guess = IRQ_TIMER; + + for (i=0; ethernet_modules[i]; i++) + if (strstr(moduletext, ethernet_modules[i])) { + guess = IRQ_ETH; + if (strstr(moduletext, "-rx")) + guess = IRQ_GETH; + if (strstr(moduletext, "-tx")) + guess = IRQ_TGETH; + } + + if (guess == IRQ_OTHER && irq->number==0) + guess = IRQ_TIMER; + + if (guess > irq->class) + return guess; + return irq->class; +} diff --git a/root_image/irqbalance/configure.ac b/root_image/irqbalance/configure.ac new file mode 100644 index 0000000..518dcc4 --- /dev/null +++ b/root_image/irqbalance/configure.ac @@ -0,0 +1,74 @@ +dnl +define([AC_INIT_NOTICE], +[### Generated automatically using autoconf version] AC_ACVERSION [ +### Copyright 2009 Steve Grubb +### +### Permission is hereby granted, free of charge, to any person obtaining a +### copy of this software and associated documentation files (the "Software"), +### to deal in the Software without restriction, 
including without limitation +### the rights to use, copy, modify, merge, publish, distribute, sublicense, +### and/or sell copies of the Software, and to permit persons to whom the +### Software is furnished to do so, subject to the following conditions: +### +### The above copyright notice and this permission notice shall be included +### in all copies or substantial portions of the Software. +### +### THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +### IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +### FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +### THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR +### OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +### ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +### OTHER DEALINGS IN THE SOFTWARE. +### +### For usage, run `./configure --help' +### For more detailed information on installation, read the file `INSTALL'. +### +### If configuration succeeds, status is in the file `config.status'. +### A log of configuration tests is in `config.log'. +]) + +AC_REVISION($Revision: 1.3 $)dnl +AC_INIT(irqbalance,0.56) +AC_PREREQ(2.12)dnl +AM_CONFIG_HEADER(config.h) + +echo Configuring irqbalance $VERSION + +AC_CONFIG_MACRO_DIR([m4]) +AC_CANONICAL_TARGET +AM_INIT_AUTOMAKE +AM_PROG_LIBTOOL +AC_SUBST(LIBTOOL_DEPS) + +echo . +echo Checking for programs + +AC_PROG_CC +AC_PROG_INSTALL +AC_PROG_AWK + +echo . +echo Checking for header files +AC_HEADER_STDC +AC_CHECK_HEADERS(linux/ethtool.h linux/sockios.h, [], []) + +AC_C_CONST +AC_C_INLINE +AM_PROG_CC_C_O + +PKG_CHECK_MODULES([GLIB], [glib-2.0]) +LIBCAP_NG_PATH + +AC_OUTPUT(Makefile) + +echo . 
+echo " + + irqbalance Version: $VERSION + Target: $target + Installation prefix: $prefix + Compiler: $CC + Compiler flags: +`echo $CFLAGS | fmt -w 50 | sed 's,^, ,'` +" diff --git a/root_image/irqbalance/constants.h b/root_image/irqbalance/constants.h new file mode 100644 index 0000000..1439e0c --- /dev/null +++ b/root_image/irqbalance/constants.h @@ -0,0 +1,30 @@ +#ifndef __INCLUDE_GUARD_CONSTANTS_H +#define __INCLUDE_GUARD_CONSTANTS_H + +/* interval between rebalance attempts in seconds */ +#define SLEEP_INTERVAL 10 + +/* NUMA topology refresh intervals, in units of SLEEP_INTERVAL */ +#define NUMA_REFRESH_INTERVAL 32 +/* NIC interrupt refresh interval, in units of SLEEP_INTERVAL */ +#define NIC_REFRESH_INTERVAL 32 + +/* minimum number of interrupts since boot for an interrupt to matter */ +#define MIN_IRQ_COUNT 20 + + +/* balancing tunings */ + +#define CROSS_PACKAGE_PENALTY 3000 +#define NUMA_PENALTY 250 +#define POWER_MODE_PACKAGE_THRESHOLD 20000 +#define CLASS_VIOLATION_PENTALTY 6000 +#define CORE_SPECIFIC_THRESHOLD 5000 + +/* power mode */ + +#define POWER_MODE_SOFTIRQ_THRESHOLD 20 +#define POWER_MODE_HYSTERESIS 3 + + +#endif diff --git a/root_image/irqbalance/cpumask.h b/root_image/irqbalance/cpumask.h new file mode 100644 index 0000000..8c6606a --- /dev/null +++ b/root_image/irqbalance/cpumask.h @@ -0,0 +1,400 @@ +#ifndef __LINUX_CPUMASK_H +#define __LINUX_CPUMASK_H + +#define NR_CPUS 256 +/* + * Cpumasks provide a bitmap suitable for representing the + * set of CPU's in a system, one bit position per CPU number. + * + * See detailed comments in the file linux/bitmap.h describing the + * data type on which these cpumasks are based. + * + * For details of cpumask_scnprintf() and cpumask_parse_user(), + * see bitmap_scnprintf() and bitmap_parse_user() in lib/bitmap.c. + * For details of cpulist_scnprintf() and cpulist_parse(), see + * bitmap_scnlistprintf() and bitmap_parselist(), also in bitmap.c. 
+ * For details of cpu_remap(), see bitmap_bitremap in lib/bitmap.c + * For details of cpus_remap(), see bitmap_remap in lib/bitmap.c. + * + * The available cpumask operations are: + * + * void cpu_set(cpu, mask) turn on bit 'cpu' in mask + * void cpu_clear(cpu, mask) turn off bit 'cpu' in mask + * void cpus_setall(mask) set all bits + * void cpus_clear(mask) clear all bits + * int cpu_isset(cpu, mask) true iff bit 'cpu' set in mask + * int cpu_test_and_set(cpu, mask) test and set bit 'cpu' in mask + * + * void cpus_and(dst, src1, src2) dst = src1 & src2 [intersection] + * void cpus_or(dst, src1, src2) dst = src1 | src2 [union] + * void cpus_xor(dst, src1, src2) dst = src1 ^ src2 + * void cpus_andnot(dst, src1, src2) dst = src1 & ~src2 + * void cpus_complement(dst, src) dst = ~src + * + * int cpus_equal(mask1, mask2) Does mask1 == mask2? + * int cpus_intersects(mask1, mask2) Do mask1 and mask2 intersect? + * int cpus_subset(mask1, mask2) Is mask1 a subset of mask2? + * int cpus_empty(mask) Is mask empty (no bits sets)? + * int cpus_full(mask) Is mask full (all bits sets)? 
+ * int cpus_weight(mask) Hamming weight - number of set bits + * + * void cpus_shift_right(dst, src, n) Shift right + * void cpus_shift_left(dst, src, n) Shift left + * + * int first_cpu(mask) Number lowest set bit, or NR_CPUS + * int next_cpu(cpu, mask) Next cpu past 'cpu', or NR_CPUS + * + * cpumask_t cpumask_of_cpu(cpu) Return cpumask with bit 'cpu' set + * CPU_MASK_ALL Initializer - all bits set + * CPU_MASK_NONE Initializer - no bits set + * unsigned long *cpus_addr(mask) Array of unsigned long's in mask + * + * int cpumask_scnprintf(buf, len, mask) Format cpumask for printing + * int cpumask_parse_user(ubuf, ulen, mask) Parse ascii string as cpumask + * int cpulist_scnprintf(buf, len, mask) Format cpumask as list for printing + * int cpulist_parse(buf, map) Parse ascii string as cpulist + * int cpu_remap(oldbit, old, new) newbit = map(old, new)(oldbit) + * int cpus_remap(dst, src, old, new) *dst = map(old, new)(src) + * + * for_each_cpu_mask(cpu, mask) for-loop cpu over mask + * + * int num_online_cpus() Number of online CPUs + * int num_possible_cpus() Number of all possible CPUs + * int num_present_cpus() Number of present CPUs + * + * int cpu_online(cpu) Is some cpu online? + * int cpu_possible(cpu) Is some cpu possible? + * int cpu_present(cpu) Is some cpu present (can schedule)? + * + * int any_online_cpu(mask) First online cpu in mask + * + * for_each_possible_cpu(cpu) for-loop cpu over cpu_possible_map + * for_each_online_cpu(cpu) for-loop cpu over cpu_online_map + * for_each_present_cpu(cpu) for-loop cpu over cpu_present_map + * + * Subtlety: + * 1) The 'type-checked' form of cpu_isset() causes gcc (3.3.2, anyway) + * to generate slightly worse code. Note for example the additional + * 40 lines of assembly code compiling the "for each possible cpu" + * loops buried in the disk_stat_read() macros calls when compiling + * drivers/block/genhd.c (arch i386, CONFIG_SMP=y). 
So use a simple + * one-line #define for cpu_isset(), instead of wrapping an inline + * inside a macro, the way we do the other calls. + */ + +#include "bitmap.h" + +typedef struct { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t; +extern cpumask_t _unused_cpumask_arg_; + +#define cpu_set(cpu, dst) __cpu_set((cpu), &(dst)) +static inline void __cpu_set(int cpu, volatile cpumask_t *dstp) +{ + set_bit(cpu, dstp->bits); +} + +#define cpu_clear(cpu, dst) __cpu_clear((cpu), &(dst)) +static inline void __cpu_clear(int cpu, volatile cpumask_t *dstp) +{ + clear_bit(cpu, dstp->bits); +} + +#define cpus_setall(dst) __cpus_setall(&(dst), NR_CPUS) +static inline void __cpus_setall(cpumask_t *dstp, int nbits) +{ + bitmap_fill(dstp->bits, nbits); +} + +#define cpus_clear(dst) __cpus_clear(&(dst), NR_CPUS) +static inline void __cpus_clear(cpumask_t *dstp, int nbits) +{ + bitmap_zero(dstp->bits, nbits); +} + +/* No static inline type checking - see Subtlety (1) above. */ +#define cpu_isset(cpu, cpumask) test_bit((cpu), (cpumask).bits) + +#define cpus_and(dst, src1, src2) __cpus_and(&(dst), &(src1), &(src2), NR_CPUS) +static inline void __cpus_and(cpumask_t *dstp, const cpumask_t *src1p, + const cpumask_t *src2p, int nbits) +{ + bitmap_and(dstp->bits, src1p->bits, src2p->bits, nbits); +} + +#define cpus_or(dst, src1, src2) __cpus_or(&(dst), &(src1), &(src2), NR_CPUS) +static inline void __cpus_or(cpumask_t *dstp, const cpumask_t *src1p, + const cpumask_t *src2p, int nbits) +{ + bitmap_or(dstp->bits, src1p->bits, src2p->bits, nbits); +} + +#define cpus_xor(dst, src1, src2) __cpus_xor(&(dst), &(src1), &(src2), NR_CPUS) +static inline void __cpus_xor(cpumask_t *dstp, const cpumask_t *src1p, + const cpumask_t *src2p, int nbits) +{ + bitmap_xor(dstp->bits, src1p->bits, src2p->bits, nbits); +} + +#define cpus_andnot(dst, src1, src2) \ + __cpus_andnot(&(dst), &(src1), &(src2), NR_CPUS) +static inline void __cpus_andnot(cpumask_t *dstp, const cpumask_t *src1p, + const cpumask_t *src2p, int 
nbits) +{ + bitmap_andnot(dstp->bits, src1p->bits, src2p->bits, nbits); +} + +#define cpus_complement(dst, src) __cpus_complement(&(dst), &(src), NR_CPUS) +static inline void __cpus_complement(cpumask_t *dstp, + const cpumask_t *srcp, int nbits) +{ + bitmap_complement(dstp->bits, srcp->bits, nbits); +} + +#define cpus_equal(src1, src2) __cpus_equal(&(src1), &(src2), NR_CPUS) +static inline int __cpus_equal(const cpumask_t *src1p, + const cpumask_t *src2p, int nbits) +{ + return bitmap_equal(src1p->bits, src2p->bits, nbits); +} + +#define cpus_intersects(src1, src2) __cpus_intersects(&(src1), &(src2), NR_CPUS) +static inline int __cpus_intersects(const cpumask_t *src1p, + const cpumask_t *src2p, int nbits) +{ + return bitmap_intersects(src1p->bits, src2p->bits, nbits); +} + +#define cpus_subset(src1, src2) __cpus_subset(&(src1), &(src2), NR_CPUS) +static inline int __cpus_subset(const cpumask_t *src1p, + const cpumask_t *src2p, int nbits) +{ + return bitmap_subset(src1p->bits, src2p->bits, nbits); +} + +#define cpus_empty(src) __cpus_empty(&(src), NR_CPUS) +static inline int __cpus_empty(const cpumask_t *srcp, int nbits) +{ + return bitmap_empty(srcp->bits, nbits); +} + +#define cpus_full(cpumask) __cpus_full(&(cpumask), NR_CPUS) +static inline int __cpus_full(const cpumask_t *srcp, int nbits) +{ + return bitmap_full(srcp->bits, nbits); +} + +#define cpus_weight(cpumask) __cpus_weight(&(cpumask), NR_CPUS) +static inline int __cpus_weight(const cpumask_t *srcp, int nbits) +{ + return bitmap_weight(srcp->bits, nbits); +} + +#define cpus_shift_right(dst, src, n) \ + __cpus_shift_right(&(dst), &(src), (n), NR_CPUS) +static inline void __cpus_shift_right(cpumask_t *dstp, + const cpumask_t *srcp, int n, int nbits) +{ + bitmap_shift_right(dstp->bits, srcp->bits, n, nbits); +} + +#define cpus_shift_left(dst, src, n) \ + __cpus_shift_left(&(dst), &(src), (n), NR_CPUS) +static inline void __cpus_shift_left(cpumask_t *dstp, + const cpumask_t *srcp, int n, int nbits) +{ + 
bitmap_shift_left(dstp->bits, srcp->bits, n, nbits); +} + +static inline int __first_cpu(const cpumask_t *srcp) +{ + return ffs(*srcp->bits)-1; +} + +#define first_cpu(src) __first_cpu(&(src)) +int __next_cpu(int n, const cpumask_t *srcp); +#define next_cpu(n, src) __next_cpu((n), &(src)) + +#define cpumask_of_cpu(cpu) \ +({ \ + typeof(_unused_cpumask_arg_) m; \ + if (sizeof(m) == sizeof(unsigned long)) { \ + m.bits[0] = 1UL<<(cpu); \ + } else { \ + cpus_clear(m); \ + cpu_set((cpu), m); \ + } \ + m; \ +}) + +#define CPU_MASK_LAST_WORD BITMAP_LAST_WORD_MASK(NR_CPUS) + +#if 0 + +#define CPU_MASK_ALL \ +(cpumask_t) { { \ + [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \ +} } + +#else + +#define CPU_MASK_ALL \ +(cpumask_t) { { \ + [0 ... BITS_TO_LONGS(NR_CPUS)-2] = ~0UL, \ + [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \ +} } + +#endif + +#define CPU_MASK_NONE \ +(cpumask_t) { { \ + [0 ... BITS_TO_LONGS(NR_CPUS)-1] = 0UL \ +} } + +#define CPU_MASK_CPU0 \ +(cpumask_t) { { \ + [0] = 1UL \ +} } + +#define cpus_addr(src) ((src).bits) + +#define cpumask_scnprintf(buf, len, src) \ + __cpumask_scnprintf((buf), (len), &(src), NR_CPUS) +static inline int __cpumask_scnprintf(char *buf, int len, + const cpumask_t *srcp, int nbits) +{ + return bitmap_scnprintf(buf, len, srcp->bits, nbits); +} + +#define cpumask_parse_user(ubuf, ulen, dst) \ + __cpumask_parse_user((ubuf), (ulen), &(dst), NR_CPUS) +static inline int __cpumask_parse_user(const char *buf, int len, + cpumask_t *dstp, int nbits) +{ + return bitmap_parse(buf, len, dstp->bits, nbits); +} + +#define cpulist_scnprintf(buf, len, src) \ + __cpulist_scnprintf((buf), (len), &(src), NR_CPUS) +static inline int __cpulist_scnprintf(char *buf, int len, + const cpumask_t *srcp, int nbits) +{ + return bitmap_scnlistprintf(buf, len, srcp->bits, nbits); +} + +#define cpulist_parse(buf, dst) __cpulist_parse((buf), &(dst), NR_CPUS) +static inline int __cpulist_parse(const char *buf, cpumask_t *dstp, int nbits) +{ + return 
bitmap_parselist(buf, dstp->bits, nbits); +} + +#define cpu_remap(oldbit, old, new) \ + __cpu_remap((oldbit), &(old), &(new), NR_CPUS) +static inline int __cpu_remap(int oldbit, + const cpumask_t *oldp, const cpumask_t *newp, int nbits) +{ + return bitmap_bitremap(oldbit, oldp->bits, newp->bits, nbits); +} + +#define cpus_remap(dst, src, old, new) \ + __cpus_remap(&(dst), &(src), &(old), &(new), NR_CPUS) +static inline void __cpus_remap(cpumask_t *dstp, const cpumask_t *srcp, + const cpumask_t *oldp, const cpumask_t *newp, int nbits) +{ + bitmap_remap(dstp->bits, srcp->bits, oldp->bits, newp->bits, nbits); +} + +#if NR_CPUS > 1 +#define for_each_cpu_mask(cpu, mask) \ + for ((cpu) = first_cpu(mask); \ + (cpu) < NR_CPUS; \ + (cpu) = next_cpu((cpu), (mask))) +#else /* NR_CPUS == 1 */ +#define for_each_cpu_mask(cpu, mask) \ + for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask) +#endif /* NR_CPUS */ + +/* + * The following particular system cpumasks and operations manage + * possible, present and online cpus. Each of them is a fixed size + * bitmap of size NR_CPUS. + * + * #ifdef CONFIG_HOTPLUG_CPU + * cpu_possible_map - has bit 'cpu' set iff cpu is populatable + * cpu_present_map - has bit 'cpu' set iff cpu is populated + * cpu_online_map - has bit 'cpu' set iff cpu available to scheduler + * #else + * cpu_possible_map - has bit 'cpu' set iff cpu is populated + * cpu_present_map - copy of cpu_possible_map + * cpu_online_map - has bit 'cpu' set iff cpu available to scheduler + * #endif + * + * In either case, NR_CPUS is fixed at compile time, as the static + * size of these bitmaps. The cpu_possible_map is fixed at boot + * time, as the set of CPU id's that it is possible might ever + * be plugged in at anytime during the life of that system boot. + * The cpu_present_map is dynamic(*), representing which CPUs + * are currently plugged in. And cpu_online_map is the dynamic + * subset of cpu_present_map, indicating those CPUs available + * for scheduling. 
+ * + * If HOTPLUG is enabled, then cpu_possible_map is forced to have + * all NR_CPUS bits set, otherwise it is just the set of CPUs that + * ACPI reports present at boot. + * + * If HOTPLUG is enabled, then cpu_present_map varies dynamically, + * depending on what ACPI reports as currently plugged in, otherwise + * cpu_present_map is just a copy of cpu_possible_map. + * + * (*) Well, cpu_present_map is dynamic in the hotplug case. If not + * hotplug, it's a copy of cpu_possible_map, hence fixed at boot. + * + * Subtleties: + * 1) UP arch's (NR_CPUS == 1, CONFIG_SMP not defined) hardcode + * assumption that their single CPU is online. The UP + * cpu_{online,possible,present}_maps are placebos. Changing them + * will have no useful affect on the following num_*_cpus() + * and cpu_*() macros in the UP case. This ugliness is a UP + * optimization - don't waste any instructions or memory references + * asking if you're online or how many CPUs there are if there is + * only one CPU. + * 2) Most SMP arch's #define some of these maps to be some + * other map specific to that arch. Therefore, the following + * must be #define macros, not inlines. To see why, examine + * the assembly code produced by the following. 
Note that + * set1() writes phys_x_map, but set2() writes x_map: + * int x_map, phys_x_map; + * #define set1(a) x_map = a + * inline void set2(int a) { x_map = a; } + * #define x_map phys_x_map + * main(){ set1(3); set2(5); } + */ + +extern cpumask_t cpu_possible_map; +extern cpumask_t cpu_online_map; +extern cpumask_t cpu_present_map; + +#if NR_CPUS > 1 +#define num_online_cpus() cpus_weight(cpu_online_map) +#define num_possible_cpus() cpus_weight(cpu_possible_map) +#define num_present_cpus() cpus_weight(cpu_present_map) +#define cpu_online(cpu) cpu_isset((cpu), cpu_online_map) +#define cpu_possible(cpu) cpu_isset((cpu), cpu_possible_map) +#define cpu_present(cpu) cpu_isset((cpu), cpu_present_map) +#else +#define num_online_cpus() 1 +#define num_possible_cpus() 1 +#define num_present_cpus() 1 +#define cpu_online(cpu) ((cpu) == 0) +#define cpu_possible(cpu) ((cpu) == 0) +#define cpu_present(cpu) ((cpu) == 0) +#endif + +int highest_possible_processor_id(void); +#define any_online_cpu(mask) __any_online_cpu(&(mask)) +int __any_online_cpu(const cpumask_t *mask); + +#define for_each_possible_cpu(cpu) for_each_cpu_mask((cpu), cpu_possible_map) +#define for_each_online_cpu(cpu) for_each_cpu_mask((cpu), cpu_online_map) +#define for_each_present_cpu(cpu) for_each_cpu_mask((cpu), cpu_present_map) + +#endif /* __LINUX_CPUMASK_H */ diff --git a/root_image/irqbalance/cputree.c b/root_image/irqbalance/cputree.c new file mode 100644 index 0000000..280bbbf --- /dev/null +++ b/root_image/irqbalance/cputree.c @@ -0,0 +1,379 @@ +/* + * Copyright (C) 2006, Intel Corporation + * + * This file is part of irqbalance + * + * This program file is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; version 2 of the License. 
+ * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program in a file named COPYING; if not, write to the + * Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301 USA + */ + +/* + * This file contains the code to construct and manipulate a hierarchy of processors, + * cache domains and processor cores. + */ + +#include "config.h" +#include +#include +#include +#include +#include + +#include + +#include "irqbalance.h" + + +GList *cpus; +GList *cache_domains; +GList *packages; + +int package_count; +int cache_domain_count; +int core_count; + +/* Users want to be able to keep interrupts away from some cpus; store these in a cpumask_t */ +cpumask_t banned_cpus; + + +/* + it's convenient to have the complement of banned_cpus available so that + the AND operator can be used to mask out unwanted cpus +*/ +static cpumask_t unbanned_cpus; + +static void fill_packages(void) +{ + GList *entry; + + entry = g_list_first(cache_domains); + while (entry) { + struct package *package; + struct cache_domain *cache = NULL; + GList *entry2; + + cache = entry->data; + entry2 = entry; + entry = g_list_next(entry); + if (cache->marker) + continue; + package = malloc(sizeof(struct package)); + if (!package) + break; + memset(package, 0, sizeof(struct package)); + package->mask = cache->package_mask; + package->number = cache->number; + while (entry2) { + struct cache_domain *cache2; + cache2 = entry2->data; + if (cpus_equal(cache->package_mask, cache2->package_mask)) { + cache2->marker = 1; + package->cache_domains = g_list_append(package->cache_domains, cache2); + if (package->number > cache2->number) + package->number = cache2->number; + } + entry2 = 
g_list_next(entry2); + } + packages = g_list_append(packages, package); + package_count++; + } +} + +static void fill_cache_domain(void) +{ + GList *entry; + + entry = g_list_first(cpus); + while (entry) { + struct cache_domain *cache = NULL; + struct cpu_core *cpu; + GList *entry2; + cpu = entry->data; + entry2 = entry; + entry = g_list_next(entry); + if (cpu->marker) + continue; + cache = malloc(sizeof(struct cache_domain)); + if (!cache) + break; + memset(cache, 0, sizeof(struct cache_domain)); + cache->mask = cpu->cache_mask; + cache->package_mask = cpu->package_mask; + cache->number = cpu->number; + cache_domains = g_list_append(cache_domains, cache); + cache_domain_count++; + while (entry2) { + struct cpu_core *cpu2; + cpu2 = entry2->data; + if (cpus_equal(cpu->cache_mask, cpu2->cache_mask) && + cpus_equal(cpu->package_mask, cpu2->package_mask)) { + cpu2->marker = 1; + cache->cpu_cores = g_list_append(cache->cpu_cores, cpu2); + if (cpu2->number < cache->number) + cache->number = cpu2->number; + } + entry2 = g_list_next(entry2); + } + } +} + + +static void do_one_cpu(char *path) +{ + struct cpu_core *cpu; + FILE *file; + char new_path[PATH_MAX]; + + /* skip offline cpus */ + snprintf(new_path, PATH_MAX, "%s/online", path); + file = fopen(new_path, "r"); + if (file) { + char *line = NULL; + size_t size = 0; + if (getline(&line, &size, file)==0) + return; + fclose(file); + if (line && line[0]=='0') { + free(line); + return; + } + free(line); + } + + cpu = malloc(sizeof(struct cpu_core)); + if (!cpu) + return; + memset(cpu, 0, sizeof(struct cpu_core)); + + cpu->number = strtoul(&path[27], NULL, 10); + + cpu_set(cpu->number, cpu->mask); + + /* if the cpu is on the banned list, just don't add it */ + if (cpus_intersects(cpu->mask, banned_cpus)) { + free(cpu); + /* even though we don't use the cpu we do need to count it */ + core_count++; + return; + } + + + /* try to read the package mask; if it doesn't exist assume solitary */ + snprintf(new_path, PATH_MAX, 
"%s/topology/core_siblings", path); + file = fopen(new_path, "r"); + cpu_set(cpu->number, cpu->package_mask); + if (file) { + char *line = NULL; + size_t size = 0; + if (getline(&line, &size, file)) + cpumask_parse_user(line, strlen(line), cpu->package_mask); + fclose(file); + free(line); + } + + /* try to read the cache mask; if it doesn't exist assume solitary */ + /* We want the deepest cache level available so try index1 first, then index2 */ + cpu_set(cpu->number, cpu->cache_mask); + snprintf(new_path, PATH_MAX, "%s/cache/index1/shared_cpu_map", path); + file = fopen(new_path, "r"); + if (file) { + char *line = NULL; + size_t size = 0; + if (getline(&line, &size, file)) + cpumask_parse_user(line, strlen(line), cpu->cache_mask); + fclose(file); + free(line); + } + snprintf(new_path, PATH_MAX, "%s/cache/index2/shared_cpu_map", path); + file = fopen(new_path, "r"); + if (file) { + char *line = NULL; + size_t size = 0; + if (getline(&line, &size, file)) + cpumask_parse_user(line, strlen(line), cpu->cache_mask); + fclose(file); + free(line); + } + + /* + blank out the banned cpus from the various masks so that interrupts + will never be told to go there + */ + cpus_and(cpu->cache_mask, cpu->cache_mask, unbanned_cpus); + cpus_and(cpu->package_mask, cpu->package_mask, unbanned_cpus); + cpus_and(cpu->mask, cpu->mask, unbanned_cpus); + + cpus = g_list_append(cpus, cpu); + core_count++; +} + +static void dump_irqs(int spaces, GList *interrupts) +{ + struct interrupt *irq; + while (interrupts) { + int i; + for (i=0; idata; + printf("Interrupt %i (%s/%u) \n", irq->number, classes[irq->class], (unsigned int)irq->workload); + interrupts = g_list_next(interrupts); + } +} + +void dump_tree(void) +{ + GList *p_iter, *c_iter, *cp_iter; + struct package *package; + struct cache_domain *cache_domain; + struct cpu_core *cpu; + + char buffer[4096]; + p_iter = g_list_first(packages); + while (p_iter) { + package = p_iter->data; + cpumask_scnprintf(buffer, 4096, package->mask); + 
printf("Package %i: cpu mask is %s (workload %lu)\n", package->number, buffer, (unsigned long)package->workload); + c_iter = g_list_first(package->cache_domains); + while (c_iter) { + cache_domain = c_iter->data; + c_iter = g_list_next(c_iter); + cpumask_scnprintf(buffer, 4095, cache_domain->mask); + printf(" Cache domain %i: cpu mask is %s (workload %lu) \n", cache_domain->number, buffer, (unsigned long)cache_domain->workload); + cp_iter = cache_domain->cpu_cores; + while (cp_iter) { + cpu = cp_iter->data; + cp_iter = g_list_next(cp_iter); + printf(" CPU number %i (workload %lu)\n", cpu->number, (unsigned long)cpu->workload); + dump_irqs(18, cpu->interrupts); + } + dump_irqs(10, cache_domain->interrupts); + } + dump_irqs(2, package->interrupts); + p_iter = g_list_next(p_iter); + } +} + +/* + * this function removes previous state from the cpu tree, such as + * which level does how much work and the actual lists of interrupts + * assigned to each component + */ +void clear_work_stats(void) +{ + GList *p_iter, *c_iter, *cp_iter; + struct package *package; + struct cache_domain *cache_domain; + struct cpu_core *cpu; + + p_iter = g_list_first(packages); + while (p_iter) { + package = p_iter->data; + package->workload = 0; + g_list_free(package->interrupts); + package->interrupts = NULL; + c_iter = g_list_first(package->cache_domains); + memset(package->class_count, 0, sizeof(package->class_count)); + while (c_iter) { + cache_domain = c_iter->data; + c_iter = g_list_next(c_iter); + cache_domain->workload = 0; + cp_iter = cache_domain->cpu_cores; + g_list_free(cache_domain->interrupts); + cache_domain->interrupts = NULL; + memset(cache_domain->class_count, 0, sizeof(cache_domain->class_count)); + while (cp_iter) { + cpu = cp_iter->data; + cp_iter = g_list_next(cp_iter); + cpu->workload = 0; + g_list_free(cpu->interrupts); + cpu->interrupts = NULL; + memset(cpu->class_count, 0, sizeof(cpu->class_count)); + } + } + p_iter = g_list_next(p_iter); + } +} + + +void 
parse_cpu_tree(void) +{ + DIR *dir; + struct dirent *entry; + + cpus_complement(unbanned_cpus, banned_cpus); + + dir = opendir("/sys/devices/system/cpu"); + if (!dir) + return; + do { + entry = readdir(dir); + if (entry && strlen(entry->d_name)>3 && strstr(entry->d_name,"cpu")) { + char new_path[PATH_MAX]; + sprintf(new_path, "/sys/devices/system/cpu/%s", entry->d_name); + do_one_cpu(new_path); + } + } while (entry); + closedir(dir); + + fill_cache_domain(); + fill_packages(); + + if (debug_mode) + dump_tree(); + +} + + +/* + * This function frees all memory related to a cpu tree so that a new tree + * can be read + */ +void clear_cpu_tree(void) +{ + GList *item; + struct cpu_core *cpu; + struct cache_domain *cache_domain; + struct package *package; + + while (packages) { + item = g_list_first(packages); + package = item->data; + g_list_free(package->cache_domains); + g_list_free(package->interrupts); + free(package); + packages = g_list_delete_link(packages, item); + } + package_count = 0; + + while (cache_domains) { + item = g_list_first(cache_domains); + cache_domain = item->data; + g_list_free(cache_domain->cpu_cores); + g_list_free(cache_domain->interrupts); + free(cache_domain); + cache_domains = g_list_delete_link(cache_domains, item); + } + cache_domain_count = 0; + + + while (cpus) { + item = g_list_first(cpus); + cpu = item->data; + g_list_free(cpu->interrupts); + free(cpu); + cpus = g_list_delete_link(cpus, item); + } + core_count = 0; + +} diff --git a/root_image/irqbalance/irqbalance.1 b/root_image/irqbalance/irqbalance.1 new file mode 100755 index 0000000..c26c709 --- /dev/null +++ b/root_image/irqbalance/irqbalance.1 @@ -0,0 +1,64 @@ +.\"Generated by db2man.xsl. Don't modify this, modify the source. +.de Sh \" Subsection +.br +.if t .Sp +.ne 5 +.PP +\fB\\$1\fR +.PP +.. +.de Sp \" Vertical space (when we can't use .PP) +.if t .sp .5v +.if n .sp +.. +.de Ip \" List item +.br +.ie \\n(.$>=3 .ne \\$3 +.el .ne 3 +.IP "\\$1" \\$2 +.. 
+.TH "IRQBALANCE" 1 "Dec 2006" "Linux" "irqbalance" +.SH NAME +irqbalance \- distribute hardware interrupts across processors on a multiprocessor system +.SH "SYNOPSIS" + +.nf +\fBirqbalance\fR +.fi + +.SH "DESCRIPTION" + +.PP +The purpose of \fBirqbalance\fR is distribute hardware interrupts across processors on a multiprocessor system in order to increase performance\&. + +.SH "OPTIONS" + +.TP +.B --oneshot +Causes irqbalance to be run once, after which the daemon exits +.TP + +.B --debug +Causes irqbalance to run in the foreground and extra debug information to be printed + +.SH "ENVIRONMENT VARIABLES" +.TP +.B IRQBALANCE_ONESHOT +Same as --oneshot + +.TP +.B IRQBALANCE_DEBUG +Same as --debug + +.TP +.B IRQBALANCE_BANNED_CPUS +Provides a mask of cpus which irqbalance should ignore and never assign interrupts to + +.TP +.B IRQBALANCE_BANNED_INTERRUPTS +A list of space delimited IRQ numbers that irqbalance should not touch + +.SH "Homepage" +http://www.irqbalance.org + + diff --git a/root_image/irqbalance/irqbalance.c b/root_image/irqbalance/irqbalance.c new file mode 100644 index 0000000..5bce9a4 --- /dev/null +++ b/root_image/irqbalance/irqbalance.c @@ -0,0 +1,147 @@ +/* + * Copyright (C) 2006, Intel Corporation + * + * This file is part of irqbalance + * + * This program file is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. 
/*
 * Sleep for roughly @seconds seconds, shortened by the fraction of the
 * current wall-clock second that has already elapsed.
 */
void sleep_approx(int seconds)
{
	struct timeval now;
	struct timespec delay;

	gettimeofday(&now, NULL);

	delay.tv_sec = seconds;
	delay.tv_nsec = -now.tv_usec*1000;

	/* borrow whole seconds until the nanosecond part is non-negative */
	for (; delay.tv_nsec < 0; delay.tv_nsec += 1000000000)
		delay.tv_sec--;

	nanosleep(&delay, NULL);
}
printf("\n\n\n-----------------------------------------------------------------------------\n"); + + + check_power_mode(); + parse_proc_interrupts(); + + /* cope with cpu hotplug -- detected during /proc/interrupts parsing */ + if (need_cpu_rescan) { + need_cpu_rescan = 0; + /* if there's a hotplug event we better turn off power mode for a bit until things settle */ + power_mode = 0; + if (debug_mode) + printf("Rescanning cpu topology \n"); + reset_counts(); + clear_work_stats(); + + clear_cpu_tree(); + parse_cpu_tree(); + } + + /* deal with NAPI */ + account_for_nic_stats(); + calculate_workload(); + + /* to cope with dynamic configurations we scan for new numa information + * once every 5 minutes + */ + if (counter % NUMA_REFRESH_INTERVAL == 16) + pci_numa_scan(); + + calculate_placement(); + activate_mapping(); + + if (debug_mode) + dump_tree(); + if (one_shot_mode) + break; + counter++; + } + return EXIT_SUCCESS; +} diff --git a/root_image/irqbalance/irqbalance.h b/root_image/irqbalance/irqbalance.h new file mode 100644 index 0000000..eafe13e --- /dev/null +++ b/root_image/irqbalance/irqbalance.h @@ -0,0 +1,50 @@ +#ifndef __INCLUDE_GUARD_IRQBALANCE_H_ +#define __INCLUDE_GUARD_IRQBALANCE_H_ + + +#include "constants.h" + +#include "cpumask.h" + +#include +#include + +#include "types.h" + +struct interrupt; + +extern int package_count; +extern int cache_domain_count; +extern int core_count; +extern char *classes[]; +extern int map_class_to_level[7]; +extern int class_counts[7]; +extern int debug_mode; +extern int power_mode; +extern int need_cpu_rescan; +extern int one_shot_mode; +extern GList *interrupts; + + +extern void parse_cpu_tree(void); +extern void clear_work_stats(void); +extern void parse_proc_interrupts(void); +extern void set_interrupt_count(int number, uint64_t count); +extern void add_interrupt_count(int number, uint64_t count, int type); +extern int find_class(struct interrupt *irq, char *string); +extern void add_interrupt_numa(int number, 
cpumask_t mask, int type); + +void calculate_workload(void); +void reset_counts(void); +void dump_workloads(void); +void sort_irq_list(void); +void calculate_placement(void); +void dump_tree(void); + +void activate_mapping(void); +void account_for_nic_stats(void); +void check_power_mode(void); +void clear_cpu_tree(void); +void pci_numa_scan(void); + +#endif diff --git a/root_image/irqbalance/irqlist.c b/root_image/irqbalance/irqlist.c new file mode 100644 index 0000000..d91563e --- /dev/null +++ b/root_image/irqbalance/irqlist.c @@ -0,0 +1,304 @@ +/* + * Copyright (C) 2006, Intel Corporation + * + * This file is part of irqbalance + * + * This program file is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program in a file named COPYING; if not, write to the + * Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301 USA + */ + +/* + * This file has the basic functions to manipulate interrupt metadata + */ +#include "config.h" +#include +#include +#include +#include +#include + +#include "types.h" +#include "irqbalance.h" + +GList *interrupts; + + + +void get_affinity_hint(struct interrupt *irq, int number) +{ + char buf[PATH_MAX]; + char *line = NULL; + size_t size = 0; + FILE *file; + sprintf(buf, "/proc/irq/%i/affinity_hint", number); + file = fopen(buf, "r"); + if (!file) + return; + if (getline(&line, &size, file)==0) { + free(line); + fclose(file); + return; + } + cpumask_parse_user(line, strlen(line), irq->node_mask); + fclose(file); + free(line); +} + +/* + * This function classifies and reads various things from /proc about a specific irq + */ +static void investigate(struct interrupt *irq, int number) +{ + DIR *dir; + struct dirent *entry; + char *c, *c2; + int nr , count = 0; + char buf[PATH_MAX]; + sprintf(buf, "/proc/irq/%i", number); + dir = opendir(buf); + do { + entry = readdir(dir); + if (!entry) + break; + if (strcmp(entry->d_name,"smp_affinity")==0) { + char *line = NULL; + size_t size = 0; + FILE *file; + sprintf(buf, "/proc/irq/%i/smp_affinity", number); + file = fopen(buf, "r"); + if (!file) + continue; + if (getline(&line, &size, file)==0) { + free(line); + fclose(file); + continue; + } + cpumask_parse_user(line, strlen(line), irq->mask); + fclose(file); + free(line); + } else if (strcmp(entry->d_name,"allowed_affinity")==0) { + char *line = NULL; + size_t size = 0; + FILE *file; + sprintf(buf, "/proc/irq/%i/allowed_affinity", number); + file = fopen(buf, "r"); + if (!file) + continue; + if (getline(&line, &size, file)==0) { + free(line); + fclose(file); + continue; + } + cpumask_parse_user(line, strlen(line), 
irq->allowed_mask); + fclose(file); + free(line); + } else if (strcmp(entry->d_name,"affinity_hint")==0) { + get_affinity_hint(irq, number); + } else { + irq->class = find_class(irq, entry->d_name); + } + + } while (entry); + closedir(dir); + irq->balance_level = map_class_to_level[irq->class]; + + for (nr = 0; nr < NR_CPUS; nr++) + if (cpu_isset(nr, irq->allowed_mask)) + count++; + + /* if there is no choice in the allowed mask, don't bother to balance */ + if (count<2) + irq->balance_level = BALANCE_NONE; + + + /* next, check the IRQBALANCE_BANNED_INTERRUPTS env variable for blacklisted irqs */ + c = c2 = getenv("IRQBALANCE_BANNED_INTERRUPTS"); + if (!c) + return; + + do { + c = c2; + nr = strtoul(c, &c2, 10); + if (c!=c2 && nr == number) + irq->balance_level = BALANCE_NONE; + } while (c!=c2 && c2!=NULL); +} + + +/* + * Set the number of interrupts received for a specific irq; + * create the irq metadata if there is none yet + */ +void set_interrupt_count(int number, uint64_t count) +{ + GList *item; + struct interrupt *irq; + + if (count < MIN_IRQ_COUNT && !one_shot_mode) + return; /* no need to track or set interrupts sources without any activity since boot + but allow for a few (20) boot-time-only interrupts */ + + item = g_list_first(interrupts); + while (item) { + irq = item->data; + + if (irq->number == number) { + irq->count = count; + /* see if affinity_hint changed */ + get_affinity_hint(irq, number); + return; + } + item = g_list_next(item); + } + /* new interrupt */ + irq = malloc(sizeof(struct interrupt)); + if (!irq) + return; + memset(irq, 0, sizeof(struct interrupt)); + irq->number = number; + irq->count = count; + irq->allowed_mask = CPU_MASK_ALL; + investigate(irq, number); + interrupts = g_list_append(interrupts, irq); +} + +/* + * Add extra irqs to a specific irq metadata structure; + * if no such metadata exists, do nothing at all + */ +void add_interrupt_count(int number, uint64_t count, int type) +{ + GList *item; + struct interrupt *irq; + 
+ if (!count) + return; + + item = g_list_first(interrupts); + while (item) { + irq = item->data; + item = g_list_next(item); + + if (irq->number == number) { + irq->extra += count; + if (irq->class < type && irq->balance_level != BALANCE_NONE) { + irq->class = type; + irq->balance_level = map_class_to_level[irq->class]; + } + return; + } + } +} + +/* + * Set the numa affinity mask for a specific interrupt if there + * is metadata for the interrupt; do nothing if no such data + * exists. + */ +void add_interrupt_numa(int number, cpumask_t mask, int type) +{ + GList *item; + struct interrupt *irq; + + item = g_list_first(interrupts); + while (item) { + irq = item->data; + item = g_list_next(item); + + if (irq->number == number) { + cpus_or(irq->numa_mask, irq->numa_mask, mask); + if (irq->class < type && irq->balance_level != BALANCE_NONE) { + irq->class = type; + irq->balance_level = map_class_to_level[irq->class]; + } + return; + } + } +} + +void calculate_workload(void) +{ + int i; + GList *item; + struct interrupt *irq; + + for (i=0; i<7; i++) + class_counts[i]=0; + item = g_list_first(interrupts); + while (item) { + irq = item->data; + item = g_list_next(item); + + irq->workload = irq->count - irq->old_count + irq->workload/3 + irq->extra; + class_counts[irq->class]++; + irq->old_count = irq->count; + irq->extra = 0; + } +} + +void reset_counts(void) +{ + GList *item; + struct interrupt *irq; + item = g_list_first(interrupts); + while (item) { + irq = item->data; + item = g_list_next(item); + irq->old_count = irq->count; + irq->extra = 0; + + } +} + +void dump_workloads(void) +{ + GList *item; + struct interrupt *irq; + item = g_list_first(interrupts); + while (item) { + irq = item->data; + item = g_list_next(item); + + printf("Interrupt %i (class %s) has workload %lu \n", irq->number, classes[irq->class], (unsigned long)irq->workload); + + } +} + + +static gint sort_irqs(gconstpointer A, gconstpointer B) +{ + struct interrupt *a, *b; + a = (struct 
interrupt*)A; + b = (struct interrupt*)B; + + if (a->class < b->class) + return 1; + if (a->class > b->class) + return -1; + if (a->workload < b->workload) + return 1; + if (a->workload > b->workload) + return -1; + if (alow) and then by workload (high->low) */ + interrupts = g_list_sort(interrupts, sort_irqs); +} diff --git a/root_image/irqbalance/network.c b/root_image/irqbalance/network.c new file mode 100644 index 0000000..6c9ff51 --- /dev/null +++ b/root_image/irqbalance/network.c @@ -0,0 +1,206 @@ +/* + * Copyright (C) 2006, Intel Corporation + * + * This file is part of irqbalance + * + * This program file is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program in a file named COPYING; if not, write to the + * Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301 USA + */ + +/* + * Due to NAPI, the actual number of interrupts for a network NIC is usually low + * even though the amount of work is high; this file is there to compensate for this + * by adding actual package counts to the calculated amount of work of interrupts + */ +#include "config.h" +#include +#include +#include +#include +/* some distros (Debian / SLES) ship a totally broken ethtool.h */ +/* work around the breakage some */ +#define u32 __u32 +#define u16 __u16 +#define u8 __u8 +#define u64 __u64 +#include +#undef u8 +#undef u16 +#undef u32 +#undef u64 +#include +#include +#include +#include +#include +#include + + +#include "irqbalance.h" + +struct nic { + char ethname[64]; + int irq; + uint64_t prev_pkt; + int counter; +}; + +static GList *nics; + + +static int dev_to_irq(char *devname) +{ + int sock, ret; + struct ifreq ifr; + struct ethtool_value ethtool; + struct ethtool_drvinfo driver; + FILE *file; + char *line = NULL; + size_t size; + int val; + + char buffer[PATH_MAX]; + + memset(&ifr, 0, sizeof(struct ifreq)); + memset(ðtool, 0, sizeof(struct ethtool_value)); + + sock = socket(AF_INET, SOCK_DGRAM, 0); + if (sock<0) + return 0; + + strcpy(ifr.ifr_name, devname); + + driver.cmd = ETHTOOL_GDRVINFO; + ifr.ifr_data = (void*) &driver; + ret = ioctl(sock, SIOCETHTOOL, &ifr); + close(sock); + if (ret<0) + return 0; + sprintf(buffer,"/sys/bus/pci/devices/%s/irq", driver.bus_info); + file = fopen(buffer, "r"); + if (!file) + return 0; + if (getline(&line, &size, file)==0) { + free(line); + fclose(file); + return 0; + } + fclose(file); + val = 0; + if (line) + val = strtoul(line, NULL, 10); + free(line); + return val; +} + +static struct nic *new_nic(char *name) +{ + struct nic *nic; + nic = malloc(sizeof(struct 
nic)); + if (!nic) + return NULL; + memset(nic, 0, sizeof(struct nic)); + strcpy(nic->ethname, name); + nic->irq = dev_to_irq(name); + nics = g_list_append(nics, nic); + return nic; +} + +static struct nic *find_nic(char *name) +{ + GList *item; + struct nic *nic; + item = g_list_first(nics); + while (item) { + nic = item->data; + item = g_list_next(item); + if (strcmp(nic->ethname, name)==0) { + nic->counter++; + /* refresh irq information once in a while; ifup/down + * can make this info go stale over time + */ + if ((nic->counter % NIC_REFRESH_INTERVAL) == 0) + nic->irq = dev_to_irq(nic->ethname); + return nic; + } + } + nic = new_nic(name); + return nic; +} + +void account_for_nic_stats(void) +{ + struct nic *nic; + FILE *file; + char *line = NULL; + size_t size = 0; + file = fopen("/proc/net/dev", "r"); + if (!file) + return; + /* first two lines are headers */ + if (getline(&line, &size, file)==0) { + free(line); + return; + } + if (getline(&line, &size, file)==0) { + free(line); + return; + } + + while (!feof(file)) { + uint64_t rxcount; + uint64_t txcount; + uint64_t delta; + int dummy; + char *c, *c2; + if (getline(&line, &size, file)==0) + break; + if (line==NULL) + break; + c = strchr(line, ':'); + if (c==NULL) /* header line */ + continue; + *c = 0; + c++; + c2 = &line[0]; + while (*c2==' ') c2++; + nic = find_nic(c2); + if (!nic) + continue; + dummy = strtoul(c, &c, 10); + rxcount = strtoull(c, &c, 10); + dummy = strtoul(c, &c, 10); + dummy = strtoul(c, &c, 10); + dummy = strtoul(c, &c, 10); + dummy = strtoul(c, &c, 10); + dummy = strtoul(c, &c, 10); + dummy = strtoul(c, &c, 10); + dummy = strtoul(c, &c, 10); + txcount = strtoull(c, &c, 10); + delta = (txcount+rxcount-nic->prev_pkt)/2; + /* add the RX and TX packets to the irq count, but only for 50%; + many packets generate another IRQ anyway and we don't want to + overweigh this too much. 
Also limit this to 100.000 max */ + if (delta>100000) + delta = 100000; + if (delta>0 && nic->prev_pkt != 0) + add_interrupt_count(nic->irq, delta, IRQ_ETH); + nic->prev_pkt = rxcount + txcount; + + + } + fclose(file); + free(line); +} diff --git a/root_image/irqbalance/non-atomic.h b/root_image/irqbalance/non-atomic.h new file mode 100644 index 0000000..943501a --- /dev/null +++ b/root_image/irqbalance/non-atomic.h @@ -0,0 +1,115 @@ +/* + +This file is copied from the Linux kernel and mildly adjusted for use in userspace + + +*/ +#ifndef _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ +#define _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ + +#define BITOP_MASK(nr) (1UL << ((nr) % BITS_PER_LONG)) +#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG) + +/** + * __set_bit - Set a bit in memory + * @nr: the bit to set + * @addr: the address to start counting from + * + * Unlike set_bit(), this function is non-atomic and may be reordered. + * If it's called on the same region of memory simultaneously, the effect + * may be that only one operation succeeds. + */ +static inline void set_bit(int nr, volatile unsigned long *addr) +{ + unsigned long mask = BITOP_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); + + *p |= mask; +} + +static inline void clear_bit(int nr, volatile unsigned long *addr) +{ + unsigned long mask = BITOP_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); + + *p &= ~mask; +} + +/** + * __change_bit - Toggle a bit in memory + * @nr: the bit to change + * @addr: the address to start counting from + * + * Unlike change_bit(), this function is non-atomic and may be reordered. + * If it's called on the same region of memory simultaneously, the effect + * may be that only one operation succeeds. 
+ */ +static inline void __change_bit(int nr, volatile unsigned long *addr) +{ + unsigned long mask = BITOP_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); + + *p ^= mask; +} + +/** + * __test_and_set_bit - Set a bit and return its old value + * @nr: Bit to set + * @addr: Address to count from + * + * This operation is non-atomic and can be reordered. + * If two examples of this operation race, one can appear to succeed + * but actually fail. You must protect multiple accesses with a lock. + */ +static inline int __test_and_set_bit(int nr, volatile unsigned long *addr) +{ + unsigned long mask = BITOP_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); + unsigned long old = *p; + + *p = old | mask; + return (old & mask) != 0; +} + +/** + * __test_and_clear_bit - Clear a bit and return its old value + * @nr: Bit to clear + * @addr: Address to count from + * + * This operation is non-atomic and can be reordered. + * If two examples of this operation race, one can appear to succeed + * but actually fail. You must protect multiple accesses with a lock. + */ +static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr) +{ + unsigned long mask = BITOP_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); + unsigned long old = *p; + + *p = old & ~mask; + return (old & mask) != 0; +} + +/* WARNING: non atomic and it can be reordered! 
*/ +static inline int __test_and_change_bit(int nr, + volatile unsigned long *addr) +{ + unsigned long mask = BITOP_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr); + unsigned long old = *p; + + *p = old ^ mask; + return (old & mask) != 0; +} + +/** + * test_bit - Determine whether a bit is set + * @nr: bit number to test + * @addr: Address to start counting from + */ +static inline int test_bit(int nr, const volatile unsigned long *addr) +{ + return 1UL & (addr[BITOP_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); +} + +#endif /* _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ */ diff --git a/root_image/irqbalance/numa.c b/root_image/irqbalance/numa.c new file mode 100644 index 0000000..453a9bc --- /dev/null +++ b/root_image/irqbalance/numa.c @@ -0,0 +1,102 @@ +/* + * Copyright (C) 2006, Intel Corporation + * + * This file is part of irqbalance + * + * This program file is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program in a file named COPYING; if not, write to the + * Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301 USA + */ + +/* + * This file tries to map numa affinity of pci devices to their interrupts + * In addition the PCI class information is used to refine the classification + * of interrupt sources + */ +#include "config.h" +#include +#include +#include +#include +#include + +#include "irqbalance.h" + +void pci_numa_scan(void) +{ + DIR *dir; + struct dirent *entry; + cpumask_t mask; + char line[PATH_MAX]; + FILE *file; + int irq; + unsigned int class; + + dir = opendir("/sys/bus/pci/devices"); + if (!dir) + return; + do { + int type; + entry = readdir(dir); + if (!entry) + break; + if (strlen(entry->d_name)<3) + continue; + + sprintf(line,"/sys/bus/pci/devices/%s/irq", entry->d_name); + file = fopen(line, "r"); + if (!file) + continue; + if (fgets(line, PATH_MAX, file)==NULL) + line[0]=0; + fclose(file); + irq = strtoul(line, NULL, 10); + if (!irq) + continue; + + sprintf(line,"/sys/bus/pci/devices/%s/class", entry->d_name); + file = fopen(line, "r"); + if (!file) + continue; + if (fgets(line, PATH_MAX, file)==NULL) + line[0]=0; + fclose(file); + class = strtoul(line, NULL, 16); + + sprintf(line,"/sys/bus/pci/devices/%s/local_cpus", entry->d_name); + file = fopen(line, "r"); + if (!file) + continue; + if (fgets(line, PATH_MAX, file)==NULL) + line[0]=0; + fclose(file); + cpumask_parse_user(line, strlen(line), mask); + + type = IRQ_OTHER; + if ((class>>16) == 0x01) + type = IRQ_SCSI; +/* + * Ethernet gets the type via /proc/net/dev; in addition down'd interfaces + * shouldn't boost interrupts + if ((class>>16) == 0x02) + type = IRQ_ETH; +*/ + if ((class>>16) >= 0x03 && (class>>16) <= 0x0C) + type = IRQ_LEGACY; + + add_interrupt_numa(irq, mask, type); + + } while (entry); + closedir(dir); +} diff --git 
a/root_image/irqbalance/placement.c b/root_image/irqbalance/placement.c new file mode 100644 index 0000000..aae0576 --- /dev/null +++ b/root_image/irqbalance/placement.c @@ -0,0 +1,359 @@ +/* + * Copyright (C) 2006, Intel Corporation + * + * This file is part of irqbalance + * + * This program file is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program in a file named COPYING; if not, write to the + * Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301 USA + */ +#include "config.h" +#include +#include +#include +#include + +#include "types.h" +#include "irqbalance.h" + + +int power_mode; + +extern GList *interrupts, *packages, *cache_domains, *cpus; + +static uint64_t package_cost_func(struct interrupt *irq, struct package *package) +{ + int bonus = 0; + int maxcount; + /* moving to a cold package/cache/etc gets you a 3000 penalty */ + if (!cpus_intersects(irq->old_mask, package->mask)) + bonus = CROSS_PACKAGE_PENALTY; + + /* do a little numa affinity */ + if (!cpus_intersects(irq->numa_mask, package->mask)) + bonus += NUMA_PENALTY; + + /* but if the irq has had 0 interrupts for a while move it about more easily */ + if (irq->workload==0) + bonus = bonus / 10; + + /* in power save mode, you better be on package 0, with overflow to the next package if really needed */ + if (power_mode) + bonus += POWER_MODE_PACKAGE_THRESHOLD * package->number; + + /* if we're out of whack in terms of per class counts.. 
just block (except in power mode) */ + maxcount = (class_counts[irq->class] + package_count -1 ) / package_count; + if (package->class_count[irq->class]>=maxcount && !power_mode) + bonus += 300000; + + /* if the package has no cpus in the allowed mask.. just block */ + if (!cpus_intersects(irq->allowed_mask, package->mask)) + bonus += 600000; + + return irq->workload + bonus; +} + +static uint64_t cache_domain_cost_func(struct interrupt *irq, struct cache_domain *cache_domain) +{ + int bonus = 0; + /* moving to a cold cache gets you a 1500 penalty */ + if (!cpus_intersects(irq->old_mask, cache_domain->mask)) + bonus = CROSS_PACKAGE_PENALTY/2; + + /* do a little numa affinity */ + if (!cpus_intersects(irq->numa_mask, cache_domain->mask)) + bonus += NUMA_PENALTY; + + /* but if the irq has had 0 interrupts for a while move it about more easily */ + if (irq->workload==0) + bonus = bonus / 10; + + + /* pay 6000 for each previous interrupt of the same class */ + bonus += CLASS_VIOLATION_PENTALTY * cache_domain->class_count[irq->class]; + + /* if the cache domain has no cpus in the allowed mask.. 
just block */ + if (!cpus_intersects(irq->allowed_mask, cache_domain->mask)) + bonus += 600000; + + return irq->workload + bonus; +} + +static uint64_t cpu_cost_func(struct interrupt *irq, struct cpu_core *cpu) +{ + int bonus = 0; + /* moving to a colder core gets you a 1000 penalty */ + if (!cpus_intersects(irq->old_mask, cpu->mask)) + bonus = CROSS_PACKAGE_PENALTY/3; + + /* do a little numa affinity */ + if (!cpus_intersects(irq->numa_mask, cpu->mask)) + bonus += NUMA_PENALTY; + + /* but if the irq has had 0 interrupts for a while move it about more easily */ + if (irq->workload==0) + bonus = bonus / 10; + + /* + * since some chipsets only place at the first cpu, give a tiny preference to non-first + * cpus for specifically placed interrupts + */ + if (first_cpu(cpu->cache_mask)==cpu->number) + bonus++; + + /* pay 6000 for each previous interrupt of the same class */ + bonus += CLASS_VIOLATION_PENTALTY * cpu->class_count[irq->class]; + + /* if the core has no cpus in the allowed mask.. 
just block */ + if (!cpus_intersects(irq->allowed_mask, cpu->mask)) + bonus += 600000; + + return irq->workload + bonus; +} + + +static void place_cache_domain(struct package *package) +{ + GList *iter, *next; + GList *pkg; + struct interrupt *irq; + struct cache_domain *cache_domain; + + + iter = g_list_first(package->interrupts); + while (iter) { + struct cache_domain *best = NULL; + uint64_t best_cost = INT_MAX; + irq = iter->data; + + if (irq->balance_level <= BALANCE_PACKAGE) { + iter = g_list_next(iter); + continue; + } + pkg = g_list_first(package->cache_domains); + while (pkg) { + uint64_t newload; + + cache_domain = pkg->data; + newload = cache_domain->workload + cache_domain_cost_func(irq, cache_domain); + if (newload < best_cost) { + best = cache_domain; + best_cost = newload; + } + + pkg = g_list_next(pkg); + } + if (best) { + next = g_list_next(iter); + package->interrupts = g_list_delete_link(package->interrupts, iter); + + best->workload += irq->workload + 1; + best->interrupts=g_list_append(best->interrupts, irq); + best->class_count[irq->class]++; + irq->mask = best->mask; + iter = next; + } else + iter = g_list_next(iter); + } +} + + +static void place_core(struct cache_domain *cache_domain) +{ + GList *iter, *next; + GList *pkg; + struct interrupt *irq; + struct cpu_core *cpu; + + + iter = g_list_first(cache_domain->interrupts); + while (iter) { + struct cpu_core *best = NULL; + uint64_t best_cost = INT_MAX; + irq = iter->data; + + /* if the irq isn't per-core policy and is not very busy, leave it at cache domain level */ + if (irq->balance_level <= BALANCE_CACHE && irq->workload < CORE_SPECIFIC_THRESHOLD && !one_shot_mode) { + iter = g_list_next(iter); + continue; + } + pkg = g_list_first(cache_domain->cpu_cores); + while (pkg) { + uint64_t newload; + + cpu = pkg->data; + newload = cpu->workload + cpu_cost_func(irq, cpu); + if (newload < best_cost) { + best = cpu; + best_cost = newload; + } + + pkg = g_list_next(pkg); + } + if (best) { + next = 
g_list_next(iter); + cache_domain->interrupts = g_list_delete_link(cache_domain->interrupts, iter); + + best->workload += irq->workload + 1; + best->interrupts=g_list_append(best->interrupts, irq); + best->class_count[irq->class]++; + irq->mask = best->mask; + iter = next; + } else + iter = g_list_next(iter); + } +} + + +static void place_packages(GList *list) +{ + GList *iter; + GList *pkg; + struct interrupt *irq; + struct package *package; + + + iter = g_list_first(list); + while (iter) { + struct package *best = NULL; + uint64_t best_cost = INT_MAX; + irq = iter->data; + if (irq->balance_level == BALANCE_NONE) { + iter = g_list_next(iter); + continue; + } + pkg = g_list_first(packages); + while (pkg) { + uint64_t newload; + + package = pkg->data; + newload = package->workload + package_cost_func(irq, package); + if (newload < best_cost) { + best = package; + best_cost = newload; + } + + pkg = g_list_next(pkg); + } + if (best) { + best->workload += irq->workload + 1; + best->interrupts=g_list_append(best->interrupts, irq); + best->class_count[irq->class]++; + irq->mask = best->mask; + } + iter = g_list_next(iter); + } +} + + +static void place_affinity_hint(GList *list) +{ + /* still need to balance best workload within the affinity_hint mask */ + GList *iter; + struct interrupt *irq; + + iter = g_list_first(list); + while (iter) { + irq = iter->data; + if (irq->balance_level == BALANCE_NONE) { + iter = g_list_next(iter); + continue; + } + if ((!cpus_empty(irq->node_mask)) && + (!cpus_equal(irq->mask, irq->node_mask)) && + (!cpus_full(irq->node_mask))) { + irq->old_mask = irq->mask; + irq->mask = irq->node_mask; + } + + iter = g_list_next(iter); + } +} + + +static void do_unroutables(void) +{ + struct package *package; + struct cache_domain *cache_domain; + struct cpu_core *cpu; + struct interrupt *irq; + GList *iter, *inter; + + inter = g_list_first(interrupts); + while (inter) { + irq = inter->data; + inter = g_list_next(inter); + if (irq->balance_level != 
BALANCE_NONE) + continue; + + iter = g_list_first(packages); + while (iter) { + package = iter->data; + if (cpus_intersects(package->mask, irq->node_mask) || + cpus_intersects(package->mask, irq->mask)) + package->workload += irq->workload; + iter = g_list_next(iter); + } + + iter = g_list_first(cache_domains); + while (iter) { + cache_domain = iter->data; + if (cpus_intersects(cache_domain->mask, irq->node_mask) + || cpus_intersects(cache_domain->mask, irq->mask)) + cache_domain->workload += irq->workload; + iter = g_list_next(iter); + } + iter = g_list_first(cpus); + while (iter) { + cpu = iter->data; + if (cpus_intersects(cpu->mask, irq->node_mask) || + cpus_intersects(cpu->mask, irq->mask)) + cpu->workload += irq->workload; + iter = g_list_next(iter); + } + } +} + + +void calculate_placement(void) +{ + struct package *package; + struct cache_domain *cache_domain; + GList *iter; + /* first clear old data */ + clear_work_stats(); + sort_irq_list(); + do_unroutables(); + + place_packages(interrupts); + iter = g_list_first(packages); + while (iter) { + package = iter->data; + place_cache_domain(package); + iter = g_list_next(iter); + } + + iter = g_list_first(cache_domains); + while (iter) { + cache_domain = iter->data; + place_core(cache_domain); + iter = g_list_next(iter); + } + /* + * if affinity_hint is populated on irq and is not set to + * all CPUs (meaning it's initialized), honor that above + * anything in the package locality/workload. 
+ */ + place_affinity_hint(interrupts); +} diff --git a/root_image/irqbalance/powermode.c b/root_image/irqbalance/powermode.c new file mode 100644 index 0000000..792003a --- /dev/null +++ b/root_image/irqbalance/powermode.c @@ -0,0 +1,80 @@ +/* + * Copyright (C) 2006, Intel Corporation + * + * This file is part of irqbalance + * + * This program file is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program in a file named COPYING; if not, write to the + * Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301 USA + */ +#include "config.h" +#include +#include +#include +#include +#include + +#include "irqbalance.h" + + +extern int power_mode; + +static uint64_t previous; + +static unsigned int hysteresis; + +void check_power_mode(void) +{ + FILE *file; + char *line = NULL; + size_t size = 0; + char *c; + uint64_t dummy, irq, softirq; + file = fopen("/proc/stat", "r"); + if (!file) + return; + if (getline(&line, &size, file)==0) + size=0; + fclose(file); + if (!line) + return; + c=&line[4]; + dummy = strtoull(c, &c, 10); /* user */ + dummy = strtoull(c, &c, 10); /* nice */ + dummy = strtoull(c, &c, 10); /* system */ + dummy = strtoull(c, &c, 10); /* idle */ + dummy = strtoull(c, &c, 10); /* iowait */ + irq = strtoull(c, &c, 10); /* irq */ + softirq = strtoull(c, &c, 10); /* softirq */ + + + irq += softirq; + printf("IRQ delta is %lu \n", (unsigned long)(irq - previous) ); + if (irq - previous < POWER_MODE_SOFTIRQ_THRESHOLD) { + hysteresis++; + if 
(hysteresis > POWER_MODE_HYSTERESIS) { + if (debug_mode && !power_mode) + printf("IRQ delta is %lu, switching to power mode \n", (unsigned long)(irq - previous) ); + power_mode = 1; + } + } else { + if (debug_mode && power_mode) + printf("IRQ delta is %lu, switching to performance mode \n", (unsigned long)(irq - previous) ); + power_mode = 0; + hysteresis = 0; + } + previous = irq; + free(line); +} + diff --git a/root_image/irqbalance/procinterrupts.c b/root_image/irqbalance/procinterrupts.c new file mode 100644 index 0000000..e336efe --- /dev/null +++ b/root_image/irqbalance/procinterrupts.c @@ -0,0 +1,88 @@ +/* + * Copyright (C) 2006, Intel Corporation + * + * This file is part of irqbalance + * + * This program file is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program in a file named COPYING; if not, write to the + * Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301 USA + */ +#include "config.h" +#include +#include +#include +#include +#include + +#include "cpumask.h" +#include "irqbalance.h" + +#define LINESIZE 4096 + +void parse_proc_interrupts(void) +{ + FILE *file; + char *line = NULL; + size_t size = 0; + + file = fopen("/proc/interrupts", "r"); + if (!file) + return; + + /* first line is the header we don't need; nuke it */ + if (getline(&line, &size, file)==0) { + free(line); + return; + } + + while (!feof(file)) { + int cpunr; + int number; + uint64_t count; + char *c, *c2; + + if (getline(&line, &size, file)==0) + break; + + + /* lines with letters in front are special, like NMI count. Ignore */ + if (!(line[0]==' ' || (line[0]>='0' && line[0]<='9'))) + break; + c = strchr(line, ':'); + if (!c) + continue; + *c = 0; + c++; + number = strtoul(line, NULL, 10); + count = 0; + cpunr = 0; + + c2=NULL; + while (1) { + uint64_t C; + C = strtoull(c, &c2, 10); + if (c==c2) /* end of numbers */ + break; + count += C; + c=c2; + cpunr++; + } + if (cpunr != core_count) + need_cpu_rescan = 1; + + set_interrupt_count(number, count); + } + fclose(file); + free(line); +} diff --git a/root_image/irqbalance/types.h b/root_image/irqbalance/types.h new file mode 100644 index 0000000..b986fe6 --- /dev/null +++ b/root_image/irqbalance/types.h @@ -0,0 +1,90 @@ +#ifndef _INCLUDE_GUARD_TYPES_H +#define _INCLUDE_GUARD_TYPES_H + +#include + +#include "cpumask.h" + +#define BALANCE_NONE 0 +#define BALANCE_PACKAGE 1 +#define BALANCE_CACHE 2 +#define BALANCE_CORE 3 + +#define IRQ_OTHER 0 +#define IRQ_LEGACY 1 +#define IRQ_SCSI 2 +#define IRQ_TIMER 3 +#define IRQ_ETH 4 +#define IRQ_GETH 5 +#define IRQ_TGETH 6 + + +struct package { + uint64_t workload; + int number; + + cpumask_t mask; + + int 
class_count[7]; + + GList *cache_domains; + GList *interrupts; +}; + +struct cache_domain { + uint64_t workload; + int number; + + int marker; + + cpumask_t mask; + + cpumask_t package_mask; + + int class_count[7]; + + GList *cpu_cores; + GList *interrupts; +}; + + +struct cpu_core { + uint64_t workload; + int number; + + int marker; + + int class_count[7]; + + cpumask_t package_mask; + cpumask_t cache_mask; + cpumask_t mask; + + GList *interrupts; +}; + +struct interrupt { + uint64_t workload; + + int balance_level; + + int number; + int class; + + uint64_t count; + uint64_t old_count; + uint64_t extra; + + cpumask_t mask; + cpumask_t old_mask; + + + cpumask_t numa_mask; + cpumask_t allowed_mask; + + /* user/driver provided for smarter balancing */ + cpumask_t node_mask; +}; + + +#endif -- 2.39.5