--- /dev/null
+Arjan van de Ven <arjanvandeven@gmail.com>
+Neil Horman <nhorman@gmail.com>
+
--- /dev/null
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+ 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+\f
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+\f
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+\f
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+\f
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+\f
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) year name of author
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+ <signature of Ty Coon>, 1 April 1989
+ Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Library General
+Public License instead of this License.
--- /dev/null
+This is all tracked in the SVN repo. This file is just here to keep the
+autotools from complaining.
+
--- /dev/null
+# Flags shared by every object file. The glib include paths come from
+# pkg-config; the backticks are expanded by the shell when a recipe runs.
+CFLAGS+=-g -Os -D_FORTIFY_SOURCE=2 -Wall -W `pkg-config --cflags glib-2.0`
+
+.PHONY: all clean
+
+all: irqbalance
+
+# Object files that make up the daemon (the variable keeps its historical name).
+LIBS=bitmap.o irqbalance.o cputree.o procinterrupts.o irqlist.o placement.o activate.o network.o powermode.o numa.o classify.o
+
+# Link through $(CC) so the compiler can be overridden, and name the glib
+# libraries AFTER the objects: linkers that resolve symbols left to right
+# (for example with --as-needed) discard libraries listed before their users.
+irqbalance: .depend $(LIBS)
+	$(CC) $(CFLAGS) $(LDFLAGS) $(LIBS) `pkg-config --libs glib-2.0` -o irqbalance
+
+clean:
+	rm -f irqbalance *~ *.o .depend
+
+# rule for building dependency lists, and writing them to a file
+# named ".depend".
+.depend:
+	rm -f .depend
+	gccmakedep -f- -- $(CFLAGS) -- *.c > .depend
--- /dev/null
+# Makefile.am --
+# Copyright 2009 Red Hat Inc., Durham, North Carolina.
+# All Rights Reserved.
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# Authors:
+# Steve Grubb <sgrubb@redhat.com>
+#
+
+AUTOMAKE_OPTIONS = no-dependencies
+EXTRA_DIST = README INSTALL COPYING autogen.sh m4/cap-ng.m4
+
+# AM_CPPFLAGS is the supported replacement for the deprecated INCLUDES variable.
+AM_CPPFLAGS = -I${top_srcdir}
+LIBS = $(CAPNG_LDADD) $(GLIB_LIBS)
+AM_CFLAGS = -g -Os -W -Wall -Wshadow -Wformat -Wundef $(GLIB_CFLAGS) -D_GNU_SOURCE
+noinst_HEADERS = bitmap.h constants.h cpumask.h irqbalance.h non-atomic.h \
+	types.h
+sbin_PROGRAMS = irqbalance
+irqbalance_SOURCES = activate.c bitmap.c classify.c cputree.c irqbalance.c \
+	irqlist.c network.c numa.c placement.c powermode.c procinterrupts.c
+
+CONFIG_CLEAN_FILES = debug*.list config/*
+
+# Extra cleanup goes in the clean-local hook; redefining automake's own
+# clean-generic target would replace the generated rule instead of extending it.
+clean-local:
+	rm -rf autom4te*.cache
+	rm -f *.rej *.orig *~
+
--- /dev/null
+No news currently
--- /dev/null
+/*
+ * Copyright (C) 2006, Intel Corporation
+ *
+ * This file is part of irqbalance
+ *
+ * This program file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program in a file named COPYING; if not, write to the
+ * Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301 USA
+ */
+
+/*
+ * This file contains the code to communicate a selected distribution / mapping
+ * of interrupts to the kernel.
+ */
+#include "config.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdint.h>
+
+#include "irqbalance.h"
+
+
+/*
+ * Push the currently selected CPU masks to the kernel.
+ *
+ * Walks the global "interrupts" list. An interrupt is skipped when its
+ * balance_level is BALANCE_NONE or when its mask equals old_mask (nothing
+ * changed). For every other interrupt the mask is formatted with
+ * cpumask_scnprintf() and written to /proc/irq/<number>/smp_affinity.
+ * If that file cannot be opened the interrupt is skipped and old_mask is
+ * left untouched; otherwise old_mask is set to mask after the write.
+ * The results of fprintf()/fclose() are not checked (best effort).
+ */
+void activate_mapping(void)
+{
+	GList *iter;
+
+	for (iter = g_list_first(interrupts); iter; iter = g_list_next(iter)) {
+		struct interrupt *irq = iter->data;
+		char path[PATH_MAX];
+		char mask_text[PATH_MAX];
+		FILE *file;
+
+		/* don't touch NONE irqs, or irqs whose mask did not change */
+		if (irq->balance_level == BALANCE_NONE ||
+		    cpus_equal(irq->mask, irq->old_mask))
+			continue;
+
+		/* bounded formatting; the path and the mask get separate buffers */
+		snprintf(path, sizeof(path), "/proc/irq/%i/smp_affinity", irq->number);
+		file = fopen(path, "w");
+		if (!file)
+			continue;
+
+		cpumask_scnprintf(mask_text, sizeof(mask_text), irq->mask);
+		fprintf(file,"%s", mask_text);
+		fclose(file);
+		irq->old_mask = irq->mask;
+	}
+}
--- /dev/null
+#!/bin/sh
+# Regenerate the autotools build files.
+# -e: stop at the first failing command; -x: echo each command as it runs.
+set -ex
+# --no-recursive is not passed: it is available only in recent autoconf versions
+autoreconf --force --verbose --install
--- /dev/null
+/*
+
+This file is taken from the Linux kernel and minimally adapted for use in userspace
+
+*/
+
+/*
+ * lib/bitmap.c
+ * Helper functions for bitmap.h.
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ */
+#include "config.h"
+#include <unistd.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include "bitmap.h"
+#include "non-atomic.h"
+
+/*
+ * bitmaps provide an array of bits, implemented using an
+ * array of unsigned longs. The number of valid bits in a
+ * given bitmap does _not_ need to be an exact multiple of
+ * BITS_PER_LONG.
+ *
+ * The possible unused bits in the last, partially used word
+ * of a bitmap are 'don't care'. The implementation makes
+ * no particular effort to keep them zero. It ensures that
+ * their value will not affect the results of any operation.
+ * The bitmap operations that return Boolean (bitmap_empty,
+ * for example) or scalar (bitmap_weight, for example) results
+ * carefully filter out these unused bits from impacting their
+ * results.
+ *
+ * These operations actually hold to a slightly stronger rule:
+ * if you don't input any bitmaps to these ops that have some
+ * unused bits set, then they won't output any set unused bits
+ * in output bitmaps.
+ *
+ * The byte ordering of bitmaps is more natural on little
+ * endian architectures. See the big-endian headers
+ * include/asm-ppc64/bitops.h and include/asm-s390/bitops.h
+ * for the best explanations of this ordering.
+ */
+
+/*
+ * Return 1 when no bit is set in the first @bits bits of @bitmap, else 0.
+ * Whole words are compared against zero; a trailing partial word is
+ * tested through BITMAP_LAST_WORD_MASK(bits).
+ */
+int __bitmap_empty(const unsigned long *bitmap, int bits)
+{
+	const int whole_words = bits / BITS_PER_LONG;
+	const int tail_bits = bits % BITS_PER_LONG;
+	int idx = 0;
+
+	while (idx < whole_words) {
+		if (bitmap[idx] != 0)
+			return 0;
+		idx++;
+	}
+
+	if (tail_bits && (bitmap[idx] & BITMAP_LAST_WORD_MASK(bits)) != 0)
+		return 0;
+
+	return 1;
+}
+
+/*
+ * Return 1 when every one of the first @bits bits of @bitmap is set, else 0.
+ * Whole words are checked by complementing them; a trailing partial word
+ * is tested through BITMAP_LAST_WORD_MASK(bits).
+ */
+int __bitmap_full(const unsigned long *bitmap, int bits)
+{
+	const int whole_words = bits / BITS_PER_LONG;
+	const int tail_bits = bits % BITS_PER_LONG;
+	int idx = 0;
+
+	while (idx < whole_words) {
+		if (~bitmap[idx] != 0)
+			return 0;
+		idx++;
+	}
+
+	if (tail_bits && (~bitmap[idx] & BITMAP_LAST_WORD_MASK(bits)) != 0)
+		return 0;
+
+	return 1;
+}
+
+/*
+ * Return 1 when @bitmap1 and @bitmap2 agree on their first @bits bits,
+ * else 0. A trailing partial word is compared only through
+ * BITMAP_LAST_WORD_MASK(bits).
+ */
+int __bitmap_equal(const unsigned long *bitmap1,
+		const unsigned long *bitmap2, int bits)
+{
+	const int whole_words = bits / BITS_PER_LONG;
+	const int tail_bits = bits % BITS_PER_LONG;
+	int idx = 0;
+
+	while (idx < whole_words) {
+		if (bitmap1[idx] != bitmap2[idx])
+			return 0;
+		idx++;
+	}
+
+	if (tail_bits) {
+		unsigned long diff = bitmap1[idx] ^ bitmap2[idx];
+
+		if (diff & BITMAP_LAST_WORD_MASK(bits))
+			return 0;
+	}
+
+	return 1;
+}
+
+/*
+ * Store the bitwise complement of the first @bits bits of @src in @dst.
+ * In a trailing partial word the result is additionally masked with
+ * BITMAP_LAST_WORD_MASK(bits).
+ */
+void __bitmap_complement(unsigned long *dst, const unsigned long *src, int bits)
+{
+	const int whole_words = bits / BITS_PER_LONG;
+	const int tail_bits = bits % BITS_PER_LONG;
+	int idx = 0;
+
+	while (idx < whole_words) {
+		dst[idx] = ~src[idx];
+		idx++;
+	}
+
+	if (tail_bits)
+		dst[idx] = ~src[idx] & BITMAP_LAST_WORD_MASK(bits);
+}
+
+/*
+ * __bitmap_shift_right - logical right shift of the bits in a bitmap
+ *   @dst - destination bitmap
+ *   @src - source bitmap (may be the same array as @dst)
+ *   @shift - shift by this many bits (expected to be non-negative)
+ *   @bits - bitmap size, in bits
+ *
+ * Shifting right (dividing) means moving bits in the MS -> LS bit
+ * direction.  Zeros are fed into the vacated MS positions and the
+ * LS bits shifted off the bottom are lost.  A @shift that covers the
+ * whole bitmap simply clears every word of @dst.
+ */
+void __bitmap_shift_right(unsigned long *dst,
+			const unsigned long *src, int shift, int bits)
+{
+	int k, lim = BITS_TO_LONGS(bits), left = bits % BITS_PER_LONG;
+	int off = shift/BITS_PER_LONG, rem = shift % BITS_PER_LONG;
+	unsigned long mask = (1UL << left) - 1;
+
+	/*
+	 * Never let the word offset exceed the bitmap length, otherwise
+	 * the final memset would start before dst[0].
+	 */
+	if (off > lim)
+		off = lim;
+
+	for (k = 0; off + k < lim; ++k) {
+		unsigned long upper, lower;
+
+		/*
+		 * If shift is not word aligned, take lower rem bits of
+		 * word above and make them the top rem bits of result.
+		 * The shift is done here, where rem != 0 is known, so the
+		 * shift count is never a full word (undefined behaviour).
+		 */
+		if (!rem || off + k + 1 >= lim)
+			upper = 0;
+		else {
+			upper = src[off + k + 1];
+			if (off + k + 1 == lim - 1 && left)
+				upper &= mask;
+			upper <<= (BITS_PER_LONG - rem);
+		}
+		lower = src[off + k];
+		if (left && off + k == lim - 1)
+			lower &= mask;
+		dst[k] = upper | lower >> rem;
+		if (left && k == lim - 1)
+			dst[k] &= mask;
+	}
+	/* the top off words were vacated entirely */
+	if (off)
+		memset(&dst[lim - off], 0, off*sizeof(unsigned long));
+}
+
+
+/*
+ * __bitmap_shift_left - logical left shift of the bits in a bitmap
+ *   @dst - destination bitmap
+ *   @src - source bitmap (may be the same array as @dst)
+ *   @shift - shift by this many bits (expected to be non-negative)
+ *   @bits - bitmap size, in bits
+ *
+ * Shifting left (multiplying) means moving bits in the LS -> MS
+ * direction.  Zeros are fed into the vacated LS bit positions
+ * and those MS bits shifted off the top are lost.  A @shift that covers
+ * the whole bitmap simply clears every word of @dst.
+ */
+
+void __bitmap_shift_left(unsigned long *dst,
+			const unsigned long *src, int shift, int bits)
+{
+	int k, lim = BITS_TO_LONGS(bits), left = bits % BITS_PER_LONG;
+	int off = shift/BITS_PER_LONG, rem = shift % BITS_PER_LONG;
+
+	/*
+	 * Never let the word offset exceed the bitmap length, otherwise
+	 * the final memset would run past the end of dst.
+	 */
+	if (off > lim)
+		off = lim;
+
+	for (k = lim - off - 1; k >= 0; --k) {
+		unsigned long upper, lower;
+
+		/*
+		 * If shift is not word aligned, take upper rem bits of
+		 * word below and make them the bottom rem bits of result.
+		 * The shift is done here, where rem != 0 is known, so the
+		 * shift count is never a full word (undefined behaviour).
+		 */
+		if (rem && k > 0)
+			lower = src[k - 1] >> (BITS_PER_LONG - rem);
+		else
+			lower = 0;
+		upper = src[k];
+		if (left && k == lim - 1)
+			upper &= (1UL << left) - 1;
+		dst[k + off] = lower | upper << rem;
+		if (left && k + off == lim - 1)
+			dst[k + off] &= (1UL << left) - 1;
+	}
+	/* the bottom off words were vacated entirely */
+	if (off)
+		memset(dst, 0, off*sizeof(unsigned long));
+}
+
+/*
+ * dst = bitmap1 & bitmap2, computed word by word over the
+ * BITS_TO_LONGS(bits) words that hold @bits bits.
+ */
+void __bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
+				const unsigned long *bitmap2, int bits)
+{
+	const int words = BITS_TO_LONGS(bits);
+	int idx = 0;
+
+	while (idx < words) {
+		dst[idx] = bitmap1[idx] & bitmap2[idx];
+		idx++;
+	}
+}
+
+/*
+ * dst = bitmap1 | bitmap2, computed word by word over the
+ * BITS_TO_LONGS(bits) words that hold @bits bits.
+ */
+void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
+				const unsigned long *bitmap2, int bits)
+{
+	const int words = BITS_TO_LONGS(bits);
+	int idx = 0;
+
+	while (idx < words) {
+		dst[idx] = bitmap1[idx] | bitmap2[idx];
+		idx++;
+	}
+}
+
+/*
+ * dst = bitmap1 ^ bitmap2, computed word by word over the
+ * BITS_TO_LONGS(bits) words that hold @bits bits.
+ */
+void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1,
+				const unsigned long *bitmap2, int bits)
+{
+	const int words = BITS_TO_LONGS(bits);
+	int idx = 0;
+
+	while (idx < words) {
+		dst[idx] = bitmap1[idx] ^ bitmap2[idx];
+		idx++;
+	}
+}
+
+/*
+ * dst = bitmap1 & ~bitmap2 (bits of @bitmap1 that are clear in @bitmap2),
+ * computed word by word over the BITS_TO_LONGS(bits) words that hold
+ * @bits bits.
+ */
+void __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1,
+				const unsigned long *bitmap2, int bits)
+{
+	const int words = BITS_TO_LONGS(bits);
+	int idx = 0;
+
+	while (idx < words) {
+		dst[idx] = bitmap1[idx] & ~bitmap2[idx];
+		idx++;
+	}
+}
+
+/*
+ * Return 1 when @bitmap1 and @bitmap2 share at least one set bit among
+ * their first @bits bits, else 0. A trailing partial word is tested
+ * through BITMAP_LAST_WORD_MASK(bits).
+ */
+int __bitmap_intersects(const unsigned long *bitmap1,
+				const unsigned long *bitmap2, int bits)
+{
+	const int whole_words = bits / BITS_PER_LONG;
+	const int tail_bits = bits % BITS_PER_LONG;
+	int idx = 0;
+
+	while (idx < whole_words) {
+		if ((bitmap1[idx] & bitmap2[idx]) != 0)
+			return 1;
+		idx++;
+	}
+
+	if (tail_bits) {
+		unsigned long common = bitmap1[idx] & bitmap2[idx];
+
+		if (common & BITMAP_LAST_WORD_MASK(bits))
+			return 1;
+	}
+	return 0;
+}
+
+/*
+ * Bitmap printing & parsing functions: first version by Bill Irwin,
+ * second version by Paul Jackson, third by Joe Korty.
+ */
+
+/* number of bitmap bits carried by one comma-separated hex group */
+#define CHUNKSZ				32
+/* smallest number of bits able to represent val (position of its top set bit) */
+#define nbits_to_hold_value(val)	fls(val)
+/*
+ * value of the hex digit c; c must already be known to be a hex digit.
+ * The argument is parenthesized so expression arguments expand correctly;
+ * it is still evaluated more than once, so it must be free of side effects.
+ */
+#define unhex(c)			(isdigit(c) ? ((c) - '0') : (toupper(c) - 'A' + 10))
+#define BASEDEC 10		/* fancier cpuset lists input in decimal */
+
+/**
+ * bitmap_scnprintf - convert bitmap to an ASCII hex string.
+ * @buf: byte buffer into which string is placed
+ * @buflen: reserved size of @buf, in bytes
+ * @maskp: pointer to bitmap to convert
+ * @nmaskbits: size of bitmap, in bits
+ *
+ * Hex digits are grouped into comma-separated sets of up to eight digits
+ * (CHUNKSZ bits) per set, most significant set first. Leading sets whose
+ * value is zero are not printed; the lowest set is always printed.
+ *
+ * Returns the number of characters stored in @buf, not counting the
+ * terminating NUL; output that does not fit is truncated and the return
+ * value never exceeds @buflen - 1.
+ */
+int bitmap_scnprintf(char *buf, unsigned int buflen,
+	const unsigned long *maskp, int nmaskbits)
+{
+	int i, word, bit;
+	unsigned int len = 0;
+	unsigned long val;
+	const char *sep = "";
+	int chunksz;
+	uint32_t chunkmask;
+	int first = 1;
+
+	/* leave a valid (empty) string behind even if nothing gets printed */
+	if (buflen)
+		buf[0] = '\0';
+
+	/* only the most significant chunk may be narrower than CHUNKSZ */
+	chunksz = nmaskbits & (CHUNKSZ - 1);
+	if (chunksz == 0)
+		chunksz = CHUNKSZ;
+
+	i = ALIGN(nmaskbits, CHUNKSZ) - CHUNKSZ;
+	for (; i >= 0; i -= CHUNKSZ) {
+		chunkmask = ((1ULL << chunksz) - 1);
+		word = i / BITS_PER_LONG;
+		bit = i % BITS_PER_LONG;
+		val = (maskp[word] >> bit) & chunkmask;
+		if (val!=0 || !first || i==0) {
+			int n = snprintf(buf+len, buflen-len, "%s%0*lx", sep,
+				(chunksz+3)/4, val);
+
+			if (n < 0)
+				break;
+			/*
+			 * snprintf reports the length it WANTED to write;
+			 * on truncation clamp to what actually fits so that
+			 * buflen-len can never wrap around.
+			 */
+			if ((unsigned int)n >= buflen - len) {
+				len = buflen ? buflen - 1 : 0;
+				break;
+			}
+			len += (unsigned int)n;
+			sep = ",";
+			first = 0;
+		}
+		/*
+		 * Every chunk below the top one is full width, whether or
+		 * not the top one was printed.
+		 */
+		chunksz = CHUNKSZ;
+	}
+	return (int)len;
+}
+
+/**
+ * __bitmap_parse - convert an ASCII hex string into a bitmap.
+ * @buf: pointer to buffer containing string.
+ * @buflen: buffer size in bytes.  If string is smaller than this
+ *    then it must be terminated with a \0.
+ * @is_user: unused in this userspace adaptation; kept so the signature
+ *    stays unchanged for existing callers.
+ * @maskp: pointer to bitmap array that will contain result.
+ * @nmaskbits: size of bitmap, in bits.
+ *
+ * Commas group hex digits into chunks.  Each chunk defines exactly 32
+ * bits of the resultant bitmask.  No chunk may specify a value larger
+ * than 32 bits (%-EOVERFLOW), and if a chunk specifies a smaller value
+ * then leading 0-bits are prepended.  %-EINVAL is returned for illegal
+ * characters, for embedded whitespace and for grouping errors such as
+ * "1,,5", ",44", "," and "".
+ * Leading and trailing whitespace accepted, but not embedded whitespace.
+ *
+ * Returns 0 on success. @maskp is zeroed first and may hold a partial
+ * result when an error is returned.
+ */
+int __bitmap_parse(const char *buf, unsigned int buflen,
+		int is_user __attribute((unused)), unsigned long *maskp,
+		int nmaskbits)
+{
+	int c, old_c, totaldigits, ndigits, nchunks, nbits;
+	uint32_t chunk;
+
+	bitmap_zero(maskp, nmaskbits);
+
+	nchunks = nbits = totaldigits = c = 0;
+	do {
+		chunk = ndigits = 0;
+
+		/* Get the next chunk of the bitmap */
+		while (buflen) {
+			old_c = c;
+			/*
+			 * Go through unsigned char: the <ctype.h> functions
+			 * are undefined for negative values other than EOF.
+			 */
+			c = (unsigned char)*buf++;
+			buflen--;
+			if (isspace(c))
+				continue;
+
+			/*
+			 * If the last character was a space and the current
+			 * character isn't '\0', we've got embedded whitespace.
+			 * This is a no-no, so throw an error.
+			 */
+			if (totaldigits && c && isspace(old_c))
+				return -EINVAL;
+
+			/* A '\0' or a ',' signal the end of the chunk */
+			if (c == '\0' || c == ',')
+				break;
+
+			if (!isxdigit(c))
+				return -EINVAL;
+
+			/*
+			 * Make sure there are at least 4 free bits in 'chunk'.
+			 * If not, this hexdigit will overflow 'chunk', so
+			 * throw an error.
+			 */
+			if (chunk & ~((1UL << (CHUNKSZ - 4)) - 1))
+				return -EOVERFLOW;
+
+			chunk = (chunk << 4) | unhex(c);
+			ndigits++; totaldigits++;
+		}
+		if (ndigits == 0)
+			return -EINVAL;
+		/* leading all-zero chunks do not count towards the size */
+		if (nchunks == 0 && chunk == 0)
+			continue;
+
+		/* make room for the new chunk in the low CHUNKSZ bits */
+		__bitmap_shift_left(maskp, maskp, CHUNKSZ, nmaskbits);
+		*maskp |= chunk;
+		nchunks++;
+		nbits += (nchunks == 1) ? nbits_to_hold_value(chunk) : CHUNKSZ;
+		if (nbits > nmaskbits)
+			return -EOVERFLOW;
+	} while (buflen && c == ',');
+
+	return 0;
+}
--- /dev/null
+#ifndef __LINUX_BITMAP_H
+#define __LINUX_BITMAP_H
+
+#ifndef __ASSEMBLY__
+
+#include <string.h>
+#include <stdint.h>
+#include <unistd.h>
+
+
+#define BITS_PER_LONG ((int)sizeof(unsigned long)*8) /* width of unsigned long in bits, computed as sizeof * 8 */
+
+#define BITS_TO_LONGS(bits) \
+ (((bits)+BITS_PER_LONG-1)/BITS_PER_LONG) /* number of unsigned longs needed to hold 'bits' bits (rounds up) */
+#define DECLARE_BITMAP(name,bits) \
+ unsigned long name[BITS_TO_LONGS(bits)] /* declares an array 'name' just large enough for 'bits' bits */
+#define ALIGN(x,a) (((x)+(a)-1UL)&~((a)-1UL)) /* rounds x up to a multiple of a; the mask trick is only exact when a is a power of two */
+
+
+#include "non-atomic.h"
+
+/*
+ * Population count of a 32-bit word, computed by summing adjacent bit
+ * fields of doubling width (pairs, nibbles, bytes) in parallel.
+ */
+static inline unsigned int hweight32(unsigned int w)
+{
+	unsigned int pairs = w - ((w >> 1) & 0x55555555);
+	unsigned int nibbles = (pairs & 0x33333333) + ((pairs >> 2) & 0x33333333);
+	unsigned int bytes = (nibbles + (nibbles >> 4)) & 0x0F0F0F0F;
+
+	/* Fold the four per-byte counts down into the lowest byte. */
+	bytes += bytes >> 8;
+	bytes += bytes >> 16;
+	return bytes & 0x000000FF;
+}
+
+/*
+ * Population count of a 64-bit value.  When unsigned long is 32 bits wide
+ * the two halves are counted separately with hweight32(); otherwise the
+ * parallel bit-field sum is done directly on the 64-bit value.
+ */
+static inline unsigned long hweight64(uint64_t w)
+{
+	if (BITS_PER_LONG == 32) {
+		unsigned int upper = (unsigned int)(w >> 32);
+		unsigned int lower = (unsigned int)w;
+
+		return hweight32(upper) + hweight32(lower);
+	}
+
+	w = w - ((w >> 1) & 0x5555555555555555ull);
+	w = ((w >> 2) & 0x3333333333333333ull) + (w & 0x3333333333333333ull);
+	w = ((w >> 4) + w) & 0x0f0f0f0f0f0f0f0full;
+	/* The multiply accumulates all byte counts into the top byte. */
+	w *= 0x0101010101010101ull;
+	return w >> 56;
+}
+
+
+/*
+ * fls - find last (most significant) set bit.
+ * @x: the word to search; its bit pattern is examined as 32 unsigned bits.
+ *
+ * Returns the 1-based position of the highest set bit, or 0 when @x is 0,
+ * so fls(1) == 1 and fls(0x80000000) == 32.
+ *
+ * The search shifts an unsigned copy of @x: left-shifting a signed int into
+ * or past the sign bit is undefined behaviour in C.
+ */
+static inline int fls(int x)
+{
+	unsigned int v = (unsigned int)x;
+	int r = 32;
+
+	if (!v)
+		return 0;
+	/* Binary search: move the top set bit towards bit 31, narrowing r. */
+	if (!(v & 0xffff0000u)) {
+		v <<= 16;
+		r -= 16;
+	}
+	if (!(v & 0xff000000u)) {
+		v <<= 8;
+		r -= 8;
+	}
+	if (!(v & 0xf0000000u)) {
+		v <<= 4;
+		r -= 4;
+	}
+	if (!(v & 0xc0000000u)) {
+		v <<= 2;
+		r -= 2;
+	}
+	if (!(v & 0x80000000u)) {
+		v <<= 1;
+		r -= 1;
+	}
+	return r;
+}
+
+/* Population count of an unsigned long, dispatching on the width of the type. */
+static inline unsigned long hweight_long(unsigned long w)
+{
+	if (sizeof(w) == 4)
+		return hweight32(w);
+	return hweight64(w);
+}
+
+#define min(x,y) ({ /* smaller of x and y; relies on GNU statement expressions and typeof */ \
+ typeof(x) _x = (x); /* copy each argument once so side effects are not repeated */ \
+ typeof(y) _y = (y); \
+ (void) (&_x == &_y); /* pointer comparison lets the compiler diagnose mismatched argument types */ \
+ _x < _y ? _x : _y; })
+
+
+/*
+ * bitmaps provide bit arrays that consume one or more unsigned
+ * longs. The bitmap interface and available operations are listed
+ * here, in bitmap.h
+ *
+ * Function implementations generic to all architectures are in
+ * lib/bitmap.c. Functions implementations that are architecture
+ * specific are in various include/asm-<arch>/bitops.h headers
+ * and other arch/<arch> specific files.
+ *
+ * See lib/bitmap.c for more details.
+ */
+
+/*
+ * The available bitmap operations and their rough meaning in the
+ * case that the bitmap is a single unsigned long are thus:
+ *
+ * Note that nbits should always be a compile-time evaluable constant.
+ * Otherwise many inlines will generate horrible code.
+ *
+ * bitmap_zero(dst, nbits) *dst = 0UL
+ * bitmap_fill(dst, nbits) *dst = ~0UL
+ * bitmap_copy(dst, src, nbits) *dst = *src
+ * bitmap_and(dst, src1, src2, nbits) *dst = *src1 & *src2
+ * bitmap_or(dst, src1, src2, nbits) *dst = *src1 | *src2
+ * bitmap_xor(dst, src1, src2, nbits) *dst = *src1 ^ *src2
+ * bitmap_andnot(dst, src1, src2, nbits) *dst = *src1 & ~(*src2)
+ * bitmap_complement(dst, src, nbits) *dst = ~(*src)
+ * bitmap_equal(src1, src2, nbits) Are *src1 and *src2 equal?
+ * bitmap_intersects(src1, src2, nbits) Do *src1 and *src2 overlap?
+ * bitmap_subset(src1, src2, nbits) Is *src1 a subset of *src2?
+ * bitmap_empty(src, nbits) Are all bits zero in *src?
+ * bitmap_full(src, nbits) Are all bits set in *src?
+ * bitmap_weight(src, nbits) Hamming Weight: number set bits
+ * bitmap_shift_right(dst, src, n, nbits) *dst = *src >> n
+ * bitmap_shift_left(dst, src, n, nbits) *dst = *src << n
+ * bitmap_remap(dst, src, old, new, nbits) *dst = map(old, new)(src)
+ * bitmap_bitremap(oldbit, old, new, nbits) newbit = map(old, new)(oldbit)
+ * bitmap_scnprintf(buf, len, src, nbits) Print bitmap src to buf
+ * bitmap_parse(buf, buflen, dst, nbits) Parse bitmap dst from kernel buf
+ * bitmap_parse_user(ubuf, ulen, dst, nbits) Parse bitmap dst from user buf
+ * bitmap_scnlistprintf(buf, len, src, nbits) Print bitmap src as list to buf
+ * bitmap_parselist(buf, dst, nbits) Parse bitmap dst from list
+ * bitmap_find_free_region(bitmap, bits, order) Find and allocate bit region
+ * bitmap_release_region(bitmap, pos, order) Free specified bit region
+ * bitmap_allocate_region(bitmap, pos, order) Allocate specified bit region
+ */
+
+/*
+ * Also the following operations in asm/bitops.h apply to bitmaps.
+ *
+ * set_bit(bit, addr) *addr |= bit
+ * clear_bit(bit, addr) *addr &= ~bit
+ * change_bit(bit, addr) *addr ^= bit
+ * test_bit(bit, addr) Is bit set in *addr?
+ * test_and_set_bit(bit, addr) Set bit and return old value
+ * test_and_clear_bit(bit, addr) Clear bit and return old value
+ * test_and_change_bit(bit, addr) Change bit and return old value
+ * find_first_zero_bit(addr, nbits) Position first zero bit in *addr
+ * find_first_bit(addr, nbits) Position first set bit in *addr
+ * find_next_zero_bit(addr, nbits, bit) Position next zero bit in *addr >= bit
+ * find_next_bit(addr, nbits, bit) Position next set bit in *addr >= bit
+ */
+
+/*
+ * The DECLARE_BITMAP(name,bits) macro, defined near the top of this
+ * header, can be used to declare an array named 'name' of just enough
+ * unsigned longs to contain all bit positions from 0 to 'bits' - 1.
+ */
+
+/*
+ * lib/bitmap.c provides these functions:
+ */
+
+extern int __bitmap_empty(const unsigned long *bitmap, int bits); /* used by bitmap_empty() when nbits > BITS_PER_LONG */
+extern int __bitmap_full(const unsigned long *bitmap, int bits); /* used by bitmap_full() when nbits > BITS_PER_LONG */
+extern int __bitmap_equal(const unsigned long *bitmap1,
+ const unsigned long *bitmap2, int bits); /* used by bitmap_equal() for multi-word bitmaps */
+extern void __bitmap_complement(unsigned long *dst, const unsigned long *src,
+ int bits); /* used by bitmap_complement() for multi-word bitmaps */
+extern void __bitmap_shift_right(unsigned long *dst,
+ const unsigned long *src, int shift, int bits); /* used by bitmap_shift_right() for multi-word bitmaps */
+extern void __bitmap_shift_left(unsigned long *dst,
+ const unsigned long *src, int shift, int bits); /* used by bitmap_shift_left() for multi-word bitmaps */
+extern void __bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
+ const unsigned long *bitmap2, int bits); /* used by bitmap_and() for multi-word bitmaps */
+extern void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
+ const unsigned long *bitmap2, int bits); /* used by bitmap_or() for multi-word bitmaps */
+extern void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1,
+ const unsigned long *bitmap2, int bits); /* used by bitmap_xor() for multi-word bitmaps */
+extern void __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1,
+ const unsigned long *bitmap2, int bits); /* used by bitmap_andnot() for multi-word bitmaps */
+extern int __bitmap_intersects(const unsigned long *bitmap1,
+ const unsigned long *bitmap2, int bits); /* used by bitmap_intersects() for multi-word bitmaps */
+extern int __bitmap_subset(const unsigned long *bitmap1,
+ const unsigned long *bitmap2, int bits); /* used by bitmap_subset() for multi-word bitmaps */
+extern int __bitmap_weight(const unsigned long *bitmap, int bits); /* used by bitmap_weight() for multi-word bitmaps */
+
+extern int bitmap_scnprintf(char *buf, unsigned int len,
+ const unsigned long *src, int nbits); /* print bitmap src to buf */
+extern int __bitmap_parse(const char *buf, unsigned int buflen, int is_user,
+ unsigned long *dst, int nbits); /* hex-string parser; bitmap_parse() below calls it with is_user = 0 */
+extern int bitmap_scnlistprintf(char *buf, unsigned int len,
+ const unsigned long *src, int nbits); /* print bitmap src as a list to buf */
+extern int bitmap_parselist(const char *buf, unsigned long *maskp,
+ int nmaskbits); /* parse bitmap from a list string */
+extern void bitmap_remap(unsigned long *dst, const unsigned long *src,
+ const unsigned long *old, const unsigned long *new, int bits); /* *dst = map(old, new)(src) */
+extern int bitmap_bitremap(int oldbit,
+ const unsigned long *old, const unsigned long *new, int bits); /* newbit = map(old, new)(oldbit) */
+extern int bitmap_find_free_region(unsigned long *bitmap, int bits, int order); /* find and allocate a bit region */
+extern void bitmap_release_region(unsigned long *bitmap, int pos, int order); /* free the specified bit region */
+extern int bitmap_allocate_region(unsigned long *bitmap, int pos, int order); /* allocate the specified bit region */
+
+#define BITMAP_LAST_WORD_MASK(nbits) \
+( \
+ ((nbits) % BITS_PER_LONG) ? \
+ (1UL<<((nbits) % BITS_PER_LONG))-1 : ~0UL \
+) /* low (nbits % BITS_PER_LONG) bits set; all ones when nbits is a multiple of BITS_PER_LONG */
+
+/*
+ * Clear a bitmap.  A bitmap that fits in one word is cleared with a single
+ * store; larger ones are cleared word-wise with memset().
+ */
+static inline void bitmap_zero(unsigned long *dst, int nbits)
+{
+	int len;
+
+	if (nbits <= BITS_PER_LONG) {
+		*dst = 0UL;
+		return;
+	}
+
+	len = BITS_TO_LONGS(nbits) * sizeof(unsigned long);
+	memset(dst, 0, len);
+}
+
+/*
+ * Set the first @nbits bits of @dst.  Every word but the last is filled
+ * with ones; the last word gets only its valid bits set, via
+ * BITMAP_LAST_WORD_MASK().
+ *
+ * A non-positive @nbits is a no-op: it would otherwise make the word
+ * count zero and the final store would index dst[(size_t)-1].
+ */
+static inline void bitmap_fill(unsigned long *dst, int nbits)
+{
+	size_t nlongs;
+
+	if (nbits <= 0)
+		return;
+
+	nlongs = BITS_TO_LONGS(nbits);
+	if (nlongs > 1) {
+		size_t len = (nlongs - 1) * sizeof(unsigned long);
+		memset(dst, 0xff, len);
+	}
+	dst[nlongs - 1] = BITMAP_LAST_WORD_MASK(nbits);
+}
+
+/*
+ * Copy @src to @dst.  A single-word bitmap is copied with one assignment,
+ * anything larger with memcpy() over whole words.
+ */
+static inline void bitmap_copy(unsigned long *dst, const unsigned long *src,
+			int nbits)
+{
+	int len;
+
+	if (nbits <= BITS_PER_LONG) {
+		*dst = *src;
+		return;
+	}
+
+	len = BITS_TO_LONGS(nbits) * sizeof(unsigned long);
+	memcpy(dst, src, len);
+}
+
+/* *dst = *src1 & *src2; multi-word bitmaps are delegated to __bitmap_and(). */
+static inline void bitmap_and(unsigned long *dst, const unsigned long *src1,
+			const unsigned long *src2, int nbits)
+{
+	if (nbits > BITS_PER_LONG) {
+		__bitmap_and(dst, src1, src2, nbits);
+		return;
+	}
+	/* Single word: operate on the whole word, no masking. */
+	*dst = *src1 & *src2;
+}
+
+/* *dst = *src1 | *src2; multi-word bitmaps are delegated to __bitmap_or(). */
+static inline void bitmap_or(unsigned long *dst, const unsigned long *src1,
+			const unsigned long *src2, int nbits)
+{
+	if (nbits > BITS_PER_LONG) {
+		__bitmap_or(dst, src1, src2, nbits);
+		return;
+	}
+	/* Single word: operate on the whole word, no masking. */
+	*dst = *src1 | *src2;
+}
+
+/* *dst = *src1 ^ *src2; multi-word bitmaps are delegated to __bitmap_xor(). */
+static inline void bitmap_xor(unsigned long *dst, const unsigned long *src1,
+			const unsigned long *src2, int nbits)
+{
+	if (nbits > BITS_PER_LONG) {
+		__bitmap_xor(dst, src1, src2, nbits);
+		return;
+	}
+	/* Single word: operate on the whole word, no masking. */
+	*dst = *src1 ^ *src2;
+}
+
+/* *dst = *src1 & ~*src2; multi-word bitmaps are delegated to __bitmap_andnot(). */
+static inline void bitmap_andnot(unsigned long *dst, const unsigned long *src1,
+			const unsigned long *src2, int nbits)
+{
+	if (nbits > BITS_PER_LONG) {
+		__bitmap_andnot(dst, src1, src2, nbits);
+		return;
+	}
+	/* Single word: operate on the whole word, no masking. */
+	*dst = *src1 & ~(*src2);
+}
+
+/*
+ * *dst = ~*src.  In the single-word case the result is masked so that bits
+ * at and above @nbits stay clear; larger bitmaps go to __bitmap_complement().
+ */
+static inline void bitmap_complement(unsigned long *dst, const unsigned long *src,
+			int nbits)
+{
+	if (nbits > BITS_PER_LONG) {
+		__bitmap_complement(dst, src, nbits);
+		return;
+	}
+	*dst = ~(*src) & BITMAP_LAST_WORD_MASK(nbits);
+}
+
+/* Returns 1 when the first @nbits bits of both bitmaps match, else 0 (single-word case). */
+static inline int bitmap_equal(const unsigned long *src1,
+			const unsigned long *src2, int nbits)
+{
+	unsigned long differing;
+
+	if (nbits > BITS_PER_LONG)
+		return __bitmap_equal(src1, src2, nbits);
+
+	/* Only bits below nbits take part in the comparison. */
+	differing = (*src1 ^ *src2) & BITMAP_LAST_WORD_MASK(nbits);
+	return differing == 0;
+}
+
+/* Returns 1 when the bitmaps share at least one set bit below @nbits, else 0 (single-word case). */
+static inline int bitmap_intersects(const unsigned long *src1,
+			const unsigned long *src2, int nbits)
+{
+	unsigned long common;
+
+	if (nbits > BITS_PER_LONG)
+		return __bitmap_intersects(src1, src2, nbits);
+
+	common = (*src1 & *src2) & BITMAP_LAST_WORD_MASK(nbits);
+	return common != 0;
+}
+
+/* Returns 1 when every bit set in *src1 (below @nbits) is also set in *src2, else 0 (single-word case). */
+static inline int bitmap_subset(const unsigned long *src1,
+			const unsigned long *src2, int nbits)
+{
+	unsigned long extra;
+
+	if (nbits > BITS_PER_LONG)
+		return __bitmap_subset(src1, src2, nbits);
+
+	/* Bits present in src1 but missing from src2. */
+	extra = (*src1 & ~(*src2)) & BITMAP_LAST_WORD_MASK(nbits);
+	return extra == 0;
+}
+
+/* Returns 1 when no bit below @nbits is set, else 0 (single-word case). */
+static inline int bitmap_empty(const unsigned long *src, int nbits)
+{
+	unsigned long set_bits;
+
+	if (nbits > BITS_PER_LONG)
+		return __bitmap_empty(src, nbits);
+
+	set_bits = *src & BITMAP_LAST_WORD_MASK(nbits);
+	return set_bits == 0;
+}
+
+/* Returns 1 when every bit below @nbits is set, else 0 (single-word case). */
+static inline int bitmap_full(const unsigned long *src, int nbits)
+{
+	unsigned long clear_bits;
+
+	if (nbits > BITS_PER_LONG)
+		return __bitmap_full(src, nbits);
+
+	clear_bits = ~(*src) & BITMAP_LAST_WORD_MASK(nbits);
+	return clear_bits == 0;
+}
+
+/* Number of set bits among the first @nbits bits of @src. */
+static inline int bitmap_weight(const unsigned long *src, int nbits)
+{
+	if (nbits > BITS_PER_LONG)
+		return __bitmap_weight(src, nbits);
+
+	/* Mask off bits at and above nbits before counting. */
+	return hweight_long(*src & BITMAP_LAST_WORD_MASK(nbits));
+}
+
+/* *dst = *src >> n; multi-word bitmaps are delegated to __bitmap_shift_right(). */
+static inline void bitmap_shift_right(unsigned long *dst,
+			const unsigned long *src, int n, int nbits)
+{
+	if (nbits > BITS_PER_LONG) {
+		__bitmap_shift_right(dst, src, n, nbits);
+		return;
+	}
+	/* Single word: plain shift of the whole word, no masking. */
+	*dst = *src >> n;
+}
+
+/*
+ * *dst = *src << n.  In the single-word case bits shifted to position
+ * @nbits or above are masked off; larger bitmaps go to __bitmap_shift_left().
+ */
+static inline void bitmap_shift_left(unsigned long *dst,
+			const unsigned long *src, int n, int nbits)
+{
+	if (nbits > BITS_PER_LONG) {
+		__bitmap_shift_left(dst, src, n, nbits);
+		return;
+	}
+	*dst = (*src << n) & BITMAP_LAST_WORD_MASK(nbits);
+}
+
+/*
+ * Parse a hex bitmap string from @buf into @maskp by calling
+ * __bitmap_parse() with its is_user argument set to 0.
+ */
+static inline int bitmap_parse(const char *buf, unsigned int buflen,
+			unsigned long *maskp, int nmaskbits)
+{
+	const int is_user = 0;
+
+	return __bitmap_parse(buf, buflen, is_user, maskp, nmaskbits);
+}
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __LINUX_BITMAP_H */
--- /dev/null
+# libcap-ng.m4 - Checks for the libcap-ng support
+# Copyright (c) 2009 Steve Grubb sgrubb@redhat.com
+#
+dnl LIBCAP_NG_PATH
+dnl Probe for libcap-ng.  Honours --with-libcap-ng=auto/yes/no.  On success
+dnl CAPNG_LDADD is set to -lcap-ng and HAVE_LIBCAP_NG is defined.  CAPNG_LDADD
+dnl is always substituted.
+AC_DEFUN([LIBCAP_NG_PATH],
+[
+  AC_ARG_WITH(libcap-ng,
+[  --with-libcap-ng=[auto/yes/no]  Add Libcap-ng support [default=auto]],,
+	with_libcap_ng=auto)
+
+  # Check for Libcap-ng API
+  #
+  # libcap-ng detection
+
+  if test "x$with_libcap_ng" = xno ; then
+      have_libcap_ng=no;
+  else
+      # Start by checking for header file
+      AC_CHECK_HEADER(cap-ng.h, capng_headers=yes, capng_headers=no)
+
+      # See if we have libcap-ng library
+      AC_CHECK_LIB(cap-ng, capng_clear,
+	      CAPNG_LDADD=-lcap-ng,)
+
+      # Check results are usable.  Two test commands joined with && are used
+      # because the -a operator of test is not portable.
+      if test "x$with_libcap_ng" = xyes && test "x$CAPNG_LDADD" = x ; then
+	 AC_MSG_ERROR(libcap-ng support was requested and the library was not found)
+      fi
+      if test "x$CAPNG_LDADD" != x && test "x$capng_headers" = xno ; then
+	 AC_MSG_ERROR(libcap-ng libraries found but headers are missing)
+      fi
+  fi
+  AC_SUBST(CAPNG_LDADD)
+  AC_MSG_CHECKING(whether to use libcap-ng)
+  if test "x$CAPNG_LDADD" != x ; then
+      AC_DEFINE(HAVE_LIBCAP_NG,1,[libcap-ng support])
+      AC_MSG_RESULT(yes)
+  else
+      AC_MSG_RESULT(no)
+  fi
+])
--- /dev/null
+#include "config.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include "irqbalance.h"
+#include "types.h"
+
+
+char *classes[] = { /* printable class names, null-terminated; presumably indexed by the IRQ_* class ids -- TODO confirm against types.h */
+ "other",
+ "legacy",
+ "storage",
+ "timer",
+ "ethernet",
+ "gbit-ethernet",
+ "10gbit-ethernet",
+ 0
+};
+
+int map_class_to_level[7] = /* one BALANCE_* level per class name above (7 names, 7 levels) */
+{ BALANCE_PACKAGE, BALANCE_CACHE, BALANCE_CACHE, BALANCE_NONE, BALANCE_CORE, BALANCE_CORE, BALANCE_CORE };
+
+
+int class_counts[7]; /* one counter per class; not written in this part of the file */
+
+/*
+
+NOTE: although this file has a hard-coded list of modules, a missing entry is not
+ a big deal; the types are also set based on PCI class information when available.
+
+*/
+
+/*
+
+ Based on the original irqbalance code which is:
+
+ Copyright (C) 2003 Red Hat, Inc. All rights reserved.
+
+ Usage and distribution of this file are subject to the Gnu General Public License Version 2
+ that can be found at http://www.gnu.org/licenses/gpl.txt and the COPYING file as
+ distributed together with this file is included herein by reference.
+
+ Author: Arjan van de Ven <arjanv@redhat.com>
+
+*/
+
+static char *legacy_modules[] = { /* substrings that make find_class() guess IRQ_LEGACY; list ends at the 0 entry */
+ "PS/2",
+ "serial",
+ "i8042",
+ "acpi",
+ "floppy",
+ "parport",
+ "keyboard",
+ "usb-ohci",
+ "usb-uhci",
+ "uhci_hcd",
+ "ohci_hcd",
+ "ehci_hcd",
+ "EMU10K1",
+ 0
+};
+
+static char *timer_modules[] = { /* substrings that make find_class() guess IRQ_TIMER; list ends at the 0 entry */
+ "rtc",
+ "timer",
+ 0
+};
+
+static char *storage_modules[] = { /* substrings that make find_class() guess IRQ_SCSI; list ends at the 0 entry */
+ "aic7xxx",
+ "aic79xx",
+ "ide",
+ "cciss",
+ "cpqarray",
+ "qla2",
+ "megaraid",
+ "fusion",
+ "libata",
+ "ohci1394",
+ "sym53c8xx",
+ 0
+};
+
+static char *ethernet_modules[] = { /* substrings that make find_class() guess IRQ_ETH (or IRQ_GETH/IRQ_TGETH with -rx/-tx); list ends at the 0 entry */
+ "eth",
+ "e100",
+ "eepro100",
+ "orinoco_cs",
+ "wvlan_cs",
+ "3c5",
+ "HiSax",
+ "skge",
+ "sky2",
+ 0
+};
+
+
+/* Returns 1 when @text contains any entry of the 0-terminated @table, else 0. */
+static int text_matches_table(const char *text, char **table)
+{
+	int idx;
+
+	for (idx = 0; table[idx]; idx++) {
+		if (strstr(text, table[idx]))
+			return 1;
+	}
+	return 0;
+}
+
+/*
+ * Guess an interrupt class from the module/description text of @irq.
+ *
+ * The text is matched by substring against the legacy, storage, timer and
+ * ethernet tables in that order, so a later table overrides an earlier one.
+ * An ethernet match is refined to IRQ_GETH when the text contains "-rx"
+ * and to IRQ_TGETH when it contains "-tx".  With no match, interrupt
+ * number 0 is treated as the timer.
+ *
+ * Returns IRQ_OTHER when @moduletext is NULL; otherwise the larger of the
+ * guess and the class already stored in @irq.
+ */
+int find_class(struct interrupt *irq, char *moduletext)
+{
+	int best = IRQ_OTHER;
+
+	if (!moduletext)
+		return best;
+
+	if (text_matches_table(moduletext, legacy_modules))
+		best = IRQ_LEGACY;
+
+	if (text_matches_table(moduletext, storage_modules))
+		best = IRQ_SCSI;
+
+	if (text_matches_table(moduletext, timer_modules))
+		best = IRQ_TIMER;
+
+	if (text_matches_table(moduletext, ethernet_modules)) {
+		best = IRQ_ETH;
+		if (strstr(moduletext, "-rx"))
+			best = IRQ_GETH;
+		if (strstr(moduletext, "-tx"))
+			best = IRQ_TGETH;
+	}
+
+	if (best == IRQ_OTHER && irq->number == 0)
+		best = IRQ_TIMER;
+
+	/* Never lower a class that was already assigned. */
+	if (best <= irq->class)
+		return irq->class;
+	return best;
+}
--- /dev/null
+dnl
+define([AC_INIT_NOTICE],
+[### Generated automatically using autoconf version] AC_ACVERSION [
+### Copyright 2009 Steve Grubb <sgrubb@redhat.com>
+###
+### Permission is hereby granted, free of charge, to any person obtaining a
+### copy of this software and associated documentation files (the "Software"),
+### to deal in the Software without restriction, including without limitation
+### the rights to use, copy, modify, merge, publish, distribute, sublicense,
+### and/or sell copies of the Software, and to permit persons to whom the
+### Software is furnished to do so, subject to the following conditions:
+###
+### The above copyright notice and this permission notice shall be included
+### in all copies or substantial portions of the Software.
+###
+### THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+### IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+### FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+### THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+### OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+### ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+### OTHER DEALINGS IN THE SOFTWARE.
+###
+### For usage, run `./configure --help'
+### For more detailed information on installation, read the file `INSTALL'.
+###
+### If configuration succeeds, status is in the file `config.status'.
+### A log of configuration tests is in `config.log'.
+])
+
+AC_REVISION($Revision: 1.3 $)dnl
+AC_INIT(irqbalance,0.56)
+AC_PREREQ(2.12)dnl
+dnl AM_CONFIG_HEADER was removed in Automake 1.13; AC_CONFIG_HEADERS is the
+dnl supported spelling and generates the same config.h.
+AC_CONFIG_HEADERS([config.h])
+
+dnl VERSION is only assigned by AM_INIT_AUTOMAKE further down, so it is still
+dnl empty here.  PACKAGE_VERSION has already been set by AC_INIT.
+echo Configuring irqbalance $PACKAGE_VERSION
+
+AC_CONFIG_MACRO_DIR([m4])
+AC_CANONICAL_TARGET
+AM_INIT_AUTOMAKE
+AM_PROG_LIBTOOL
+AC_SUBST(LIBTOOL_DEPS)
+
+echo .
+echo Checking for programs
+
+AC_PROG_CC
+AC_PROG_INSTALL
+AC_PROG_AWK
+
+echo .
+echo Checking for header files
+AC_HEADER_STDC
+AC_CHECK_HEADERS(linux/ethtool.h linux/sockios.h, [], [])
+
+AC_C_CONST
+AC_C_INLINE
+AM_PROG_CC_C_O
+
+dnl glib-2.0 is mandatory.  libcap-ng is probed by the LIBCAP_NG_PATH macro.
+PKG_CHECK_MODULES([GLIB], [glib-2.0])
+LIBCAP_NG_PATH
+
+AC_OUTPUT(Makefile)
+
+echo .
+echo "
+
+ irqbalance Version: $VERSION
+ Target: $target
+ Installation prefix: $prefix
+ Compiler: $CC
+ Compiler flags:
+`echo $CFLAGS | fmt -w 50 | sed 's,^, ,'`
+"
--- /dev/null
+#ifndef __INCLUDE_GUARD_CONSTANTS_H
+#define __INCLUDE_GUARD_CONSTANTS_H
+
+/* interval between rebalance attempts in seconds */
+#define SLEEP_INTERVAL 10
+
+/* NUMA topology refresh intervals, in units of SLEEP_INTERVAL */
+#define NUMA_REFRESH_INTERVAL 32
+/* NIC interrupt refresh interval, in units of SLEEP_INTERVAL */
+#define NIC_REFRESH_INTERVAL 32
+
+/* minimum number of interrupts since boot for an interrupt to matter */
+#define MIN_IRQ_COUNT 20
+
+
+/* balancing tunings */
+
+#define CROSS_PACKAGE_PENALTY 3000
+#define NUMA_PENALTY 250
+#define POWER_MODE_PACKAGE_THRESHOLD 20000
+#define CLASS_VIOLATION_PENALTY 6000
+/* historical misspelling, kept so that existing users keep compiling */
+#define CLASS_VIOLATION_PENTALTY CLASS_VIOLATION_PENALTY
+#define CORE_SPECIFIC_THRESHOLD 5000
+
+/* power mode */
+
+#define POWER_MODE_SOFTIRQ_THRESHOLD 20
+#define POWER_MODE_HYSTERESIS 3
+
+
+#endif
--- /dev/null
+#ifndef __LINUX_CPUMASK_H
+#define __LINUX_CPUMASK_H
+
+#define NR_CPUS 256 /* fixed number of bit positions in every cpumask_t declared below */
+/*
+ * Cpumasks provide a bitmap suitable for representing the
+ * set of CPU's in a system, one bit position per CPU number.
+ *
+ * See detailed comments in the file linux/bitmap.h describing the
+ * data type on which these cpumasks are based.
+ *
+ * For details of cpumask_scnprintf() and cpumask_parse_user(),
+ * see bitmap_scnprintf() and bitmap_parse_user() in lib/bitmap.c.
+ * For details of cpulist_scnprintf() and cpulist_parse(), see
+ * bitmap_scnlistprintf() and bitmap_parselist(), also in bitmap.c.
+ * For details of cpu_remap(), see bitmap_bitremap in lib/bitmap.c
+ * For details of cpus_remap(), see bitmap_remap in lib/bitmap.c.
+ *
+ * The available cpumask operations are:
+ *
+ * void cpu_set(cpu, mask) turn on bit 'cpu' in mask
+ * void cpu_clear(cpu, mask) turn off bit 'cpu' in mask
+ * void cpus_setall(mask) set all bits
+ * void cpus_clear(mask) clear all bits
+ * int cpu_isset(cpu, mask) true iff bit 'cpu' set in mask
+ * int cpu_test_and_set(cpu, mask) test and set bit 'cpu' in mask
+ *
+ * void cpus_and(dst, src1, src2) dst = src1 & src2 [intersection]
+ * void cpus_or(dst, src1, src2) dst = src1 | src2 [union]
+ * void cpus_xor(dst, src1, src2) dst = src1 ^ src2
+ * void cpus_andnot(dst, src1, src2) dst = src1 & ~src2
+ * void cpus_complement(dst, src) dst = ~src
+ *
+ * int cpus_equal(mask1, mask2) Does mask1 == mask2?
+ * int cpus_intersects(mask1, mask2) Do mask1 and mask2 intersect?
+ * int cpus_subset(mask1, mask2) Is mask1 a subset of mask2?
+ * int cpus_empty(mask) Is mask empty (no bits set)?
+ * int cpus_full(mask) Is mask full (all bits set)?
+ * int cpus_weight(mask) Hamming weight - number of set bits
+ *
+ * void cpus_shift_right(dst, src, n) Shift right
+ * void cpus_shift_left(dst, src, n) Shift left
+ *
+ * int first_cpu(mask) Number lowest set bit, or NR_CPUS
+ * int next_cpu(cpu, mask) Next cpu past 'cpu', or NR_CPUS
+ *
+ * cpumask_t cpumask_of_cpu(cpu) Return cpumask with bit 'cpu' set
+ * CPU_MASK_ALL Initializer - all bits set
+ * CPU_MASK_NONE Initializer - no bits set
+ * unsigned long *cpus_addr(mask) Array of unsigned long's in mask
+ *
+ * int cpumask_scnprintf(buf, len, mask) Format cpumask for printing
+ * int cpumask_parse_user(ubuf, ulen, mask) Parse ascii string as cpumask
+ * int cpulist_scnprintf(buf, len, mask) Format cpumask as list for printing
+ * int cpulist_parse(buf, map) Parse ascii string as cpulist
+ * int cpu_remap(oldbit, old, new) newbit = map(old, new)(oldbit)
+ * int cpus_remap(dst, src, old, new) *dst = map(old, new)(src)
+ *
+ * for_each_cpu_mask(cpu, mask) for-loop cpu over mask
+ *
+ * int num_online_cpus() Number of online CPUs
+ * int num_possible_cpus() Number of all possible CPUs
+ * int num_present_cpus() Number of present CPUs
+ *
+ * int cpu_online(cpu) Is some cpu online?
+ * int cpu_possible(cpu) Is some cpu possible?
+ * int cpu_present(cpu) Is some cpu present (can schedule)?
+ *
+ * int any_online_cpu(mask) First online cpu in mask
+ *
+ * for_each_possible_cpu(cpu) for-loop cpu over cpu_possible_map
+ * for_each_online_cpu(cpu) for-loop cpu over cpu_online_map
+ * for_each_present_cpu(cpu) for-loop cpu over cpu_present_map
+ *
+ * Subtlety:
+ * 1) The 'type-checked' form of cpu_isset() causes gcc (3.3.2, anyway)
+ * to generate slightly worse code. Note for example the additional
+ * 40 lines of assembly code compiling the "for each possible cpu"
+ * loops buried in the disk_stat_read() macros calls when compiling
+ * drivers/block/genhd.c (arch i386, CONFIG_SMP=y). So use a simple
+ * one-line #define for cpu_isset(), instead of wrapping an inline
+ * inside a macro, the way we do the other calls.
+ */
+
+#include "bitmap.h"
+
+typedef struct { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t; /* NR_CPUS-bit bitmap wrapped in a struct so masks can be assigned and passed by value */
+extern cpumask_t _unused_cpumask_arg_; /* only referenced inside typeof() by cpumask_of_cpu() in this header */
+
+#define cpu_set(cpu, dst) __cpu_set((cpu), &(dst)) /* turn on bit 'cpu' in mask 'dst'; the mask is passed by name, the macro takes its address */
+static inline void __cpu_set(int cpu, volatile cpumask_t *dstp)
+{
+ set_bit(cpu, dstp->bits); /* set_bit() is not defined in this header; presumably it comes from "non-atomic.h" via bitmap.h -- TODO confirm */
+}
+
+#define cpu_clear(cpu, dst) __cpu_clear((cpu), &(dst)) /* turn off bit 'cpu' in mask 'dst' */
+static inline void __cpu_clear(int cpu, volatile cpumask_t *dstp)
+{
+ clear_bit(cpu, dstp->bits); /* clear_bit() is not defined in this header; presumably it comes from "non-atomic.h" via bitmap.h -- TODO confirm */
+}
+
+#define cpus_setall(dst) __cpus_setall(&(dst), NR_CPUS) /* set all NR_CPUS bits of 'dst' */
+static inline void __cpus_setall(cpumask_t *dstp, int nbits)
+{
+ bitmap_fill(dstp->bits, nbits); /* forwards to the bitmap.h primitive on the embedded word array */
+}
+
+#define cpus_clear(dst) __cpus_clear(&(dst), NR_CPUS) /* clear all NR_CPUS bits of 'dst' */
+static inline void __cpus_clear(cpumask_t *dstp, int nbits)
+{
+ bitmap_zero(dstp->bits, nbits); /* forwards to the bitmap.h primitive on the embedded word array */
+}
+
+/* No static inline type checking - see Subtlety (1) above. */
+#define cpu_isset(cpu, cpumask) test_bit((cpu), (cpumask).bits) /* true iff bit 'cpu' is set; 'cpumask' is a cpumask_t lvalue, not a pointer */
+
+#define cpus_and(dst, src1, src2) __cpus_and(&(dst), &(src1), &(src2), NR_CPUS) /* dst = src1 & src2 (intersection) */
+static inline void __cpus_and(cpumask_t *dstp, const cpumask_t *src1p,
+ const cpumask_t *src2p, int nbits)
+{
+ bitmap_and(dstp->bits, src1p->bits, src2p->bits, nbits); /* forwards to bitmap_and() on the embedded word arrays */
+}
+
+#define cpus_or(dst, src1, src2) __cpus_or(&(dst), &(src1), &(src2), NR_CPUS) /* dst = src1 | src2 (union) */
+static inline void __cpus_or(cpumask_t *dstp, const cpumask_t *src1p,
+ const cpumask_t *src2p, int nbits)
+{
+ bitmap_or(dstp->bits, src1p->bits, src2p->bits, nbits); /* forwards to bitmap_or() on the embedded word arrays */
+}
+
+#define cpus_xor(dst, src1, src2) __cpus_xor(&(dst), &(src1), &(src2), NR_CPUS) /* dst = src1 ^ src2 */
+static inline void __cpus_xor(cpumask_t *dstp, const cpumask_t *src1p,
+ const cpumask_t *src2p, int nbits)
+{
+ bitmap_xor(dstp->bits, src1p->bits, src2p->bits, nbits); /* forwards to bitmap_xor() on the embedded word arrays */
+}
+
+#define cpus_andnot(dst, src1, src2) \
+ __cpus_andnot(&(dst), &(src1), &(src2), NR_CPUS) /* dst = src1 & ~src2 */
+static inline void __cpus_andnot(cpumask_t *dstp, const cpumask_t *src1p,
+ const cpumask_t *src2p, int nbits)
+{
+ bitmap_andnot(dstp->bits, src1p->bits, src2p->bits, nbits); /* forwards to bitmap_andnot() on the embedded word arrays */
+}
+
+#define cpus_complement(dst, src) __cpus_complement(&(dst), &(src), NR_CPUS) /* dst = ~src */
+static inline void __cpus_complement(cpumask_t *dstp,
+ const cpumask_t *srcp, int nbits)
+{
+ bitmap_complement(dstp->bits, srcp->bits, nbits); /* forwards to bitmap_complement() on the embedded word arrays */
+}
+
+#define cpus_equal(src1, src2) __cpus_equal(&(src1), &(src2), NR_CPUS) /* does src1 == src2? */
+static inline int __cpus_equal(const cpumask_t *src1p,
+ const cpumask_t *src2p, int nbits)
+{
+ return bitmap_equal(src1p->bits, src2p->bits, nbits); /* result of bitmap_equal() is returned unchanged */
+}
+
+#define cpus_intersects(src1, src2) __cpus_intersects(&(src1), &(src2), NR_CPUS) /* do src1 and src2 share a set bit? */
+static inline int __cpus_intersects(const cpumask_t *src1p,
+ const cpumask_t *src2p, int nbits)
+{
+ return bitmap_intersects(src1p->bits, src2p->bits, nbits); /* result of bitmap_intersects() is returned unchanged */
+}
+
+#define cpus_subset(src1, src2) __cpus_subset(&(src1), &(src2), NR_CPUS) /* is src1 a subset of src2? */
+static inline int __cpus_subset(const cpumask_t *src1p,
+ const cpumask_t *src2p, int nbits)
+{
+ return bitmap_subset(src1p->bits, src2p->bits, nbits); /* result of bitmap_subset() is returned unchanged */
+}
+
+#define cpus_empty(src) __cpus_empty(&(src), NR_CPUS) /* is the mask empty (no bits set)? */
+static inline int __cpus_empty(const cpumask_t *srcp, int nbits)
+{
+ return bitmap_empty(srcp->bits, nbits); /* result of bitmap_empty() is returned unchanged */
+}
+
+#define cpus_full(cpumask) __cpus_full(&(cpumask), NR_CPUS) /* is the mask full (all NR_CPUS bits set)? */
+static inline int __cpus_full(const cpumask_t *srcp, int nbits)
+{
+ return bitmap_full(srcp->bits, nbits); /* result of bitmap_full() is returned unchanged */
+}
+
+#define cpus_weight(cpumask) __cpus_weight(&(cpumask), NR_CPUS) /* number of set bits in the mask */
+static inline int __cpus_weight(const cpumask_t *srcp, int nbits)
+{
+ return bitmap_weight(srcp->bits, nbits); /* result of bitmap_weight() is returned unchanged */
+}
+
+#define cpus_shift_right(dst, src, n) \
+ __cpus_shift_right(&(dst), &(src), (n), NR_CPUS) /* dst = src >> n */
+static inline void __cpus_shift_right(cpumask_t *dstp,
+ const cpumask_t *srcp, int n, int nbits)
+{
+ bitmap_shift_right(dstp->bits, srcp->bits, n, nbits); /* forwards to bitmap_shift_right() on the embedded word arrays */
+}
+
+#define cpus_shift_left(dst, src, n) \
+ __cpus_shift_left(&(dst), &(src), (n), NR_CPUS) /* dst = src << n */
+static inline void __cpus_shift_left(cpumask_t *dstp,
+ const cpumask_t *srcp, int n, int nbits)
+{
+ bitmap_shift_left(dstp->bits, srcp->bits, n, nbits); /* forwards to bitmap_shift_left() on the embedded word arrays */
+}
+
+/*
+ * Number of the lowest set bit in @srcp, or NR_CPUS when no bit is set.
+ *
+ * Every word of the mask is scanned.  ffs() is not used because it takes an
+ * int: it would look only at the low 32 bits of the first word and would
+ * yield -1, not NR_CPUS, for an empty mask.
+ */
+static inline int __first_cpu(const cpumask_t *srcp)
+{
+	int word;
+
+	for (word = 0; word < BITS_TO_LONGS(NR_CPUS); word++) {
+		unsigned long bits = srcp->bits[word];
+		int cpu = word * BITS_PER_LONG;
+
+		if (!bits)
+			continue;
+
+		/* Walk up to the lowest set bit of this word. */
+		while (!(bits & 1UL)) {
+			bits >>= 1;
+			cpu++;
+		}
+		/* Stray bits beyond NR_CPUS in the last word do not count. */
+		return cpu < NR_CPUS ? cpu : NR_CPUS;
+	}
+	return NR_CPUS;
+}
+
+#define first_cpu(src) __first_cpu(&(src))
+int __next_cpu(int n, const cpumask_t *srcp); /* defined outside this header */
+#define next_cpu(n, src) __next_cpu((n), &(src)) /* next cpu past 'n' in the mask, or NR_CPUS, per the operations list at the top of this header */
+
+#define cpumask_of_cpu(cpu) /* yields a cpumask_t value with only bit 'cpu' set (GNU statement expression) */ \
+({ \
+ typeof(_unused_cpumask_arg_) m; /* i.e. cpumask_t; the extern object is named only for its type */ \
+ if (sizeof(m) == sizeof(unsigned long)) { /* single-word mask: build it directly */ \
+ m.bits[0] = 1UL<<(cpu); \
+ } else { /* multi-word mask: clear everything, then set the one bit */ \
+ cpus_clear(m); \
+ cpu_set((cpu), m); \
+ } \
+ m; \
+})
+
+#define CPU_MASK_LAST_WORD BITMAP_LAST_WORD_MASK(NR_CPUS) /* valid-bit mask for the final word of a cpumask_t */
+
+#if 0 /* disabled: this variant initialises only the last word, which is the whole mask only when BITS_TO_LONGS(NR_CPUS) == 1 */
+
+#define CPU_MASK_ALL \
+(cpumask_t) { { \
+ [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \
+} }
+
+#else
+
+#define CPU_MASK_ALL /* compound literal with all NR_CPUS bits set; uses GNU range designators */ \
+(cpumask_t) { { \
+ [0 ... BITS_TO_LONGS(NR_CPUS)-2] = ~0UL, \
+ [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \
+} }
+
+#endif
+
+#define CPU_MASK_NONE /* compound literal with no bits set */ \
+(cpumask_t) { { \
+ [0 ... BITS_TO_LONGS(NR_CPUS)-1] = 0UL \
+} }
+
+#define CPU_MASK_CPU0 /* compound literal with only bit 0 set; the remaining words are implicitly zero */ \
+(cpumask_t) { { \
+ [0] = 1UL \
+} }
+
+#define cpus_addr(src) ((src).bits) /* the underlying unsigned long array of a cpumask_t lvalue */
+
+#define cpumask_scnprintf(buf, len, src) \
+ __cpumask_scnprintf((buf), (len), &(src), NR_CPUS) /* format the mask for printing into buf */
+static inline int __cpumask_scnprintf(char *buf, int len,
+ const cpumask_t *srcp, int nbits)
+{
+ return bitmap_scnprintf(buf, len, srcp->bits, nbits); /* result of bitmap_scnprintf() is returned unchanged */
+}
+
+#define cpumask_parse_user(ubuf, ulen, dst) \
+ __cpumask_parse_user((ubuf), (ulen), &(dst), NR_CPUS) /* parse an ASCII hex string into the mask */
+static inline int __cpumask_parse_user(const char *buf, int len,
+ cpumask_t *dstp, int nbits)
+{
+ return bitmap_parse(buf, len, dstp->bits, nbits); /* despite the _user name this calls the plain bitmap_parse(); its result is returned unchanged */
+}
+
+#define cpulist_scnprintf(buf, len, src) \
+ __cpulist_scnprintf((buf), (len), &(src), NR_CPUS) /* format the mask as a list for printing into buf */
+static inline int __cpulist_scnprintf(char *buf, int len,
+ const cpumask_t *srcp, int nbits)
+{
+ return bitmap_scnlistprintf(buf, len, srcp->bits, nbits); /* result of bitmap_scnlistprintf() is returned unchanged */
+}
+
+#define cpulist_parse(buf, dst) __cpulist_parse((buf), &(dst), NR_CPUS) /* parse an ASCII cpu list string into the mask */
+static inline int __cpulist_parse(const char *buf, cpumask_t *dstp, int nbits)
+{
+ return bitmap_parselist(buf, dstp->bits, nbits); /* result of bitmap_parselist() is returned unchanged */
+}
+
+#define cpu_remap(oldbit, old, new) \
+ __cpu_remap((oldbit), &(old), &(new), NR_CPUS) /* newbit = map(old, new)(oldbit) */
+static inline int __cpu_remap(int oldbit,
+ const cpumask_t *oldp, const cpumask_t *newp, int nbits)
+{
+ return bitmap_bitremap(oldbit, oldp->bits, newp->bits, nbits); /* result of bitmap_bitremap() is returned unchanged */
+}
+
+#define cpus_remap(dst, src, old, new) \
+ __cpus_remap(&(dst), &(src), &(old), &(new), NR_CPUS) /* dst = map(old, new)(src) */
+static inline void __cpus_remap(cpumask_t *dstp, const cpumask_t *srcp,
+ const cpumask_t *oldp, const cpumask_t *newp, int nbits)
+{
+ bitmap_remap(dstp->bits, srcp->bits, oldp->bits, newp->bits, nbits); /* forwards to bitmap_remap() on the embedded word arrays */
+}
+
+#if NR_CPUS > 1 /* always taken in this file, since NR_CPUS is defined as 256 above */
+#define for_each_cpu_mask(cpu, mask) \
+ for ((cpu) = first_cpu(mask); \
+ (cpu) < NR_CPUS; \
+ (cpu) = next_cpu((cpu), (mask))) /* for-loop header: 'cpu' takes first_cpu(), then successive next_cpu() values, until it reaches NR_CPUS */
+#else /* NR_CPUS == 1 */
+#define for_each_cpu_mask(cpu, mask) \
+ for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask) /* single iteration with cpu == 0; 'mask' is evaluated only to avoid an unused warning */
+#endif /* NR_CPUS */
+
+/*
+ * The following particular system cpumasks and operations manage
+ * possible, present and online cpus. Each of them is a fixed size
+ * bitmap of size NR_CPUS.
+ *
+ * #ifdef CONFIG_HOTPLUG_CPU
+ * cpu_possible_map - has bit 'cpu' set iff cpu is populatable
+ * cpu_present_map - has bit 'cpu' set iff cpu is populated
+ * cpu_online_map - has bit 'cpu' set iff cpu available to scheduler
+ * #else
+ * cpu_possible_map - has bit 'cpu' set iff cpu is populated
+ * cpu_present_map - copy of cpu_possible_map
+ * cpu_online_map - has bit 'cpu' set iff cpu available to scheduler
+ * #endif
+ *
+ * In either case, NR_CPUS is fixed at compile time, as the static
+ * size of these bitmaps. The cpu_possible_map is fixed at boot
+ * time, as the set of CPU id's that it is possible might ever
+ * be plugged in at anytime during the life of that system boot.
+ * The cpu_present_map is dynamic(*), representing which CPUs
+ * are currently plugged in. And cpu_online_map is the dynamic
+ * subset of cpu_present_map, indicating those CPUs available
+ * for scheduling.
+ *
+ * If HOTPLUG is enabled, then cpu_possible_map is forced to have
+ * all NR_CPUS bits set, otherwise it is just the set of CPUs that
+ * ACPI reports present at boot.
+ *
+ * If HOTPLUG is enabled, then cpu_present_map varies dynamically,
+ * depending on what ACPI reports as currently plugged in, otherwise
+ * cpu_present_map is just a copy of cpu_possible_map.
+ *
+ * (*) Well, cpu_present_map is dynamic in the hotplug case. If not
+ * hotplug, it's a copy of cpu_possible_map, hence fixed at boot.
+ *
+ * Subtleties:
+ * 1) UP arch's (NR_CPUS == 1, CONFIG_SMP not defined) hardcode
+ * assumption that their single CPU is online. The UP
+ * cpu_{online,possible,present}_maps are placebos. Changing them
+ * will have no useful effect on the following num_*_cpus()
+ * and cpu_*() macros in the UP case. This ugliness is a UP
+ * optimization - don't waste any instructions or memory references
+ * asking if you're online or how many CPUs there are if there is
+ * only one CPU.
+ * 2) Most SMP arch's #define some of these maps to be some
+ * other map specific to that arch. Therefore, the following
+ * must be #define macros, not inlines. To see why, examine
+ * the assembly code produced by the following. Note that
+ * set1() writes phys_x_map, but set2() writes x_map:
+ * int x_map, phys_x_map;
+ * #define set1(a) x_map = a
+ * inline void set2(int a) { x_map = a; }
+ * #define x_map phys_x_map
+ * main(){ set1(3); set2(5); }
+ */
+
+extern cpumask_t cpu_possible_map; /* defined outside this header; read by num_possible_cpus() and cpu_possible() below */
+extern cpumask_t cpu_online_map; /* defined outside this header; read by num_online_cpus() and cpu_online() below */
+extern cpumask_t cpu_present_map; /* defined outside this header; read by num_present_cpus() and cpu_present() below */
+
+#if NR_CPUS > 1 /* always taken in this file, since NR_CPUS is defined as 256 above */
+#define num_online_cpus() cpus_weight(cpu_online_map) /* count of bits set in cpu_online_map */
+#define num_possible_cpus() cpus_weight(cpu_possible_map) /* count of bits set in cpu_possible_map */
+#define num_present_cpus() cpus_weight(cpu_present_map) /* count of bits set in cpu_present_map */
+#define cpu_online(cpu) cpu_isset((cpu), cpu_online_map) /* is bit 'cpu' set in cpu_online_map? */
+#define cpu_possible(cpu) cpu_isset((cpu), cpu_possible_map) /* is bit 'cpu' set in cpu_possible_map? */
+#define cpu_present(cpu) cpu_isset((cpu), cpu_present_map) /* is bit 'cpu' set in cpu_present_map? */
+#else /* single-CPU build: the maps are not consulted and cpu 0 is hard-coded */
+#define num_online_cpus() 1
+#define num_possible_cpus() 1
+#define num_present_cpus() 1
+#define cpu_online(cpu) ((cpu) == 0)
+#define cpu_possible(cpu) ((cpu) == 0)
+#define cpu_present(cpu) ((cpu) == 0)
+#endif
+
+int highest_possible_processor_id(void); /* defined outside this header */
+#define any_online_cpu(mask) __any_online_cpu(&(mask)) /* first online cpu in mask, per the operations list at the top of this header */
+int __any_online_cpu(const cpumask_t *mask); /* defined outside this header */
+
+#define for_each_possible_cpu(cpu) for_each_cpu_mask((cpu), cpu_possible_map)
+#define for_each_online_cpu(cpu) for_each_cpu_mask((cpu), cpu_online_map)
+#define for_each_present_cpu(cpu) for_each_cpu_mask((cpu), cpu_present_map)
+
+#endif /* __LINUX_CPUMASK_H */
--- /dev/null
+/*
+ * Copyright (C) 2006, Intel Corporation
+ *
+ * This file is part of irqbalance
+ *
+ * This program file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program in a file named COPYING; if not, write to the
+ * Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301 USA
+ */
+
+/*
+ * This file contains the code to construct and manipulate a hierarchy of processors,
+ * cache domains and processor cores.
+ */
+
+#include "config.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <dirent.h>
+
+#include <glib.h>
+
+#include "irqbalance.h"
+
+
+/* List of struct cpu_core, one per usable cpu; appended to by do_one_cpu() */
+GList *cpus;
+/* List of struct cache_domain; built from 'cpus' by fill_cache_domain() */
+GList *cache_domains;
+/* List of struct package; built from 'cache_domains' by fill_packages() */
+GList *packages;
+
+/* Number of entries appended to 'packages' */
+int package_count;
+/* Number of entries appended to 'cache_domains' */
+int cache_domain_count;
+/* Number of cpus seen by do_one_cpu(); banned cpus are counted too */
+int core_count;
+
+/* Users want to be able to keep interrupts away from some cpus; store these in a cpumask_t */
+cpumask_t banned_cpus;
+
+
+/*
+ it's convenient to have the complement of banned_cpus available so that
+ the AND operator can be used to mask out unwanted cpus
+ (recomputed at the start of every parse_cpu_tree() call)
+*/
+static cpumask_t unbanned_cpus;
+
+/*
+ * Group the cache domains into packages: every cache domain that has not
+ * been claimed yet starts a new package, and all later domains with an
+ * identical package_mask are attached to it. The package number is the
+ * lowest number found among its member domains. If an allocation fails
+ * the remaining domains are silently left without a package.
+ */
+static void fill_packages(void)
+{
+	GList *outer;
+
+	for (outer = g_list_first(cache_domains); outer; outer = g_list_next(outer)) {
+		struct cache_domain *seed = outer->data;
+		struct package *pkg;
+		GList *inner;
+
+		/* already attached to an earlier package */
+		if (seed->marker)
+			continue;
+
+		pkg = malloc(sizeof(*pkg));
+		if (!pkg)
+			break;
+		memset(pkg, 0, sizeof(*pkg));
+		pkg->mask = seed->package_mask;
+		pkg->number = seed->number;
+
+		/* the scan starts at the seed itself, so the seed is claimed too */
+		for (inner = outer; inner; inner = g_list_next(inner)) {
+			struct cache_domain *peer = inner->data;
+
+			if (!cpus_equal(seed->package_mask, peer->package_mask))
+				continue;
+			peer->marker = 1;
+			pkg->cache_domains = g_list_append(pkg->cache_domains, peer);
+			if (peer->number < pkg->number)
+				pkg->number = peer->number;
+		}
+
+		packages = g_list_append(packages, pkg);
+		package_count++;
+	}
+}
+
+/*
+ * Group the cpus into cache domains: every cpu that has not been claimed
+ * yet starts a new domain, and all later cpus whose cache_mask and
+ * package_mask both match are attached to it. The domain number is the
+ * lowest cpu number among its members. If an allocation fails the
+ * remaining cpus are silently left without a cache domain.
+ */
+static void fill_cache_domain(void)
+{
+	GList *outer;
+
+	for (outer = g_list_first(cpus); outer; outer = g_list_next(outer)) {
+		struct cpu_core *seed = outer->data;
+		struct cache_domain *domain;
+		GList *inner;
+
+		/* already attached to an earlier domain */
+		if (seed->marker)
+			continue;
+
+		domain = malloc(sizeof(*domain));
+		if (!domain)
+			break;
+		memset(domain, 0, sizeof(*domain));
+		domain->mask = seed->cache_mask;
+		domain->package_mask = seed->package_mask;
+		domain->number = seed->number;
+
+		cache_domains = g_list_append(cache_domains, domain);
+		cache_domain_count++;
+
+		/* the scan starts at the seed itself, so the seed is claimed too */
+		for (inner = outer; inner; inner = g_list_next(inner)) {
+			struct cpu_core *peer = inner->data;
+
+			if (!cpus_equal(seed->cache_mask, peer->cache_mask))
+				continue;
+			if (!cpus_equal(seed->package_mask, peer->package_mask))
+				continue;
+			peer->marker = 1;
+			domain->cpu_cores = g_list_append(domain->cpu_cores, peer);
+			if (peer->number < domain->number)
+				domain->number = peer->number;
+		}
+	}
+}
+
+
+/*
+ * Read the first line of the file at 'path' and parse it as a cpu mask
+ * into *mask. Nothing is parsed when the file cannot be opened or no
+ * line could be read.
+ */
+static void read_mask_file(const char *path, cpumask_t *mask)
+{
+	FILE *file;
+	char *line = NULL;
+	size_t size = 0;
+
+	file = fopen(path, "r");
+	if (!file)
+		return;
+	/* getline() reports EOF/error as -1, never as 0 */
+	if (getline(&line, &size, file) > 0)
+		cpumask_parse_user(line, strlen(line), *mask);
+	fclose(file);
+	free(line);
+}
+
+/*
+ * Inspect one cpu directory and, unless the cpu is offline or banned,
+ * append a struct cpu_core describing it to the global 'cpus' list.
+ *
+ * path: directory of the form "/sys/devices/system/cpu/cpu<N>"; the cpu
+ *       number is parsed from the fixed offset that follows that prefix.
+ *
+ * Banned cpus are not added to the list but still bump core_count.
+ */
+static void do_one_cpu(char *path)
+{
+	struct cpu_core *cpu;
+	FILE *file;
+	char new_path[PATH_MAX];
+
+	/* skip offline cpus; a missing or unreadable "online" file means online */
+	snprintf(new_path, PATH_MAX, "%s/online", path);
+	file = fopen(new_path, "r");
+	if (file) {
+		char *line = NULL;
+		size_t size = 0;
+		int offline;
+
+		offline = getline(&line, &size, file) > 0 && line[0] == '0';
+		fclose(file);
+		free(line);
+		if (offline)
+			return;
+	}
+
+	cpu = malloc(sizeof(*cpu));
+	if (!cpu)
+		return;
+	memset(cpu, 0, sizeof(*cpu));
+
+	/* 27 == strlen("/sys/devices/system/cpu/cpu") */
+	cpu->number = strtoul(&path[27], NULL, 10);
+
+	cpu_set(cpu->number, cpu->mask);
+
+	/* if the cpu is on the banned list, just don't add it */
+	if (cpus_intersects(cpu->mask, banned_cpus)) {
+		free(cpu);
+		/* even though we don't use the cpu we do need to count it */
+		core_count++;
+		return;
+	}
+
+	/* try to read the package mask; if it doesn't exist assume solitary */
+	cpu_set(cpu->number, cpu->package_mask);
+	snprintf(new_path, PATH_MAX, "%s/topology/core_siblings", path);
+	read_mask_file(new_path, &cpu->package_mask);
+
+	/* try to read the cache mask; if it doesn't exist assume solitary */
+	/* index1 is read first, then index2; the last one that can be read wins */
+	cpu_set(cpu->number, cpu->cache_mask);
+	snprintf(new_path, PATH_MAX, "%s/cache/index1/shared_cpu_map", path);
+	read_mask_file(new_path, &cpu->cache_mask);
+	snprintf(new_path, PATH_MAX, "%s/cache/index2/shared_cpu_map", path);
+	read_mask_file(new_path, &cpu->cache_mask);
+
+	/*
+	 * blank out the banned cpus from the various masks so that interrupts
+	 * will never be told to go there
+	 */
+	cpus_and(cpu->cache_mask, cpu->cache_mask, unbanned_cpus);
+	cpus_and(cpu->package_mask, cpu->package_mask, unbanned_cpus);
+	cpus_and(cpu->mask, cpu->mask, unbanned_cpus);
+
+	cpus = g_list_append(cpus, cpu);
+	core_count++;
+}
+
+/*
+ * Print one line per interrupt in the given list, each line preceded by
+ * 'spaces' blanks so the output nests under its parent in dump_tree().
+ */
+static void dump_irqs(int spaces, GList *interrupts)
+{
+	GList *node;
+
+	for (node = interrupts; node; node = g_list_next(node)) {
+		struct interrupt *irq = node->data;
+		int pad = spaces;
+
+		while (pad-- > 0)
+			printf(" ");
+		printf("Interrupt %i (%s/%u) \n", irq->number, classes[irq->class], (unsigned int)irq->workload);
+	}
+}
+
+/*
+ * Print the whole package / cache domain / cpu hierarchy to stdout,
+ * together with the workload and the interrupts assigned at each level.
+ */
+void dump_tree(void)
+{
+	GList *p_iter, *c_iter, *cp_iter;
+	struct package *package;
+	struct cache_domain *cache_domain;
+	struct cpu_core *cpu;
+	char buffer[4096];
+
+	for (p_iter = g_list_first(packages); p_iter; p_iter = g_list_next(p_iter)) {
+		package = p_iter->data;
+		/* always pass the real buffer size rather than two different literals */
+		cpumask_scnprintf(buffer, sizeof(buffer), package->mask);
+		printf("Package %i: cpu mask is %s (workload %lu)\n", package->number, buffer, (unsigned long)package->workload);
+
+		for (c_iter = g_list_first(package->cache_domains); c_iter; c_iter = g_list_next(c_iter)) {
+			cache_domain = c_iter->data;
+			cpumask_scnprintf(buffer, sizeof(buffer), cache_domain->mask);
+			printf(" Cache domain %i: cpu mask is %s (workload %lu) \n", cache_domain->number, buffer, (unsigned long)cache_domain->workload);
+
+			for (cp_iter = cache_domain->cpu_cores; cp_iter; cp_iter = g_list_next(cp_iter)) {
+				cpu = cp_iter->data;
+				printf(" CPU number %i (workload %lu)\n", cpu->number, (unsigned long)cpu->workload);
+				dump_irqs(18, cpu->interrupts);
+			}
+			dump_irqs(10, cache_domain->interrupts);
+		}
+		dump_irqs(2, package->interrupts);
+	}
+}
+
+/*
+ * Forget the results of the previous balancing pass: at every level of
+ * the tree (package, cache domain, cpu) the workload is zeroed, the list
+ * of assigned interrupts is released and the per-class counters are reset.
+ * Only the list cells are freed, not the interrupt objects they point to.
+ */
+void clear_work_stats(void)
+{
+	GList *pkg_node, *dom_node, *cpu_node;
+
+	for (pkg_node = g_list_first(packages); pkg_node; pkg_node = g_list_next(pkg_node)) {
+		struct package *pkg = pkg_node->data;
+
+		pkg->workload = 0;
+		g_list_free(pkg->interrupts);
+		pkg->interrupts = NULL;
+		memset(pkg->class_count, 0, sizeof(pkg->class_count));
+
+		for (dom_node = g_list_first(pkg->cache_domains); dom_node; dom_node = g_list_next(dom_node)) {
+			struct cache_domain *dom = dom_node->data;
+
+			dom->workload = 0;
+			g_list_free(dom->interrupts);
+			dom->interrupts = NULL;
+			memset(dom->class_count, 0, sizeof(dom->class_count));
+
+			for (cpu_node = dom->cpu_cores; cpu_node; cpu_node = g_list_next(cpu_node)) {
+				struct cpu_core *core = cpu_node->data;
+
+				core->workload = 0;
+				g_list_free(core->interrupts);
+				core->interrupts = NULL;
+				memset(core->class_count, 0, sizeof(core->class_count));
+			}
+		}
+	}
+}
+
+
+/*
+ * Build the cpu topology: scan /sys/devices/system/cpu for cpu<N>
+ * directories, record each cpu with do_one_cpu(), then group the cpus
+ * into cache domains and packages. In debug mode the resulting tree is
+ * printed. Does nothing if the sysfs directory cannot be opened.
+ */
+void parse_cpu_tree(void)
+{
+	DIR *dir;
+	struct dirent *entry;
+
+	cpus_complement(unbanned_cpus, banned_cpus);
+
+	dir = opendir("/sys/devices/system/cpu");
+	if (!dir)
+		return;
+	while ((entry = readdir(dir)) != NULL) {
+		const char *name = entry->d_name;
+		char new_path[PATH_MAX];
+
+		/*
+		 * Only "cpu" followed by a digit is a processor directory.
+		 * A plain substring match would also accept entries such as
+		 * "cpufreq" or "cpuidle", which do_one_cpu() would then
+		 * misread as cpu 0.
+		 */
+		if (strncmp(name, "cpu", 3) != 0 || name[3] < '0' || name[3] > '9')
+			continue;
+		snprintf(new_path, sizeof(new_path), "/sys/devices/system/cpu/%s", name);
+		do_one_cpu(new_path);
+	}
+	closedir(dir);
+
+	fill_cache_domain();
+	fill_packages();
+
+	if (debug_mode)
+		dump_tree();
+}
+
+
+/*
+ * Tear down the complete cpu tree (packages, cache domains and cpus) and
+ * reset the three counters, so that parse_cpu_tree() can build a fresh
+ * tree afterwards. The member lists only have their cells released; the
+ * objects they reference are freed when their own top-level list is
+ * emptied.
+ */
+void clear_cpu_tree(void)
+{
+	GList *head;
+
+	while ((head = g_list_first(packages)) != NULL) {
+		struct package *pkg = head->data;
+
+		g_list_free(pkg->cache_domains);
+		g_list_free(pkg->interrupts);
+		free(pkg);
+		packages = g_list_delete_link(packages, head);
+	}
+	package_count = 0;
+
+	while ((head = g_list_first(cache_domains)) != NULL) {
+		struct cache_domain *dom = head->data;
+
+		g_list_free(dom->cpu_cores);
+		g_list_free(dom->interrupts);
+		free(dom);
+		cache_domains = g_list_delete_link(cache_domains, head);
+	}
+	cache_domain_count = 0;
+
+	while ((head = g_list_first(cpus)) != NULL) {
+		struct cpu_core *core = head->data;
+
+		g_list_free(core->interrupts);
+		free(core);
+		cpus = g_list_delete_link(cpus, head);
+	}
+	core_count = 0;
+}
--- /dev/null
+.\"Generated by db2man.xsl. Don't modify this, modify the source.
+.de Sh \" Subsection
+.br
+.if t .Sp
+.ne 5
+.PP
+\fB\\$1\fR
+.PP
+..
+.de Sp \" Vertical space (when we can't use .PP)
+.if t .sp .5v
+.if n .sp
+..
+.de Ip \" List item
+.br
+.ie \\n(.$>=3 .ne \\$3
+.el .ne 3
+.IP "\\$1" \\$2
+..
+.TH "IRQBALANCE" 1 "Dec 2006" "Linux" "irqbalance"
+.SH NAME
+irqbalance \- distribute hardware interrupts across processors on a multiprocessor system
+.SH "SYNOPSIS"
+
+.nf
+\fBirqbalance\fR
+.fi
+
+.SH "DESCRIPTION"
+
+.PP
+The purpose of \fBirqbalance\fR is to distribute hardware interrupts across processors on a multiprocessor system in order to increase performance\&.
+
+.SH "OPTIONS"
+
+.TP
+.B --oneshot
+Causes irqbalance to be run once, after which the daemon exits
+
+.TP
+.B --debug
+Causes irqbalance to run in the foreground and extra debug information to be printed
+
+.SH "ENVIRONMENT VARIABLES"
+.TP
+.B IRQBALANCE_ONESHOT
+Same as --oneshot
+
+.TP
+.B IRQBALANCE_DEBUG
+Same as --debug
+
+.TP
+.B IRQBALANCE_BANNED_CPUS
+Provides a mask of cpus which irqbalance should ignore and never assign interrupts to
+
+.TP
+.B IRQBALANCE_BANNED_INTERRUPTS
+A list of space delimited IRQ numbers that irqbalance should not touch
+
+.SH "Homepage"
+http://www.irqbalance.org
+
+
--- /dev/null
+/*
+ * Copyright (C) 2006, Intel Corporation
+ *
+ * This file is part of irqbalance
+ *
+ * This program file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program in a file named COPYING; if not, write to the
+ * Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301 USA
+ */
+#include "config.h"
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <malloc.h>
+#include <sys/time.h>
+#ifdef HAVE_LIBCAP_NG
+#include <cap-ng.h>
+#endif
+#include "irqbalance.h"
+
+/* Non-zero: run a single balancing pass and exit (set from argv/environment in main) */
+int one_shot_mode;
+/* Non-zero: stay in the foreground and print diagnostics (set from argv/environment in main) */
+int debug_mode;
+
+/* Non-zero requests a rebuild of the cpu tree; checked and cleared in main's loop */
+int need_cpu_rescan;
+
+/* Mask of cpus that must not receive interrupts; declared here, defined in another file */
+extern cpumask_t banned_cpus;
+
+/* Number of completed main-loop iterations; drives the periodic numa rescan */
+static int counter;
+
+
+/*
+ * Sleep for roughly 'seconds' seconds, shortened by the fraction of the
+ * current second that has already elapsed, so that the wakeup lands close
+ * to a whole-second boundary of the wall clock.
+ */
+void sleep_approx(int seconds)
+{
+	struct timeval now;
+	struct timespec delay;
+
+	gettimeofday(&now, NULL);
+	delay.tv_sec = seconds;
+	delay.tv_nsec = -now.tv_usec * 1000;
+	/* borrow from the seconds field until the nanosecond part is non-negative */
+	for (; delay.tv_nsec < 0; delay.tv_nsec += 1000000000)
+		delay.tv_sec--;
+	nanosleep(&delay, NULL);
+}
+
+/*
+ * Program entry point: read the configuration from the first command line
+ * argument and the environment, build the cpu tree, daemonize unless in
+ * debug mode, take an initial interrupt sample and then rebalance in a
+ * loop (or once, in one-shot mode).
+ */
+int main(int argc, char** argv)
+{
+ /* only argv[1] is examined, and it is matched by substring */
+ if (argc>1 && strstr(argv[1],"debug"))
+ debug_mode=1;
+ if (argc>1 && strstr(argv[1],"oneshot"))
+ one_shot_mode=1;
+
+ /* cpus the user wants to keep free of interrupts */
+ if (getenv("IRQBALANCE_BANNED_CPUS")) {
+ cpumask_parse_user(getenv("IRQBALANCE_BANNED_CPUS"), strlen(getenv("IRQBALANCE_BANNED_CPUS")), banned_cpus);
+ }
+
+ /* for these two variables mere presence counts; the value is not looked at */
+ if (getenv("IRQBALANCE_ONESHOT"))
+ one_shot_mode=1;
+
+ if (getenv("IRQBALANCE_DEBUG"))
+ debug_mode=1;
+
+ parse_cpu_tree();
+
+
+ /* On single core UP systems irqbalance obviously has no work to do */
+ if (core_count<2)
+ exit(EXIT_SUCCESS);
+ /* On dual core/hyperthreading shared cache systems just do a one shot setup */
+ if (cache_domain_count==1)
+ one_shot_mode = 1;
+
+ /* detach from the terminal unless debugging */
+ if (!debug_mode)
+ if (daemon(0,0))
+ exit(EXIT_FAILURE);
+
+#ifdef HAVE_LIBCAP_NG
+ // Drop capabilities
+ capng_clear(CAPNG_SELECT_BOTH);
+ capng_lock();
+ capng_apply(CAPNG_SELECT_BOTH);
+#endif
+
+ /* initial sample: two readings of the interrupt counts a short sleep apart */
+ parse_proc_interrupts();
+ sleep(SLEEP_INTERVAL/4);
+ reset_counts();
+ parse_proc_interrupts();
+ pci_numa_scan();
+ calculate_workload();
+ sort_irq_list();
+ if (debug_mode)
+ dump_workloads();
+
+ while (1) {
+ sleep_approx(SLEEP_INTERVAL);
+ if (debug_mode)
+ printf("\n\n\n-----------------------------------------------------------------------------\n");
+
+
+ check_power_mode();
+ parse_proc_interrupts();
+
+ /* cope with cpu hotplug -- detected during /proc/interrupts parsing */
+ if (need_cpu_rescan) {
+ need_cpu_rescan = 0;
+ /* if there's a hotplug event we better turn off power mode for a bit until things settle */
+ power_mode = 0;
+ if (debug_mode)
+ printf("Rescanning cpu topology \n");
+ reset_counts();
+ clear_work_stats();
+
+ /* throw the old topology away and read it again */
+ clear_cpu_tree();
+ parse_cpu_tree();
+ }
+
+ /* deal with NAPI */
+ account_for_nic_stats();
+ calculate_workload();
+
+ /* to cope with dynamic configurations we scan for new numa information
+ * once every 5 minutes
+ */
+ if (counter % NUMA_REFRESH_INTERVAL == 16)
+ pci_numa_scan();
+
+ calculate_placement();
+ activate_mapping();
+
+ if (debug_mode)
+ dump_tree();
+ /* one-shot mode leaves after the first completed pass */
+ if (one_shot_mode)
+ break;
+ counter++;
+ }
+ return EXIT_SUCCESS;
+}
--- /dev/null
+/* Shared declarations for the irqbalance sources: global state and cross-file functions. */
+#ifndef __INCLUDE_GUARD_IRQBALANCE_H_
+#define __INCLUDE_GUARD_IRQBALANCE_H_
+
+
+#include "constants.h"
+
+#include "cpumask.h"
+
+#include <stdint.h>
+#include <glib.h>
+
+#include "types.h"
+
+struct interrupt;
+
+/* sizes of the cpu tree */
+extern int package_count;
+extern int cache_domain_count;
+extern int core_count;
+/* printable class names, indexed by an interrupt's class */
+extern char *classes[];
+/* balance level to use for each interrupt class */
+extern int map_class_to_level[7];
+/* number of interrupts currently in each class; refilled by calculate_workload() */
+extern int class_counts[7];
+/* run-time mode flags */
+extern int debug_mode;
+extern int power_mode;
+extern int need_cpu_rescan;
+extern int one_shot_mode;
+/* list of struct interrupt, one per tracked irq */
+extern GList *interrupts;
+
+
+/* cpu topology and interrupt bookkeeping */
+extern void parse_cpu_tree(void);
+extern void clear_work_stats(void);
+extern void parse_proc_interrupts(void);
+extern void set_interrupt_count(int number, uint64_t count);
+extern void add_interrupt_count(int number, uint64_t count, int type);
+extern int find_class(struct interrupt *irq, char *string);
+extern void add_interrupt_numa(int number, cpumask_t mask, int type);
+
+/* workload accounting and placement */
+void calculate_workload(void);
+void reset_counts(void);
+void dump_workloads(void);
+void sort_irq_list(void);
+void calculate_placement(void);
+void dump_tree(void);
+
+/* remaining steps called from main */
+void activate_mapping(void);
+void account_for_nic_stats(void);
+void check_power_mode(void);
+void clear_cpu_tree(void);
+void pci_numa_scan(void);
+
+#endif
--- /dev/null
+/*
+ * Copyright (C) 2006, Intel Corporation
+ *
+ * This file is part of irqbalance
+ *
+ * This program file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program in a file named COPYING; if not, write to the
+ * Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301 USA
+ */
+
+/*
+ * This file has the basic functions to manipulate interrupt metadata
+ */
+#include "config.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <dirent.h>
+
+#include "types.h"
+#include "irqbalance.h"
+
+/* List of struct interrupt, one per tracked irq; entries are added by set_interrupt_count() */
+GList *interrupts;
+
+
+
+/*
+ * Read /proc/irq/<number>/affinity_hint and parse it into irq->node_mask.
+ * Nothing is parsed when the file cannot be opened or no line could be
+ * read from it.
+ */
+void get_affinity_hint(struct interrupt *irq, int number)
+{
+	char buf[PATH_MAX];
+	char *line = NULL;
+	size_t size = 0;
+	FILE *file;
+
+	snprintf(buf, sizeof(buf), "/proc/irq/%i/affinity_hint", number);
+	file = fopen(buf, "r");
+	if (!file)
+		return;
+	/* getline() reports EOF/error as -1, never as 0 */
+	if (getline(&line, &size, file) > 0)
+		cpumask_parse_user(line, strlen(line), irq->node_mask);
+	fclose(file);
+	free(line);
+}
+
+/*
+ * Read the first line of the file at 'path' and parse it as a cpu mask
+ * into *mask. Nothing is parsed when the file cannot be opened or no
+ * line could be read.
+ */
+static void read_irq_mask_file(const char *path, cpumask_t *mask)
+{
+	FILE *file;
+	char *line = NULL;
+	size_t size = 0;
+
+	file = fopen(path, "r");
+	if (!file)
+		return;
+	/* getline() reports EOF/error as -1, never as 0 */
+	if (getline(&line, &size, file) > 0)
+		cpumask_parse_user(line, strlen(line), *mask);
+	fclose(file);
+	free(line);
+}
+
+/*
+ * This function classifies and reads various things from /proc about a specific irq:
+ * the current and allowed affinity masks, the affinity hint and the
+ * interrupt class (every other directory entry name is handed to
+ * find_class()). Afterwards the balance level is derived from the class
+ * and forced to BALANCE_NONE when fewer than two cpus are allowed or when
+ * the irq is listed in IRQBALANCE_BANNED_INTERRUPTS.
+ */
+static void investigate(struct interrupt *irq, int number)
+{
+	DIR *dir;
+	struct dirent *entry;
+	char *c, *c2;
+	int nr, count = 0;
+	char buf[PATH_MAX];
+
+	snprintf(buf, sizeof(buf), "/proc/irq/%i", number);
+	dir = opendir(buf);
+	/* opendir() can fail; never hand a NULL stream to readdir() */
+	while (dir && (entry = readdir(dir)) != NULL) {
+		if (strcmp(entry->d_name, "smp_affinity") == 0) {
+			snprintf(buf, sizeof(buf), "/proc/irq/%i/smp_affinity", number);
+			read_irq_mask_file(buf, &irq->mask);
+		} else if (strcmp(entry->d_name, "allowed_affinity") == 0) {
+			snprintf(buf, sizeof(buf), "/proc/irq/%i/allowed_affinity", number);
+			read_irq_mask_file(buf, &irq->allowed_mask);
+		} else if (strcmp(entry->d_name, "affinity_hint") == 0) {
+			get_affinity_hint(irq, number);
+		} else {
+			irq->class = find_class(irq, entry->d_name);
+		}
+	}
+	if (dir)
+		closedir(dir);
+	irq->balance_level = map_class_to_level[irq->class];
+
+	for (nr = 0; nr < NR_CPUS; nr++)
+		if (cpu_isset(nr, irq->allowed_mask))
+			count++;
+
+	/* if there is no choice in the allowed mask, don't bother to balance */
+	if (count < 2)
+		irq->balance_level = BALANCE_NONE;
+
+	/* next, check the IRQBALANCE_BANNED_INTERRUPTS env variable for blacklisted irqs */
+	c = c2 = getenv("IRQBALANCE_BANNED_INTERRUPTS");
+	if (!c)
+		return;
+
+	/* parse number after number until strtoul() stops consuming input */
+	do {
+		c = c2;
+		nr = strtoul(c, &c2, 10);
+		if (c != c2 && nr == number)
+			irq->balance_level = BALANCE_NONE;
+	} while (c != c2);
+}
+
+
+/*
+ * Record the absolute interrupt count for irq 'number'. A known irq gets
+ * its count replaced and its affinity hint re-read; an unknown irq gets a
+ * new metadata entry which is classified via investigate() and appended
+ * to the global list. Counts below MIN_IRQ_COUNT are ignored unless
+ * running in one-shot mode.
+ */
+void set_interrupt_count(int number, uint64_t count)
+{
+	GList *node;
+	struct interrupt *irq;
+
+	/*
+	 * no need to track or set interrupt sources without any real
+	 * activity since boot, but allow for a few boot-time-only interrupts
+	 */
+	if (!one_shot_mode && count < MIN_IRQ_COUNT)
+		return;
+
+	for (node = g_list_first(interrupts); node; node = g_list_next(node)) {
+		irq = node->data;
+		if (irq->number != number)
+			continue;
+		irq->count = count;
+		/* see if affinity_hint changed */
+		get_affinity_hint(irq, number);
+		return;
+	}
+
+	/* not seen before: create the metadata */
+	irq = malloc(sizeof(*irq));
+	if (!irq)
+		return;
+	memset(irq, 0, sizeof(*irq));
+	irq->number = number;
+	irq->count = count;
+	irq->allowed_mask = CPU_MASK_ALL;
+	investigate(irq, number);
+	interrupts = g_list_append(interrupts, irq);
+}
+
+/*
+ * Credit 'count' additional units of work to irq 'number'. If the irq is
+ * balanced and 'type' ranks above its current class, the irq is promoted
+ * to that class and its balance level is refreshed. Unknown irqs and a
+ * zero count are ignored.
+ */
+void add_interrupt_count(int number, uint64_t count, int type)
+{
+	GList *node;
+
+	if (count == 0)
+		return;
+
+	for (node = g_list_first(interrupts); node; node = g_list_next(node)) {
+		struct interrupt *irq = node->data;
+
+		if (irq->number != number)
+			continue;
+
+		irq->extra += count;
+		if (irq->balance_level != BALANCE_NONE && irq->class < type) {
+			irq->class = type;
+			irq->balance_level = map_class_to_level[irq->class];
+		}
+		return;
+	}
+}
+
+/*
+ * Merge 'mask' into the numa affinity mask of irq 'number'. If the irq is
+ * balanced and 'type' ranks above its current class, the irq is promoted
+ * to that class and its balance level is refreshed. Unknown irqs are
+ * ignored.
+ */
+void add_interrupt_numa(int number, cpumask_t mask, int type)
+{
+	GList *node;
+
+	for (node = g_list_first(interrupts); node; node = g_list_next(node)) {
+		struct interrupt *irq = node->data;
+
+		if (irq->number != number)
+			continue;
+
+		cpus_or(irq->numa_mask, irq->numa_mask, mask);
+		if (irq->balance_level != BALANCE_NONE && irq->class < type) {
+			irq->class = type;
+			irq->balance_level = map_class_to_level[irq->class];
+		}
+		return;
+	}
+}
+
+/*
+ * Recompute the workload of every interrupt as the count delta since the
+ * previous pass, plus the extra work credited in the meantime, plus one
+ * third of the previous workload. Also rebuilds the per-class interrupt
+ * tally and starts a new measurement interval.
+ */
+void calculate_workload(void)
+{
+	GList *node;
+
+	memset(class_counts, 0, 7 * sizeof(class_counts[0]));
+
+	for (node = g_list_first(interrupts); node; node = g_list_next(node)) {
+		struct interrupt *irq = node->data;
+
+		irq->workload = irq->count - irq->old_count + irq->workload/3 + irq->extra;
+		class_counts[irq->class]++;
+		/* the next interval starts from the current reading */
+		irq->old_count = irq->count;
+		irq->extra = 0;
+	}
+}
+
+/*
+ * Start a new measurement interval for every interrupt: the current count
+ * becomes the baseline and any extra work credited so far is dropped.
+ */
+void reset_counts(void)
+{
+	GList *node;
+
+	for (node = g_list_first(interrupts); node; node = g_list_next(node)) {
+		struct interrupt *irq = node->data;
+
+		irq->old_count = irq->count;
+		irq->extra = 0;
+	}
+}
+
+/* Print number, class name and workload of every tracked interrupt to stdout. */
+void dump_workloads(void)
+{
+	GList *node;
+
+	for (node = g_list_first(interrupts); node; node = g_list_next(node)) {
+		struct interrupt *irq = node->data;
+
+		printf("Interrupt %i (class %s) has workload %lu \n", irq->number, classes[irq->class], (unsigned long)irq->workload);
+	}
+}
+
+
+/*
+ * Comparison callback for g_list_sort(): orders interrupts by class
+ * (highest first), then by workload (highest first), and finally by
+ * object address so that the result is never "equal".
+ */
+static gint sort_irqs(gconstpointer A, gconstpointer B)
+{
+	const struct interrupt *lhs = A;
+	const struct interrupt *rhs = B;
+
+	if (lhs->class != rhs->class)
+		return (lhs->class < rhs->class) ? 1 : -1;
+	if (lhs->workload != rhs->workload)
+		return (lhs->workload < rhs->workload) ? 1 : -1;
+	return (lhs < rhs) ? 1 : -1;
+}
+
+/*
+ * Re-order the global interrupt list with sort_irqs():
+ * class first (high->low) and then workload (high->low).
+ */
+void sort_irq_list(void)
+{
+	GList *sorted = g_list_sort(interrupts, sort_irqs);
+
+	interrupts = sorted;
+}
--- /dev/null
+/*
+ * Copyright (C) 2006, Intel Corporation
+ *
+ * This file is part of irqbalance
+ *
+ * This program file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program in a file named COPYING; if not, write to the
+ * Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301 USA
+ */
+
+/*
+ * Due to NAPI, the actual number of interrupts for a network NIC is usually low
+ * even though the amount of work is high; this file is there to compensate for this
+ * by adding actual package counts to the calculated amount of work of interrupts
+ */
+#include "config.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <linux/types.h>
+/* some distros (Debian / SLES) ship a totally broken ethtool.h */
+/* work around the breakage some */
+#define u32 __u32
+#define u16 __u16
+#define u8 __u8
+#define u64 __u64
+#include <linux/ethtool.h>
+#undef u8
+#undef u16
+#undef u32
+#undef u64
+#include <glib.h>
+#include <net/if.h>
+#include <linux/sockios.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <stdint.h>
+
+
+#include "irqbalance.h"
+
+/* Per-interface bookkeeping used to credit network traffic to the NIC's irq. */
+struct nic {
+ char ethname[64]; /* interface name as it appears in /proc/net/dev */
+ int irq; /* irq looked up by dev_to_irq(); 0 when the lookup failed */
+ uint64_t prev_pkt; /* rx+tx packet total seen on the previous pass */
+ int counter; /* number of find_nic() hits; triggers the periodic irq refresh */
+};
+
+/* List of struct nic, one per interface seen so far; appended to by new_nic() */
+static GList *nics;
+
+
+/*
+ * Find the irq number of the network device 'devname': the driver's bus
+ * address is queried with the ETHTOOL_GDRVINFO ioctl and the irq is then
+ * read from /sys/bus/pci/devices/<bus address>/irq.
+ *
+ * Returns the irq number, or 0 when any step fails.
+ */
+static int dev_to_irq(char *devname)
+{
+	int sock, ret;
+	struct ifreq ifr;
+	struct ethtool_drvinfo driver;
+	FILE *file;
+	char *line = NULL;
+	size_t size = 0;
+	int val = 0;
+	char buffer[PATH_MAX];
+
+	memset(&ifr, 0, sizeof(ifr));
+	memset(&driver, 0, sizeof(driver));
+
+	sock = socket(AF_INET, SOCK_DGRAM, 0);
+	if (sock < 0)
+		return 0;
+
+	/* ifr_name is a fixed-size array: bound the copy instead of trusting the caller */
+	snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s", devname);
+
+	driver.cmd = ETHTOOL_GDRVINFO;
+	ifr.ifr_data = (void *)&driver;
+	ret = ioctl(sock, SIOCETHTOOL, &ifr);
+	close(sock);
+	if (ret < 0)
+		return 0;
+
+	snprintf(buffer, sizeof(buffer), "/sys/bus/pci/devices/%s/irq", driver.bus_info);
+	file = fopen(buffer, "r");
+	if (!file)
+		return 0;
+	/* getline() reports EOF/error as -1, never as 0 */
+	if (getline(&line, &size, file) > 0)
+		val = strtoul(line, NULL, 10);
+	fclose(file);
+	free(line);
+	return val;
+}
+
+/*
+ * Create the bookkeeping entry for interface 'name', look up its irq and
+ * append the entry to the global nic list.
+ * Returns the new entry, or NULL if the allocation failed.
+ */
+static struct nic *new_nic(char *name)
+{
+	struct nic *fresh = malloc(sizeof(*fresh));
+
+	if (fresh == NULL)
+		return NULL;
+
+	memset(fresh, 0, sizeof(*fresh));
+	strcpy(fresh->ethname, name);
+	fresh->irq = dev_to_irq(name);
+
+	nics = g_list_append(nics, fresh);
+	return fresh;
+}
+
+/*
+ * Return the bookkeeping entry for interface 'name', creating it on first
+ * use. Every hit on an existing entry bumps its counter, and each time the
+ * counter reaches a multiple of NIC_REFRESH_INTERVAL the irq is looked up
+ * again, because ifup/ifdown can make the cached value go stale.
+ */
+static struct nic *find_nic(char *name)
+{
+	GList *node;
+
+	for (node = g_list_first(nics); node; node = g_list_next(node)) {
+		struct nic *known = node->data;
+
+		if (strcmp(known->ethname, name) != 0)
+			continue;
+
+		known->counter++;
+		if (known->counter % NIC_REFRESH_INTERVAL == 0)
+			known->irq = dev_to_irq(known->ethname);
+		return known;
+	}
+
+	return new_nic(name);
+}
+
+/*
+ * Walk /proc/net/dev and credit each interface's irq with half of the
+ * rx+tx packets seen since the previous call (capped at 100000), using
+ * add_interrupt_count() with class IRQ_ETH. The first reading of an
+ * interface only establishes the baseline.
+ */
+void account_for_nic_stats(void)
+{
+	struct nic *nic;
+	FILE *file;
+	char *line = NULL;
+	size_t size = 0;
+
+	file = fopen("/proc/net/dev", "r");
+	if (!file)
+		return;
+
+	/* first two lines are headers; getline() reports EOF/error as -1 */
+	if (getline(&line, &size, file) <= 0 || getline(&line, &size, file) <= 0)
+		goto out;
+
+	while (getline(&line, &size, file) > 0) {
+		uint64_t rxcount;
+		uint64_t txcount;
+		uint64_t delta;
+		char *c, *c2;
+		int i;
+
+		c = strchr(line, ':');
+		if (c == NULL) /* header line */
+			continue;
+		/* split "  name: counters..." into the name and the counter list */
+		*c = 0;
+		c++;
+		c2 = line;
+		while (*c2 == ' ')
+			c2++;
+		nic = find_nic(c2);
+		if (!nic)
+			continue;
+
+		/* column 1 is skipped, column 2 is the rx packet count */
+		(void)strtoull(c, &c, 10);
+		rxcount = strtoull(c, &c, 10);
+		/* skip the next seven columns; the one after them is the tx packet count */
+		for (i = 0; i < 7; i++)
+			(void)strtoull(c, &c, 10);
+		txcount = strtoull(c, &c, 10);
+
+		delta = (txcount + rxcount - nic->prev_pkt) / 2;
+		/* add the RX and TX packets to the irq count, but only for 50%;
+		   many packets generate another IRQ anyway and we don't want to
+		   overweigh this too much. Also limit this to 100.000 max */
+		if (delta > 100000)
+			delta = 100000;
+		if (delta > 0 && nic->prev_pkt != 0)
+			add_interrupt_count(nic->irq, delta, IRQ_ETH);
+		nic->prev_pkt = rxcount + txcount;
+	}
+out:
+	fclose(file);
+	free(line);
+}
--- /dev/null
+/*
+
+This file is copied from the Linux kernel and mildly adjusted for use in userspace
+
+
+*/
+#ifndef _ASM_GENERIC_BITOPS_NON_ATOMIC_H_
+#define _ASM_GENERIC_BITOPS_NON_ATOMIC_H_
+
+/* Single-bit mask for bit 'nr' within its unsigned long word */
+#define BITOP_MASK(nr) (1UL << ((nr) % BITS_PER_LONG))
+/* Index of the unsigned long word that holds bit 'nr' */
+#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG)
+
+/**
+ * set_bit - Set a bit in memory
+ * @nr: the bit to set
+ * @addr: the address to start counting from
+ *
+ * Plain, non-atomic read-modify-write of the word that holds the bit.
+ */
+static inline void set_bit(int nr, volatile unsigned long *addr)
+{
+	unsigned long *word = (unsigned long *)addr + BITOP_WORD(nr);
+
+	*word |= BITOP_MASK(nr);
+}
+
+/**
+ * clear_bit - Clear a bit in memory
+ * @nr: the bit to clear
+ * @addr: the address to start counting from
+ *
+ * Plain, non-atomic read-modify-write of the word that holds the bit.
+ */
+static inline void clear_bit(int nr, volatile unsigned long *addr)
+{
+	unsigned long *word = (unsigned long *)addr + BITOP_WORD(nr);
+
+	*word &= ~BITOP_MASK(nr);
+}
+
+/**
+ * __change_bit - Toggle a bit in memory
+ * @nr: the bit to change
+ * @addr: the address to start counting from
+ *
+ * Plain, non-atomic read-modify-write of the word that holds the bit.
+ */
+static inline void __change_bit(int nr, volatile unsigned long *addr)
+{
+	unsigned long *word = (unsigned long *)addr + BITOP_WORD(nr);
+
+	*word ^= BITOP_MASK(nr);
+}
+
+/**
+ * __test_and_set_bit - Set a bit and return its old value
+ * @nr: Bit to set
+ * @addr: Address to count from
+ *
+ * Returns 1 if the bit was set before the call, 0 otherwise.
+ * The read and the write are separate, non-atomic steps.
+ */
+static inline int __test_and_set_bit(int nr, volatile unsigned long *addr)
+{
+	unsigned long *word = (unsigned long *)addr + BITOP_WORD(nr);
+	unsigned long bit = BITOP_MASK(nr);
+	unsigned long before = *word;
+
+	*word = before | bit;
+	return (before & bit) ? 1 : 0;
+}
+
+/**
+ * __test_and_clear_bit - Clear a bit and return its old value
+ * @nr: Bit to clear
+ * @addr: Address to count from
+ *
+ * Returns 1 if the bit was set before the call, 0 otherwise.
+ * The read and the write are separate, non-atomic steps.
+ */
+static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr)
+{
+	unsigned long *word = (unsigned long *)addr + BITOP_WORD(nr);
+	unsigned long bit = BITOP_MASK(nr);
+	unsigned long before = *word;
+
+	*word = before & ~bit;
+	return (before & bit) ? 1 : 0;
+}
+
+/**
+ * __test_and_change_bit - Toggle a bit and return its old value
+ * @nr: Bit to toggle
+ * @addr: Address to count from
+ *
+ * Returns 1 if the bit was set before the call, 0 otherwise.
+ * WARNING: non atomic and it can be reordered!
+ */
+static inline int __test_and_change_bit(int nr,
+					volatile unsigned long *addr)
+{
+	unsigned long *word = (unsigned long *)addr + BITOP_WORD(nr);
+	unsigned long bit = BITOP_MASK(nr);
+	unsigned long before = *word;
+
+	*word = before ^ bit;
+	return (before & bit) ? 1 : 0;
+}
+
+/**
+ * test_bit - Determine whether a bit is set
+ * @nr: bit number to test
+ * @addr: Address to start counting from
+ *
+ * Returns 1 if the bit is set, 0 otherwise.
+ */
+static inline int test_bit(int nr, const volatile unsigned long *addr)
+{
+	unsigned long word = addr[BITOP_WORD(nr)];
+
+	return (word >> (nr & (BITS_PER_LONG-1))) & 1UL;
+}
+
+#endif /* _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ */
--- /dev/null
+/*
+ * Copyright (C) 2006, Intel Corporation
+ *
+ * This file is part of irqbalance
+ *
+ * This program file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program in a file named COPYING; if not, write to the
+ * Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301 USA
+ */
+
+/*
+ * This file tries to map the NUMA affinity of PCI devices to their
+ * interrupts. In addition, the PCI class information is used to refine
+ * the classification of interrupt sources.
+ */
+#include "config.h"
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <dirent.h>
+
+#include "irqbalance.h"
+
+/*
+ * Read the first line of /sys/bus/pci/devices/<dev>/<attr> into @buf.
+ *
+ * @buf (of @len bytes) is used first to build the path and then to hold
+ * the line that was read.  Returns 0 when the attribute file could be
+ * opened; @buf is then the line read, or the empty string if nothing
+ * could be read.  Returns -1 when the path does not fit into @buf or the
+ * file cannot be opened.
+ */
+static int read_pci_attr(const char *dev, const char *attr, char *buf, size_t len)
+{
+	FILE *file;
+	int n;
+
+	n = snprintf(buf, len, "/sys/bus/pci/devices/%s/%s", dev, attr);
+	if (n < 0 || (size_t)n >= len)
+		return -1;
+
+	file = fopen(buf, "r");
+	if (!file)
+		return -1;
+	if (fgets(buf, (int)len, file) == NULL)
+		buf[0] = 0;
+	fclose(file);
+	return 0;
+}
+
+/*
+ * Walk /sys/bus/pci/devices and, for every device that reports a
+ * non-zero irq, hand the irq number, the device's local_cpus mask and an
+ * interrupt class derived from the PCI class code to add_interrupt_numa().
+ * Devices whose irq, class or local_cpus attribute cannot be opened are
+ * skipped.
+ */
+void pci_numa_scan(void)
+{
+	DIR *dir;
+	struct dirent *entry;
+	cpumask_t mask;
+	char line[PATH_MAX];
+	int irq;
+	unsigned int class;
+
+	dir = opendir("/sys/bus/pci/devices");
+	if (!dir)
+		return;
+
+	while ((entry = readdir(dir)) != NULL) {
+		int type;
+
+		/* ignore names shorter than three characters, e.g. "." and ".." */
+		if (strlen(entry->d_name) < 3)
+			continue;
+
+		if (read_pci_attr(entry->d_name, "irq", line, sizeof(line)) < 0)
+			continue;
+		irq = strtoul(line, NULL, 10);
+		if (!irq)
+			continue;
+
+		if (read_pci_attr(entry->d_name, "class", line, sizeof(line)) < 0)
+			continue;
+		class = strtoul(line, NULL, 16);
+
+		if (read_pci_attr(entry->d_name, "local_cpus", line, sizeof(line)) < 0)
+			continue;
+		cpumask_parse_user(line, strlen(line), mask);
+
+		/* classify on the bits above the low 16 of the class value */
+		type = IRQ_OTHER;
+		if ((class>>16) == 0x01)
+			type = IRQ_SCSI;
+		/*
+		 * Class 0x02 is deliberately not mapped to IRQ_ETH here:
+		 * Ethernet gets the type via /proc/net/dev; in addition
+		 * down'd interfaces shouldn't boost interrupts.
+		 */
+		if ((class>>16) >= 0x03 && (class>>16) <= 0x0C)
+			type = IRQ_LEGACY;
+
+		add_interrupt_numa(irq, mask, type);
+	}
+	closedir(dir);
+}
--- /dev/null
+/*
+ * Copyright (C) 2006, Intel Corporation
+ *
+ * This file is part of irqbalance
+ *
+ * This program file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program in a file named COPYING; if not, write to the
+ * Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301 USA
+ */
+#include "config.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdint.h>
+
+#include "types.h"
+#include "irqbalance.h"
+
+
+/*
+ * Non-zero while in power-save mode. The cost and placement functions in
+ * this file only read it; check_power_mode() sets and clears it.
+ */
+int power_mode;
+
+/* Global lists walked by the placement code in this file; defined elsewhere. */
+extern GList *interrupts, *packages, *cache_domains, *cpus;
+
+/*
+ * Cost of putting @irq on @package: the irq's own workload plus a sum of
+ * penalties.  place_packages() adds the package's current workload to
+ * this value and picks the package with the lowest total.
+ */
+static uint64_t package_cost_func(struct interrupt *irq, struct package *package)
+{
+	int penalty = 0;
+	int class_limit;
+
+	/* moving away from the cpus the irq was on before (cold package/cache/etc) */
+	if (!cpus_intersects(irq->old_mask, package->mask))
+		penalty += CROSS_PACKAGE_PENALTY;
+
+	/* a little numa affinity */
+	if (!cpus_intersects(irq->numa_mask, package->mask))
+		penalty += NUMA_PENALTY;
+
+	/* an irq that has had 0 interrupts for a while may move about more easily */
+	if (irq->workload == 0)
+		penalty /= 10;
+
+	/*
+	 * in power save mode, you better be on package 0, with overflow to
+	 * the next package if really needed
+	 */
+	if (power_mode)
+		penalty += POWER_MODE_PACKAGE_THRESHOLD * package->number;
+
+	/*
+	 * if this package already holds its share (rounded up) of the irqs
+	 * of this class, just block -- except in power mode
+	 */
+	class_limit = (class_counts[irq->class] + package_count -1 ) / package_count;
+	if (!power_mode && package->class_count[irq->class] >= class_limit)
+		penalty += 300000;
+
+	/* a package with no cpus in the allowed mask is blocked as well */
+	if (!cpus_intersects(irq->allowed_mask, package->mask))
+		penalty += 600000;
+
+	return irq->workload + penalty;
+}
+
+/*
+ * Cost of putting @irq on @cache_domain: the irq's own workload plus a
+ * sum of penalties.  place_cache_domain() adds the domain's current
+ * workload to this value and picks the domain with the lowest total.
+ */
+static uint64_t cache_domain_cost_func(struct interrupt *irq, struct cache_domain *cache_domain)
+{
+	int penalty = 0;
+
+	/* moving to a cold cache costs half the cross-package penalty */
+	if (!cpus_intersects(irq->old_mask, cache_domain->mask))
+		penalty += CROSS_PACKAGE_PENALTY/2;
+
+	/* a little numa affinity */
+	if (!cpus_intersects(irq->numa_mask, cache_domain->mask))
+		penalty += NUMA_PENALTY;
+
+	/* an irq that has had 0 interrupts for a while may move about more easily */
+	if (irq->workload == 0)
+		penalty /= 10;
+
+	/* pay for each interrupt of the same class already placed here */
+	penalty += CLASS_VIOLATION_PENTALTY * cache_domain->class_count[irq->class];
+
+	/* a cache domain with no cpus in the allowed mask is blocked */
+	if (!cpus_intersects(irq->allowed_mask, cache_domain->mask))
+		penalty += 600000;
+
+	return irq->workload + penalty;
+}
+
+/*
+ * Cost of putting @irq on @cpu: the irq's own workload plus a sum of
+ * penalties.  place_core() adds the core's current workload to this
+ * value and picks the core with the lowest total.
+ */
+static uint64_t cpu_cost_func(struct interrupt *irq, struct cpu_core *cpu)
+{
+	int penalty = 0;
+
+	/* moving to a colder core costs a third of the cross-package penalty */
+	if (!cpus_intersects(irq->old_mask, cpu->mask))
+		penalty += CROSS_PACKAGE_PENALTY/3;
+
+	/* a little numa affinity */
+	if (!cpus_intersects(irq->numa_mask, cpu->mask))
+		penalty += NUMA_PENALTY;
+
+	/* an irq that has had 0 interrupts for a while may move about more easily */
+	if (irq->workload == 0)
+		penalty /= 10;
+
+	/*
+	 * since some chipsets only place at the first cpu, give a tiny
+	 * preference to non-first cpus for specifically placed interrupts
+	 */
+	if (first_cpu(cpu->cache_mask) == cpu->number)
+		penalty += 1;
+
+	/* pay for each interrupt of the same class already placed here */
+	penalty += CLASS_VIOLATION_PENTALTY * cpu->class_count[irq->class];
+
+	/* a core with no cpus in the allowed mask is blocked */
+	if (!cpus_intersects(irq->allowed_mask, cpu->mask))
+		penalty += 600000;
+
+	return irq->workload + penalty;
+}
+
+
+/*
+ * Push the interrupts currently attached to @package down to the cache
+ * domain of that package with the lowest (current workload + placement
+ * cost).  Interrupts whose balance_level is BALANCE_PACKAGE or lower stay
+ * on the package list.
+ *
+ * A moved interrupt is unlinked from package->interrupts, appended to the
+ * chosen domain's list, accounted in the domain's workload and class
+ * counters, and gets the domain's cpu mask.
+ */
+static void place_cache_domain(struct package *package)
+{
+	GList *iter, *next;
+	GList *dom;
+	struct interrupt *irq;
+	struct cache_domain *cache_domain;
+
+	for (iter = g_list_first(package->interrupts); iter; iter = next) {
+		struct cache_domain *best = NULL;
+		/*
+		 * Costs are 64-bit; start from the largest 64-bit value so that
+		 * a domain is still chosen when every candidate costs INT_MAX
+		 * or more.
+		 */
+		uint64_t best_cost = UINT64_MAX;
+
+		/* fetch the successor first: iter may be unlinked below */
+		next = g_list_next(iter);
+		irq = iter->data;
+
+		if (irq->balance_level <= BALANCE_PACKAGE)
+			continue;
+
+		for (dom = g_list_first(package->cache_domains); dom; dom = g_list_next(dom)) {
+			uint64_t newload;
+
+			cache_domain = dom->data;
+			newload = cache_domain->workload + cache_domain_cost_func(irq, cache_domain);
+			if (newload < best_cost) {
+				best = cache_domain;
+				best_cost = newload;
+			}
+		}
+		if (!best)
+			continue;
+
+		package->interrupts = g_list_delete_link(package->interrupts, iter);
+
+		best->workload += irq->workload + 1;
+		best->interrupts = g_list_append(best->interrupts, irq);
+		best->class_count[irq->class]++;
+		irq->mask = best->mask;
+	}
+}
+
+
+/*
+ * Push the interrupts currently attached to @cache_domain down to the
+ * core of that domain with the lowest (current workload + placement
+ * cost).
+ *
+ * A moved interrupt is unlinked from cache_domain->interrupts, appended
+ * to the chosen core's list, accounted in the core's workload and class
+ * counters, and gets the core's cpu mask.
+ */
+static void place_core(struct cache_domain *cache_domain)
+{
+	GList *iter, *next;
+	GList *core;
+	struct interrupt *irq;
+	struct cpu_core *cpu;
+
+	for (iter = g_list_first(cache_domain->interrupts); iter; iter = next) {
+		struct cpu_core *best = NULL;
+		/*
+		 * Costs are 64-bit; start from the largest 64-bit value so that
+		 * a core is still chosen when every candidate costs INT_MAX or
+		 * more.
+		 */
+		uint64_t best_cost = UINT64_MAX;
+
+		/* fetch the successor first: iter may be unlinked below */
+		next = g_list_next(iter);
+		irq = iter->data;
+
+		/* if the irq isn't per-core policy and is not very busy, leave it at cache domain level */
+		if (irq->balance_level <= BALANCE_CACHE && irq->workload < CORE_SPECIFIC_THRESHOLD && !one_shot_mode)
+			continue;
+
+		for (core = g_list_first(cache_domain->cpu_cores); core; core = g_list_next(core)) {
+			uint64_t newload;
+
+			cpu = core->data;
+			newload = cpu->workload + cpu_cost_func(irq, cpu);
+			if (newload < best_cost) {
+				best = cpu;
+				best_cost = newload;
+			}
+		}
+		if (!best)
+			continue;
+
+		cache_domain->interrupts = g_list_delete_link(cache_domain->interrupts, iter);
+
+		best->workload += irq->workload + 1;
+		best->interrupts = g_list_append(best->interrupts, irq);
+		best->class_count[irq->class]++;
+		irq->mask = best->mask;
+	}
+}
+
+
+/*
+ * Assign every interrupt in @list, except those with balance_level
+ * BALANCE_NONE, to the package with the lowest (current workload +
+ * placement cost).
+ *
+ * The interrupt is appended to the chosen package's list, accounted in
+ * the package's workload and class counters, and gets the package's cpu
+ * mask.  @list itself is not modified.
+ */
+static void place_packages(GList *list)
+{
+	GList *iter;
+	GList *pkg;
+	struct interrupt *irq;
+	struct package *package;
+
+	for (iter = g_list_first(list); iter; iter = g_list_next(iter)) {
+		struct package *best = NULL;
+		/*
+		 * Costs are 64-bit; start from the largest 64-bit value so that
+		 * a package is still chosen when every candidate costs INT_MAX
+		 * or more.
+		 */
+		uint64_t best_cost = UINT64_MAX;
+
+		irq = iter->data;
+		if (irq->balance_level == BALANCE_NONE)
+			continue;
+
+		for (pkg = g_list_first(packages); pkg; pkg = g_list_next(pkg)) {
+			uint64_t newload;
+
+			package = pkg->data;
+			newload = package->workload + package_cost_func(irq, package);
+			if (newload < best_cost) {
+				best = package;
+				best_cost = newload;
+			}
+		}
+		if (!best)
+			continue;
+
+		best->workload += irq->workload + 1;
+		best->interrupts = g_list_append(best->interrupts, irq);
+		best->class_count[irq->class]++;
+		irq->mask = best->mask;
+	}
+}
+
+
+/*
+ * For every interrupt in @list that is not BALANCE_NONE: if its node_mask
+ * is neither empty nor full and differs from the current mask, remember
+ * the current mask in old_mask and replace it with node_mask.
+ */
+static void place_affinity_hint(GList *list)
+{
+	GList *node;
+
+	/* still need to balance best workload within the affinity_hint mask */
+	for (node = g_list_first(list); node; node = g_list_next(node)) {
+		struct interrupt *irq = node->data;
+
+		if (irq->balance_level == BALANCE_NONE)
+			continue;
+
+		/* nothing to override: no hint, already applied, or hint is "all cpus" */
+		if (cpus_empty(irq->node_mask) ||
+		    cpus_equal(irq->mask, irq->node_mask) ||
+		    cpus_full(irq->node_mask))
+			continue;
+
+		irq->old_mask = irq->mask;
+		irq->mask = irq->node_mask;
+	}
+}
+
+
+/*
+ * Account for the interrupts that are not balanced (BALANCE_NONE): add
+ * the workload of each such interrupt to every package, cache domain and
+ * cpu whose mask intersects the interrupt's node_mask or current mask.
+ */
+static void do_unroutables(void)
+{
+	GList *inter, *iter;
+
+	for (inter = g_list_first(interrupts); inter; inter = g_list_next(inter)) {
+		struct interrupt *irq = inter->data;
+
+		if (irq->balance_level != BALANCE_NONE)
+			continue;
+
+		for (iter = g_list_first(packages); iter; iter = g_list_next(iter)) {
+			struct package *package = iter->data;
+
+			if (cpus_intersects(package->mask, irq->node_mask) ||
+			    cpus_intersects(package->mask, irq->mask))
+				package->workload += irq->workload;
+		}
+
+		for (iter = g_list_first(cache_domains); iter; iter = g_list_next(iter)) {
+			struct cache_domain *cache_domain = iter->data;
+
+			if (cpus_intersects(cache_domain->mask, irq->node_mask) ||
+			    cpus_intersects(cache_domain->mask, irq->mask))
+				cache_domain->workload += irq->workload;
+		}
+
+		for (iter = g_list_first(cpus); iter; iter = g_list_next(iter)) {
+			struct cpu_core *cpu = iter->data;
+
+			if (cpus_intersects(cpu->mask, irq->node_mask) ||
+			    cpus_intersects(cpu->mask, irq->mask))
+				cpu->workload += irq->workload;
+		}
+	}
+}
+
+
+/*
+ * Recompute where every interrupt should go.
+ *
+ * The steps run in this order: clear the old statistics, sort the irq
+ * list, account for the unbalanced interrupts, place interrupts on
+ * packages, then within each package on cache domains, then within each
+ * cache domain on cores, and finally apply the per-irq affinity hints.
+ */
+void calculate_placement(void)
+{
+	GList *node;
+
+	/* first clear old data */
+	clear_work_stats();
+	sort_irq_list();
+	do_unroutables();
+
+	place_packages(interrupts);
+
+	for (node = g_list_first(packages); node; node = g_list_next(node)) {
+		struct package *package = node->data;
+
+		place_cache_domain(package);
+	}
+
+	for (node = g_list_first(cache_domains); node; node = g_list_next(node)) {
+		struct cache_domain *cache_domain = node->data;
+
+		place_core(cache_domain);
+	}
+
+	/*
+	 * if affinity_hint is populated on irq and is not set to
+	 * all CPUs (meaning it's initialized), honor that above
+	 * anything in the package locality/workload.
+	 */
+	place_affinity_hint(interrupts);
+}
--- /dev/null
+/*
+ * Copyright (C) 2006, Intel Corporation
+ *
+ * This file is part of irqbalance
+ *
+ * This program file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program in a file named COPYING; if not, write to the
+ * Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301 USA
+ */
+#include "config.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "irqbalance.h"
+
+
+/* Set to 1 / 0 by check_power_mode() below; defined in another file. */
+extern int power_mode;
+
+/* Combined irq + softirq counter value seen by the previous check_power_mode() call. */
+static uint64_t previous;
+
+/*
+ * Number of consecutive checks whose delta stayed below
+ * POWER_MODE_SOFTIRQ_THRESHOLD; reset to 0 by any check at or above it.
+ */
+static unsigned int hysteresis;
+
+/*
+ * Decide between power-save and performance mode from the first line of
+ * /proc/stat.
+ *
+ * The 6th and 7th numeric fields of that line (labelled irq and softirq
+ * below) are summed and compared with the sum seen on the previous call.
+ * Once the delta has stayed below POWER_MODE_SOFTIRQ_THRESHOLD for more
+ * than POWER_MODE_HYSTERESIS consecutive calls, power_mode is set to 1;
+ * any delta at or above the threshold sets it back to 0 and restarts the
+ * count.
+ *
+ * If /proc/stat cannot be opened or read, nothing is changed.
+ */
+void check_power_mode(void)
+{
+	FILE *file;
+	char *line = NULL;
+	size_t size = 0;
+	ssize_t len;
+	char *c;
+	uint64_t irq, softirq;
+	int field;
+
+	file = fopen("/proc/stat", "r");
+	if (!file)
+		return;
+	len = getline(&line, &size, file);
+	fclose(file);
+
+	/*
+	 * getline() reports failure with -1 (never 0) and may still have
+	 * allocated a buffer.  Parsing starts at offset 4, so anything
+	 * shorter than that is rejected as well.
+	 */
+	if (len < 4) {
+		free(line);
+		return;
+	}
+
+	/* skip the 4-character prefix (presumably the "cpu " label) */
+	c = &line[4];
+
+	/* step over user, nice, system, idle and iowait */
+	for (field = 0; field < 5; field++)
+		(void)strtoull(c, &c, 10);
+	irq = strtoull(c, &c, 10);	/* irq */
+	softirq = strtoull(c, &c, 10);	/* softirq */
+
+	irq += softirq;
+	/* NOTE(review): printed regardless of debug_mode -- confirm this is intended */
+	printf("IRQ delta is %lu \n", (unsigned long)(irq - previous) );
+	if (irq - previous < POWER_MODE_SOFTIRQ_THRESHOLD) {
+		hysteresis++;
+		if (hysteresis > POWER_MODE_HYSTERESIS) {
+			if (debug_mode && !power_mode)
+				printf("IRQ delta is %lu, switching to power mode \n", (unsigned long)(irq - previous) );
+			power_mode = 1;
+		}
+	} else {
+		if (debug_mode && power_mode)
+			printf("IRQ delta is %lu, switching to performance mode \n", (unsigned long)(irq - previous) );
+		power_mode = 0;
+		hysteresis = 0;
+	}
+	previous = irq;
+	free(line);
+}
+
--- /dev/null
+/*
+ * Copyright (C) 2006, Intel Corporation
+ *
+ * This file is part of irqbalance
+ *
+ * This program file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program in a file named COPYING; if not, write to the
+ * Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301 USA
+ */
+#include "config.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "cpumask.h"
+#include "irqbalance.h"
+
+#define LINESIZE 4096
+
+/*
+ * Read /proc/interrupts and report the total count of every numbered
+ * interrupt through set_interrupt_count().
+ *
+ * For each line that starts with a space or a digit, the number in front
+ * of the ':' is the interrupt number and the run of numbers after it is
+ * summed into one total.  Parsing stops at the first line that starts
+ * with anything else.  If the number of counters on a line differs from
+ * core_count, need_cpu_rescan is set.
+ */
+void parse_proc_interrupts(void)
+{
+	FILE *file;
+	char *line = NULL;
+	size_t size = 0;
+
+	file = fopen("/proc/interrupts", "r");
+	if (!file)
+		return;
+
+	/* first line is the header we don't need; nuke it */
+	if (getline(&line, &size, file) <= 0)
+		goto out;
+
+	/* getline() returns -1 on EOF or error, which ends the loop */
+	while (getline(&line, &size, file) > 0) {
+		int cpunr;
+		int number;
+		uint64_t count;
+		char *c, *c2;
+
+		/* lines with letters in front are special, like NMI count. Stop there */
+		if (!(line[0]==' ' || (line[0]>='0' && line[0]<='9')))
+			break;
+		c = strchr(line, ':');
+		if (!c)
+			continue;
+		*c = 0;
+		c++;
+		number = strtoul(line, NULL, 10);
+		count = 0;
+		cpunr = 0;
+
+		c2 = NULL;
+		while (1) {
+			uint64_t C;
+
+			C = strtoull(c, &c2, 10);
+			if (c == c2) /* end of numbers */
+				break;
+			count += C;
+			c = c2;
+			cpunr++;
+		}
+		if (cpunr != core_count)
+			need_cpu_rescan = 1;
+
+		set_interrupt_count(number, count);
+	}
+out:
+	/* single exit so the stream and the getline buffer are always released */
+	fclose(file);
+	free(line);
+}
--- /dev/null
+#ifndef _INCLUDE_GUARD_TYPES_H
+#define _INCLUDE_GUARD_TYPES_H
+
+#include <glib.h>
+
+#include "cpumask.h"
+
+/*
+ * Values for struct interrupt.balance_level. The placement code compares
+ * them numerically (e.g. "<= BALANCE_PACKAGE"), so the order matters.
+ */
+#define BALANCE_NONE 0
+#define BALANCE_PACKAGE 1
+#define BALANCE_CACHE 2
+#define BALANCE_CORE 3
+
+/*
+ * Interrupt classes, stored in struct interrupt.class. The value is used
+ * as an index into the class_count[7] arrays below, so it has to stay in
+ * the range 0..6.
+ */
+#define IRQ_OTHER 0
+#define IRQ_LEGACY 1
+#define IRQ_SCSI 2
+#define IRQ_TIMER 3
+#define IRQ_ETH 4
+#define IRQ_GETH 5
+#define IRQ_TGETH 6
+
+
+/* One entry of the global "packages" list used by the placement code. */
+struct package {
+ /* load accumulated from the interrupts accounted to this package */
+ uint64_t workload;
+ /* package index; multiplied into the power-mode penalty */
+ int number;
+
+ /* cpus belonging to this package */
+ cpumask_t mask;
+
+ /* interrupts placed here, counted per class (indexed by IRQ_*) */
+ int class_count[7];
+
+ /* list of struct cache_domain inside this package */
+ GList *cache_domains;
+ /* list of struct interrupt currently attached at package level */
+ GList *interrupts;
+};
+
+/* One entry of the global "cache_domains" list used by the placement code. */
+struct cache_domain {
+ /* load accumulated from the interrupts accounted to this domain */
+ uint64_t workload;
+ int number;
+
+ /* NOTE(review): not referenced by the code visible here -- purpose to be confirmed */
+ int marker;
+
+ /* cpus belonging to this cache domain */
+ cpumask_t mask;
+
+ /* presumably the mask of the enclosing package -- TODO confirm */
+ cpumask_t package_mask;
+
+ /* interrupts placed here, counted per class (indexed by IRQ_*) */
+ int class_count[7];
+
+ /* list of struct cpu_core inside this cache domain */
+ GList *cpu_cores;
+ /* list of struct interrupt currently attached at cache-domain level */
+ GList *interrupts;
+};
+
+
+/* One entry of the global "cpus" list used by the placement code. */
+struct cpu_core {
+ /* load accumulated from the interrupts accounted to this core */
+ uint64_t workload;
+ /* cpu number; compared against first_cpu(cache_mask) in the cost function */
+ int number;
+
+ /* NOTE(review): not referenced by the code visible here -- purpose to be confirmed */
+ int marker;
+
+ /* interrupts placed here, counted per class (indexed by IRQ_*) */
+ int class_count[7];
+
+ /* presumably the mask of the enclosing package -- TODO confirm */
+ cpumask_t package_mask;
+ /* mask whose first cpu is treated as the "first cpu" in the cost function */
+ cpumask_t cache_mask;
+ /* cpu mask of this core */
+ cpumask_t mask;
+
+ /* list of struct interrupt placed on this core */
+ GList *interrupts;
+};
+
+/* One entry of the global "interrupts" list used by the placement code. */
+struct interrupt {
+ /* load figure used as the base cost when placing this interrupt */
+ uint64_t workload;
+
+ /* one of the BALANCE_* values; BALANCE_NONE interrupts are never moved */
+ int balance_level;
+
+ int number;
+ /* one of the IRQ_* values; indexes the class_count[] arrays */
+ int class;
+
+ /* NOTE(review): counters maintained outside the code visible here */
+ uint64_t count;
+ uint64_t old_count;
+ uint64_t extra;
+
+ /* cpu mask chosen by the placement code */
+ cpumask_t mask;
+ /* earlier mask; targets that do not intersect it pay the "cold" penalty */
+ cpumask_t old_mask;
+
+
+ /* targets that do not intersect this mask pay NUMA_PENALTY */
+ cpumask_t numa_mask;
+ /* targets that do not intersect this mask are blocked by a large penalty */
+ cpumask_t allowed_mask;
+
+ /* user/driver provided for smarter balancing */
+ cpumask_t node_mask;
+};
+
+
+#endif