]> xenbits.xensource.com Git - people/julieng/boot-wrapper-aarch64.git/commitdiff
Replace exclusive accesses with a bakery lock
authorJean-Philippe Brucker <jean-philippe.brucker@arm.com>
Thu, 10 Dec 2015 19:24:59 +0000 (19:24 +0000)
committerMark Rutland <mark.rutland@arm.com>
Wed, 15 Jun 2016 09:27:35 +0000 (10:27 +0100)
The commit prepares the removal of the MMU identity map, which was only
used for exclusive accesses (ldxr/stxr) in psci_cpu_on.

Instead of relying on exclusives, we assume that when stage-1
translation is disabled, all EL3 memory accesses have Device type, with
non-gathering and non-reordering attributes. This guarantees single-copy
atomicity of aligned halfword accesses, as per the ARM ARM.

We can thus switch to a less constrained (albeit bulkier) locking
mechanism. This patch implements Lamport's bakery lock, which doesn't
rely on atomic compare-and-swap primitives.

For bisectability, we remove the call to switch_to_idmap here.
Otherwise, the assertions made by the bakery lock code, regarding order
of accesses, are invalid. The rest of the code will be removed in a
subsequent patch.

Signed-off-by: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Makefile.am
arch/aarch64/include/asm/psci.h [deleted file]
arch/aarch64/psci.S
bakery_lock.c [new file with mode: 0644]
include/bakery_lock.h [new file with mode: 0644]
include/compiler.h
include/cpu.h
include/psci.h
psci.c

index 70dedc22d042d0f3231a7d9d190063abe70e4138..fb42c45d73a7ecdfea56c3341b58fcd131d3d0e6 100644 (file)
@@ -92,7 +92,7 @@ CFLAGS                += -Wall -fomit-frame-pointer
 CFLAGS         += -ffunction-sections -fdata-sections
 LDFLAGS                += --gc-sections
 
-OFILES         += boot_common.o ns.o $(GIC) cache.o lib.o
+OFILES         += boot_common.o bakery_lock.o ns.o $(GIC) cache.o lib.o
 OFILES         += $(addprefix $(ARCH_SRC),boot.o stack.o mmu.o $(BOOTMETHOD) utils.o)
 
 all: $(IMAGE)
diff --git a/arch/aarch64/include/asm/psci.h b/arch/aarch64/include/asm/psci.h
deleted file mode 100644 (file)
index ddab76c..0000000
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * arch/aarch64/include/asm/psci.h
- *
- * Copyright (C) 2015 ARM Limited. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license that can be
- * found in the LICENSE.txt file.
- */
-#ifndef __ASM_AARCH64_PSCI_H
-#define __ASM_AARCH64_PSCI_H
-
-#ifndef __ASSEMBLY__
-
-static inline int psci_store_address(unsigned long address,
-                                    unsigned long *branch_entry)
-{
-       unsigned long ret;
-
-       asm volatile (
-       "1:\n"
-       "ldxr   %0, [%2]\n"
-       "subs   %0, %0, %3\n"
-       "b.ne   2f\n"
-       "stxr   %w0, %1, [%2]\n"
-       "cbnz   %w0, 1b\n"
-       "2:\n"
-       : "=&r" (ret)
-       : "r" (address), "r" (branch_entry), "J" (PSCI_ADDR_INVALID)
-       : "cc");
-
-       if (ret != 0)
-               /* ret value comes from subs */
-               return PSCI_RET_ALREADY_ON;
-
-       return PSCI_RET_SUCCESS;
-}
-
-#endif /* !__ASSEMBLY__ */
-
-#endif
index b8d07e56b2ed8abfd2ae3e0292da0b08c589d7e7..5f7e2d77feb9707a21c7040dbc7f85ba25407658 100644 (file)
@@ -102,7 +102,6 @@ smc_exit:
 start_el3:
        ldr     x0, =vector
        bl      setup_vector
-       bl      switch_to_idmap
 
        /* only boot the primary cpu (entry 0 in the table) */
        cpuid   x0, x1
diff --git a/bakery_lock.c b/bakery_lock.c
new file mode 100644 (file)
index 0000000..877118c
--- /dev/null
@@ -0,0 +1,137 @@
+/*
+ * bakery_lock.c - Lamport's bakery algorithm
+ *
+ * Copyright (C) 2015 ARM Limited. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE.txt file.
+ *
+ *
+ * Simplest implementation of Lamport's bakery lock [1]. Applies only to device
+ * memory with attributes non-gathering and non-reordering.
+ *
+ * This algorithm's strength resides in the fact that it doesn't rely on
+ * hardware synchronisation mechanisms and as such, doesn't require normal
+ * cacheable memory on ARMv8. CPUs write only to their own memory locations,
+ * and read from all other CPUs' ones, in order to decide whose turn it is to
+ * have the lock.
+ *
+ * The algorithm correctness is based on the following assumptions:
+ *
+ * 1) Accesses to choosing[k] (here tickets[k].choosing) are done atomically.
+ *    In other words, simultaneous read and write to choosing[k] do not occur.
+ *    In this implementation, it is guaranteed by single-copy atomicity, for
+ *    accesses of type Device with non-gathering attributes. The algorithm
+ *    doesn't require accesses to number[k] to be atomic, even though this
+ *    implementation guarantees that as well.
+ *
+ * 2) Storage of number[k] allows it to become large enough for practical use of
+ *    the lock. Indeed, if the lock is contended all of the time, the value of
+ *    max(number[1..N]) will keep increasing, and this algorithm doesn't handle
+ *    wrapping of the ticket number. In this implementation, we assume that we
+ *    will never reach 32766 (0x7fff) overlapping calls to bakery_lock.
+ *
+ * [1] Lamport, L. "A New Solution of Dijkstra's Concurrent Programming Problem"
+ */
+
+#include <bakery_lock.h>
+#include <cpu.h>
+
+/*
+ * Return the result of (number_a, cpu_a) < (number_b, cpu_b)
+ */
+static unsigned int less_than(unsigned long cpu_a, unsigned long number_a,
+                             unsigned long cpu_b, unsigned long number_b)
+{
+       if (number_a == number_b)
+               return cpu_a < cpu_b;
+
+       return number_a < number_b;
+}
+
+static unsigned int choose_number(bakery_ticket_t *tickets, unsigned self)
+{
+       int cpu;
+       unsigned int max_number = 0;
+       bakery_ticket_t ticket;
+
+       for (cpu = 0; cpu < NR_CPUS; cpu++) {
+               if (cpu == self)
+                       continue;
+
+               ticket = read_ticket_once(tickets[cpu]);
+
+               if (max_number < ticket.number)
+                       max_number = ticket.number;
+       }
+
+       return 1 + max_number;
+}
+
+/**
+ * Wait for our turn to enter a critical section
+ *
+ * @tickets: array of size NR_CPUS, indexed by logical IDs.
+ * @self:    logical ID of the current CPU
+ *
+ * Note: since this implementation assumes that all loads and stores to tickets
+ * are of Device type with non-gathering and non-reordering attributes, we
+ * expect all of them to be performed, in program order. As a result, the
+ * following function is pretty relaxed in terms of barriers: we only
+ * synchronize before sev(), and introduce system-wide memory barriers around
+ * the critical section.
+ */
+void bakery_lock(bakery_ticket_t *tickets, unsigned self)
+{
+       int cpu, number_self;
+       bakery_ticket_t ticket;
+
+       /* Doorway */
+       write_ticket_once(tickets[self], 1, 0);
+       number_self = choose_number(tickets, self);
+       write_ticket_once(tickets[self], 0, number_self);
+
+       dsb(st);
+       sev();
+
+       /* Bakery */
+       for (cpu = 0; cpu < NR_CPUS; cpu++) {
+               uint16_t number_cpu;
+
+               if (cpu == self)
+                       continue;
+
+               ticket = read_ticket_once(tickets[cpu]);
+               while (ticket.choosing) {
+                       wfe();
+                       ticket = read_ticket_once(tickets[cpu]);
+               }
+
+               number_cpu = ticket.number;
+
+               /*
+                * Wait until that CPU updates its ticket. We only need to do
+                * the comparison once, since any update to tickets[cpu].number
+                * will be to a value greater than ours, or zero.
+                */
+               if (number_cpu != 0 && less_than(cpu,  number_cpu,
+                                                self, number_self)) {
+                       do {
+                               wfe();
+                               ticket = read_ticket_once(tickets[cpu]);
+                       } while (number_cpu == ticket.number);
+               }
+       }
+
+       dmb(sy);
+}
+
+void bakery_unlock(bakery_ticket_t *tickets, unsigned self)
+{
+       dmb(sy);
+
+       write_ticket_once(tickets[self], 0, 0);
+
+       dsb(st);
+       sev();
+}
diff --git a/include/bakery_lock.h b/include/bakery_lock.h
new file mode 100644 (file)
index 0000000..ebeb893
--- /dev/null
@@ -0,0 +1,48 @@
+/*
+ * include/bakery_lock.h
+ *
+ * Copyright (C) 2015 ARM Limited. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE.txt file.
+ */
+
+#ifndef __BAKERY_LOCK_H
+#define __BAKERY_LOCK_H
+
+#include <stdint.h>
+
+#include <compiler.h>
+
+/*
+ * We *must* access this structure with 16 or 8 bit accesses, aligned on 16-bit.
+ * Helpers read/write_ticket_once should be used for this.
+ */
+typedef union {
+       struct __packed {
+               uint16_t number         : 15;
+               uint16_t choosing       : 1;
+       };
+       uint16_t __val;
+} bakery_ticket_t;
+
+#define write_ticket_once(ticket, choosing_, number_)                  \
+({                                                                     \
+       bakery_ticket_t __t = {                                         \
+               .number = (number_),                                    \
+               .choosing = (choosing_),                                \
+       };                                                              \
+       *(volatile uint16_t *)&(ticket).__val = __t.__val;              \
+})
+
+#define read_ticket_once(ticket)                                       \
+({                                                                     \
+       bakery_ticket_t __t;                                            \
+       __t.__val = *(volatile uint16_t *)&(ticket).__val;              \
+       __t;                                                            \
+})
+
+void bakery_lock(bakery_ticket_t *tickets, unsigned self);
+void bakery_unlock(bakery_ticket_t *tickets, unsigned self);
+
+#endif
index f9ee2f3df93260d30669a0f49f70a73cabaf5fff..9d7a92a9282b68c6c8aa3271821512cfd77eb4a2 100644 (file)
@@ -14,5 +14,6 @@
 #define unreachable()  __builtin_unreachable()
 
 #define __noreturn     __attribute__((noreturn))
+#define __packed       __attribute__((packed))
 
 #endif
index b4dae949722d2eef9fc51a779e07d6b112201b57..704d6d884c93d713e4951a44d264b31eaa4c9faf 100644 (file)
@@ -16,6 +16,7 @@
 #ifndef __ASSEMBLY__
 
 #define isb()          asm volatile ("isb\n" : : : "memory")
+#define dmb(arg)       asm volatile ("dmb " #arg "\n" : : : "memory")
 #define dsb(arg)       asm volatile ("dsb " #arg "\n" : : : "memory")
 #define sev()          asm volatile ("sev\n" : : : "memory")
 #define wfe()          asm volatile ("wfe\n" : : : "memory")
index a2caa592a0d0774912ddb8c2a4e2962293a4b819..21ca5ee981aeb2f5acce66b3308f018f32e46311 100644 (file)
@@ -24,6 +24,4 @@
 
 #define PSCI_ADDR_INVALID              (-1)
 
-#include <asm/psci.h>
-
 #endif
diff --git a/psci.c b/psci.c
index 6c4757705e8187b8e221158a0730e40a923f19e9..fad6f5d7785e837074fe82251924c46ab75bdf21 100644 (file)
--- a/psci.c
+++ b/psci.c
@@ -9,6 +9,7 @@
 
 #include <stdint.h>
 
+#include <bakery_lock.h>
 #include <cpu.h>
 #include <psci.h>
 #include <spin.h>
 
 static unsigned long branch_table[NR_CPUS];
 
+bakery_ticket_t branch_table_lock[NR_CPUS];
+
+static int psci_store_address(unsigned int cpu, unsigned long address)
+{
+       if (branch_table[cpu] != PSCI_ADDR_INVALID)
+               return PSCI_RET_ALREADY_ON;
+
+       branch_table[cpu] = address;
+       return PSCI_RET_SUCCESS;
+}
+
 int psci_cpu_on(unsigned long target_mpidr, unsigned long address)
 {
        int ret;
        unsigned int cpu = find_logical_id(target_mpidr);
+       unsigned int this_cpu = find_logical_id(read_mpidr());
 
        if (cpu == MPIDR_INVALID)
                return PSCI_RET_INVALID_PARAMETERS;
 
-       ret = psci_store_address(address, branch_table + cpu);
-
-       dsb(ishst);
-       sev();
+       bakery_lock(branch_table_lock, this_cpu);
+       ret = psci_store_address(cpu, address);
+       bakery_unlock(branch_table_lock, this_cpu);
 
        return ret;
 }