--- /dev/null
+/*
+ * bakery_lock.c - Lamport's bakery algorithm
+ *
+ * Copyright (C) 2015 ARM Limited. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE.txt file.
+ *
+ *
+ * Simplest implementation of Lamport's bakery lock [1]. Applies only to device
+ * memory with attributes non-gathering and non-reordering.
+ *
+ * This algorithm's strength resides in the fact that it doesn't rely on
+ * hardware synchronisation mechanisms and as such, doesn't require normal
+ * cacheable memory on ARMv8. CPUs write only to their own memory locations,
+ * and read from all other CPUs' ones, in order to decide whose turn it is to
+ * have the lock.
+ *
+ * The algorithm correctness is based on the following assumptions:
+ *
+ * 1) Accesses to choosing[k] (here tickets[k].choosing) are done atomically.
+ * In other words, simultaneous read and write to choosing[k] do not occur.
+ * In this implementation, it is guaranteed by single-copy atomicity, for
+ * accesses of type Device with non-gathering attributes. The algorithm
+ * doesn't require accesses to number[k] to be atomic, even though this
+ * implementation guarantees that as well.
+ *
+ * 2) Storage of number[k] allows it to become large enough for practical use of
+ * the lock. Indeed, if the lock is contended all of the time, the value of
+ * max(number[1..N]) will keep increasing, and this algorithm doesn't handle
+ * wrapping of the ticket number. In this implementation, we assume that we
+ * will never reach 32767 (0x7fff) overlapping calls to bakery_lock.
+ *
+ * [1] Lamport, L. "A New Solution of Dijkstra's Concurrent Programming Problem"
+ */
+
+#include <bakery_lock.h>
+#include <cpu.h>
+
+/*
+ * Lexicographic ordering of bakery tickets: the ticket numbers are compared
+ * first, and the CPU IDs are only used to break a tie.
+ *
+ * Returns non-zero when (number_a, cpu_a) < (number_b, cpu_b), zero otherwise.
+ */
+static unsigned int less_than(unsigned long cpu_a, unsigned long number_a,
+			      unsigned long cpu_b, unsigned long number_b)
+{
+	if (number_a != number_b)
+		return number_a < number_b;
+
+	/* Same ticket number: the lower CPU ID goes first. */
+	return cpu_a < cpu_b;
+}
+
+/*
+ * Pick a ticket number for CPU @self: one more than the largest number
+ * currently published by any other CPU in @tickets (NR_CPUS entries).
+ *
+ * The caller's own slot is skipped. The result is not range-checked: as
+ * stated at the top of this file, wrapping of the ticket number is not
+ * handled.
+ */
+static unsigned int choose_number(bakery_ticket_t *tickets, unsigned self)
+{
+	/* Unsigned, like @self, so the comparison below has matching signs. */
+	unsigned int cpu;
+	unsigned int highest = 0;
+
+	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+		bakery_ticket_t other;
+
+		if (cpu == self)
+			continue;
+
+		/* Single volatile 16-bit read of the whole ticket. */
+		other = read_ticket_once(tickets[cpu]);
+
+		if (other.number > highest)
+			highest = other.number;
+	}
+
+	return highest + 1;
+}
+
+/**
+ * Wait for our turn to enter a critical section
+ *
+ * @tickets: array of size NR_CPUS, indexed by logical IDs.
+ * @self: logical ID of the current CPU
+ *
+ * Returns once every other CPU holding a smaller (number, cpu) ticket has
+ * given it up. The lock is released with bakery_unlock().
+ *
+ * Note: since this implementation assumes that all loads and stores to tickets
+ * are of Device type with non-gathering and non-reordering attributes, we
+ * expect all of them to be performed, in program order. As a result, the
+ * following function is pretty relaxed in terms of barriers: we only
+ * synchronize before sev(), and introduce system-wide memory barriers around
+ * the critical section.
+ */
+void bakery_lock(bakery_ticket_t *tickets, unsigned self)
+{
+	/*
+	 * Both are unsigned: cpu is compared against @self, and number_self
+	 * holds the unsigned value returned by choose_number().
+	 */
+	unsigned int cpu, number_self;
+	bakery_ticket_t ticket;
+
+	/*
+	 * Doorway: flag that we are choosing, pick a number above every number
+	 * currently published by the other CPUs, then publish it and clear
+	 * the choosing flag in the same store.
+	 */
+	write_ticket_once(tickets[self], 1, 0);
+	number_self = choose_number(tickets, self);
+	write_ticket_once(tickets[self], 0, number_self);
+
+	/* Synchronize the ticket store, then signal CPUs waiting in wfe(). */
+	dsb(st);
+	sev();
+
+	/* Bakery */
+	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+		uint16_t number_cpu;
+
+		if (cpu == self)
+			continue;
+
+		/* Wait for that CPU to leave its doorway. */
+		ticket = read_ticket_once(tickets[cpu]);
+		while (ticket.choosing) {
+			wfe();
+			ticket = read_ticket_once(tickets[cpu]);
+		}
+
+		number_cpu = ticket.number;
+
+		/*
+		 * Wait until that CPU updates its ticket. We only need to do
+		 * the comparison once, since any update to tickets[cpu].number
+		 * will be to a value greater than ours, or zero.
+		 */
+		if (number_cpu != 0 && less_than(cpu, number_cpu,
+						 self, number_self)) {
+			do {
+				wfe();
+				ticket = read_ticket_once(tickets[cpu]);
+			} while (number_cpu == ticket.number);
+		}
+	}
+
+	/* System-wide barrier between taking the lock and the critical section. */
+	dmb(sy);
+}
+
+/**
+ * Leave the critical section entered with bakery_lock()
+ *
+ * @tickets: array of size NR_CPUS, indexed by logical IDs.
+ * @self: logical ID of the current CPU
+ *
+ * Resets this CPU's ticket to (choosing = 0, number = 0) and signals the
+ * other CPUs with sev().
+ */
+void bakery_unlock(bakery_ticket_t *tickets, unsigned self)
+{
+	bakery_ticket_t *mine = &tickets[self];
+
+	/* System-wide barrier between the critical section and the release. */
+	dmb(sy);
+
+	write_ticket_once(*mine, 0, 0);
+
+	/* Synchronize the ticket store, then signal CPUs waiting in wfe(). */
+	dsb(st);
+	sev();
+}
--- /dev/null
+/*
+ * include/bakery_lock.h
+ *
+ * Copyright (C) 2015 ARM Limited. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE.txt file.
+ */
+
+#ifndef __BAKERY_LOCK_H
+#define __BAKERY_LOCK_H
+
+#include <stdint.h>
+
+#include <compiler.h>
+
+/*
+ * Per-CPU ticket used by the bakery lock.
+ *
+ * number:   15-bit ticket number. bakery_unlock() writes it back to zero, and
+ *           bakery_lock() ignores tickets whose number is zero.
+ * choosing: set by a CPU while it is picking its number in bakery_lock().
+ * __val:    the whole ticket viewed as one 16-bit value; this is what the
+ *           read/write_ticket_once helpers load and store.
+ *
+ * We *must* access this structure with 16 or 8 bit accesses, aligned on 16-bit.
+ * Helpers read/write_ticket_once should be used for this.
+ */
+typedef union {
+ struct __packed {
+ uint16_t number : 15;
+ uint16_t choosing : 1;
+ };
+ uint16_t __val;
+} bakery_ticket_t;
+
+/*
+ * Store a new (choosing_, number_) pair into @ticket. The value is assembled
+ * in a local bakery_ticket_t and then written with one volatile 16-bit store.
+ */
+#define write_ticket_once(ticket, choosing_, number_)			\
+({									\
+	bakery_ticket_t __new = {					\
+		.choosing = (choosing_),				\
+		.number = (number_),					\
+	};								\
+	volatile uint16_t *__dst = (volatile uint16_t *)&(ticket).__val; \
+	*__dst = __new.__val;						\
+})
+
+/*
+ * Load @ticket with one volatile 16-bit read and evaluate to a local copy of
+ * it, so that choosing and number come from the same access.
+ */
+#define read_ticket_once(ticket)					\
+({									\
+	bakery_ticket_t __snap = {					\
+		.__val = *(volatile uint16_t *)&(ticket).__val,		\
+	};								\
+	__snap;								\
+})
+
+/*
+ * Take the lock described by @tickets (NR_CPUS entries, indexed by logical
+ * CPU ID) on behalf of CPU @self. Returns once it is this CPU's turn.
+ */
+void bakery_lock(bakery_ticket_t *tickets, unsigned int self);
+
+/* Give up the lock taken by CPU @self with bakery_lock(). */
+void bakery_unlock(bakery_ticket_t *tickets, unsigned int self);
+
+#endif
#include <stdint.h>
+#include <bakery_lock.h>
#include <cpu.h>
#include <psci.h>
#include <spin.h>
static unsigned long branch_table[NR_CPUS];
+bakery_ticket_t branch_table_lock[NR_CPUS]; /* bakery lock tickets, held around branch_table updates in psci_cpu_on() */
+
+/*
+ * Record @address as the entry in branch_table for logical CPU @cpu.
+ *
+ * Returns PSCI_RET_SUCCESS when the entry still held PSCI_ADDR_INVALID and
+ * has been updated, or PSCI_RET_ALREADY_ON (leaving the entry untouched)
+ * when it already held another value.
+ */
+static int psci_store_address(unsigned int cpu, unsigned long address)
+{
+	unsigned long *slot = &branch_table[cpu];
+
+	if (*slot == PSCI_ADDR_INVALID) {
+		*slot = address;
+		return PSCI_RET_SUCCESS;
+	}
+
+	return PSCI_RET_ALREADY_ON;
+}
+
int psci_cpu_on(unsigned long target_mpidr, unsigned long address)
{
int ret;
unsigned int cpu = find_logical_id(target_mpidr);
+ unsigned int this_cpu = find_logical_id(read_mpidr()); /* passed as @self to the bakery lock below */
if (cpu == MPIDR_INVALID)
return PSCI_RET_INVALID_PARAMETERS;
- ret = psci_store_address(address, branch_table + cpu);
-
- dsb(ishst);
- sev();
+ bakery_lock(branch_table_lock, this_cpu); /* serialise the branch_table update */
+ ret = psci_store_address(cpu, address); /* PSCI_RET_ALREADY_ON if the entry is already set */
+ bakery_unlock(branch_table_lock, this_cpu); /* reset our ticket and sev() the other CPUs */
return ret;
}