ia64/linux-2.6.18-xen.hg: drivers/net/iseries_veth.c @ 897:329ea0ccb344

balloon: try harder to balloon up under memory pressure.

Currently if the balloon driver is unable to increase the guest's
reservation it assumes the failure was due to reaching its full
allocation, gives up on the ballooning operation and records the limit
it reached as the "hard limit". The driver will not try again until
the target is set again (even to the same value).

However, it is possible that ballooning has in fact failed due to
memory pressure in the host, and it is therefore desirable to keep
attempting to reach the target in case memory becomes available. The
most likely scenario is that some guests are ballooning down while
others are ballooning up, causing temporary memory pressure while
things stabilise. You would not expect a well-behaved toolstack to ask
a domain to balloon to more than its allocation, nor would you expect
it to deliberately over-commit memory by setting balloon targets which
exceed the total host memory.

This patch drops the concept of a hard limit and causes the balloon
driver to retry increasing the reservation on a timer in the same
manner as when decreasing the reservation.

Also, if we partially succeed in increasing the reservation
(i.e. receive fewer pages than we asked for) then we may as well keep
those pages rather than returning them to Xen.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Jun 05 14:01:20 2009 +0100
parents 831230e53067
/* File veth.c created by Kyle A. Lucke on Mon Aug 7 2000. */
/*
 * IBM eServer iSeries Virtual Ethernet Device Driver
 * Copyright (C) 2001 Kyle A. Lucke (klucke@us.ibm.com), IBM Corp.
 * Substantially cleaned up by:
 * Copyright (C) 2003 David Gibson <dwg@au1.ibm.com>, IBM Corporation.
 * Copyright (C) 2004-2005 Michael Ellerman, IBM Corporation.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 * USA
 *
 *
 * This module implements the virtual ethernet device for iSeries LPAR
 * Linux.  It uses hypervisor message passing to implement an
 * ethernet-like network device communicating between partitions on
 * the iSeries.
 *
 * The iSeries LPAR hypervisor currently allows for up to 16 different
 * virtual ethernets.  These are all dynamically configurable on
 * OS/400 partitions, but dynamic configuration is not supported under
 * Linux yet.  An ethXX network device will be created for each
 * virtual ethernet this partition is connected to.
 *
 * - This driver is responsible for routing packets to and from other
 *   partitions.  The MAC addresses used by the virtual ethernets
 *   contain meaning and must not be modified.
 *
 * - Having 2 virtual ethernets to the same remote partition DOES NOT
 *   double the available bandwidth.  The 2 devices will share the
 *   available hypervisor bandwidth.
 *
 * - If you send a packet to your own mac address, it will just be
 *   dropped, you won't get it on the receive side.
 *
 * - Multicast is implemented by sending the frame to every other
 *   partition.  It is the responsibility of the receiving partition
 *   to filter the addresses desired.
 *
 * Tunable parameters:
 *
 * VETH_NUMBUFFERS: This compile time option defaults to 120.  It
 * controls how much memory Linux will allocate per remote partition
 * it is communicating with.  It can be thought of as the maximum
 * number of packets outstanding to a remote partition at a time.
 */
#include <linux/module.h>
#include <linux/types.h>
#include <linux/errno.h>
#include <linux/ioport.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/delay.h>
#include <linux/mm.h>
#include <linux/ethtool.h>
#include <linux/if_ether.h>

#include <asm/abs_addr.h>
#include <asm/iseries/mf.h>
#include <asm/uaccess.h>

#include <asm/iseries/hv_lp_config.h>
#include <asm/iseries/hv_types.h>
#include <asm/iseries/hv_lp_event.h>
#include <asm/iommu.h>
#include <asm/vio.h>

#undef DEBUG

MODULE_AUTHOR("Kyle Lucke <klucke@us.ibm.com>");
MODULE_DESCRIPTION("iSeries Virtual ethernet driver");
MODULE_LICENSE("GPL");

#define VETH_EVENT_CAP		(0)
#define VETH_EVENT_FRAMES	(1)
#define VETH_EVENT_MONITOR	(2)
#define VETH_EVENT_FRAMES_ACK	(3)

#define VETH_MAX_ACKS_PER_MSG	(20)
#define VETH_MAX_FRAMES_PER_MSG	(6)

struct veth_frames_data {
	u32 addr[VETH_MAX_FRAMES_PER_MSG];
	u16 len[VETH_MAX_FRAMES_PER_MSG];
	u32 eofmask;
};
#define VETH_EOF_SHIFT		(32-VETH_MAX_FRAMES_PER_MSG)

struct veth_frames_ack_data {
	u16 token[VETH_MAX_ACKS_PER_MSG];
};

struct veth_cap_data {
	u8 caps_version;
	u8 rsvd1;
	u16 num_buffers;
	u16 ack_threshold;
	u16 rsvd2;
	u32 ack_timeout;
	u32 rsvd3;
	u64 rsvd4[3];
};

struct veth_lpevent {
	struct HvLpEvent base_event;
	union {
		struct veth_cap_data caps_data;
		struct veth_frames_data frames_data;
		struct veth_frames_ack_data frames_ack_data;
	} u;

};

#define DRV_NAME	"iseries_veth"
#define DRV_VERSION	"2.0"

#define VETH_NUMBUFFERS		(120)
#define VETH_ACKTIMEOUT		(1000000) /* microseconds */
#define VETH_MAX_MCAST		(12)

#define VETH_MAX_MTU		(9000)

#if VETH_NUMBUFFERS < 10
#define ACK_THRESHOLD		(1)
#elif VETH_NUMBUFFERS < 20
#define ACK_THRESHOLD		(4)
#elif VETH_NUMBUFFERS < 40
#define ACK_THRESHOLD		(10)
#else
#define ACK_THRESHOLD		(20)
#endif

#define VETH_STATE_SHUTDOWN	(0x0001)
#define VETH_STATE_OPEN		(0x0002)
#define VETH_STATE_RESET	(0x0004)
#define VETH_STATE_SENTMON	(0x0008)
#define VETH_STATE_SENTCAPS	(0x0010)
#define VETH_STATE_GOTCAPACK	(0x0020)
#define VETH_STATE_GOTCAPS	(0x0040)
#define VETH_STATE_SENTCAPACK	(0x0080)
#define VETH_STATE_READY	(0x0100)

struct veth_msg {
	struct veth_msg *next;
	struct veth_frames_data data;
	int token;
	int in_use;
	struct sk_buff *skb;
	struct device *dev;
};

struct veth_lpar_connection {
	HvLpIndex remote_lp;
	struct work_struct statemachine_wq;
	struct veth_msg *msgs;
	int num_events;
	struct veth_cap_data local_caps;

	struct kobject kobject;
	struct timer_list ack_timer;

	struct timer_list reset_timer;
	unsigned int reset_timeout;
	unsigned long last_contact;
	int outstanding_tx;

	spinlock_t lock;
	unsigned long state;
	HvLpInstanceId src_inst;
	HvLpInstanceId dst_inst;
	struct veth_lpevent cap_event, cap_ack_event;
	u16 pending_acks[VETH_MAX_ACKS_PER_MSG];
	u32 num_pending_acks;

	int num_ack_events;
	struct veth_cap_data remote_caps;
	u32 ack_timeout;

	struct veth_msg *msg_stack_head;
};

struct veth_port {
	struct device *dev;
	struct net_device_stats stats;
	u64 mac_addr;
	HvLpIndexMap lpar_map;

	/* queue_lock protects the stopped_map and dev's queue. */
	spinlock_t queue_lock;
	HvLpIndexMap stopped_map;

	/* mcast_gate protects promiscuous, num_mcast & mcast_addr. */
	rwlock_t mcast_gate;
	int promiscuous;
	int num_mcast;
	u64 mcast_addr[VETH_MAX_MCAST];

	struct kobject kobject;
};

static HvLpIndex this_lp;
static struct veth_lpar_connection *veth_cnx[HVMAXARCHITECTEDLPS]; /* = 0 */
static struct net_device *veth_dev[HVMAXARCHITECTEDVIRTUALLANS]; /* = 0 */

static int veth_start_xmit(struct sk_buff *skb, struct net_device *dev);
static void veth_recycle_msg(struct veth_lpar_connection *, struct veth_msg *);
static void veth_wake_queues(struct veth_lpar_connection *cnx);
static void veth_stop_queues(struct veth_lpar_connection *cnx);
static void veth_receive(struct veth_lpar_connection *, struct veth_lpevent *);
static void veth_release_connection(struct kobject *kobject);
static void veth_timed_ack(unsigned long ptr);
static void veth_timed_reset(unsigned long ptr);

/*
 * Utility functions
 */

#define veth_info(fmt, args...) \
	printk(KERN_INFO DRV_NAME ": " fmt, ## args)

#define veth_error(fmt, args...) \
	printk(KERN_ERR DRV_NAME ": Error: " fmt, ## args)

#ifdef DEBUG
#define veth_debug(fmt, args...) \
	printk(KERN_DEBUG DRV_NAME ": " fmt, ## args)
#else
#define veth_debug(fmt, args...) do {} while (0)
#endif

/* You must hold the connection's lock when you call this function. */
static inline void veth_stack_push(struct veth_lpar_connection *cnx,
				   struct veth_msg *msg)
{
	msg->next = cnx->msg_stack_head;
	cnx->msg_stack_head = msg;
}

/* You must hold the connection's lock when you call this function. */
static inline struct veth_msg *veth_stack_pop(struct veth_lpar_connection *cnx)
{
	struct veth_msg *msg;

	msg = cnx->msg_stack_head;
	if (msg)
		cnx->msg_stack_head = cnx->msg_stack_head->next;

	return msg;
}

/* You must hold the connection's lock when you call this function. */
static inline int veth_stack_is_empty(struct veth_lpar_connection *cnx)
{
	return cnx->msg_stack_head == NULL;
}
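
/* Signal a single LP event of the VirtualLan type to the remote end of
 * this connection, carrying the five payload words of the event. */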
static inline HvLpEvent_Rc
veth_signalevent(struct veth_lpar_connection *cnx, u16 subtype,
		 HvLpEvent_AckInd ackind, HvLpEvent_AckType acktype,
		 u64 token,
		 u64 data1, u64 data2, u64 data3, u64 data4, u64 data5)
{
	return HvCallEvent_signalLpEventFast(cnx->remote_lp,
					     HvLpEvent_Type_VirtualLan,
					     subtype, ackind, acktype,
					     cnx->src_inst,
					     cnx->dst_inst,
					     token, data1, data2, data3,
					     data4, data5);
}

static inline HvLpEvent_Rc veth_signaldata(struct veth_lpar_connection *cnx,
					   u16 subtype, u64 token, void *data)
{
	u64 *p = (u64 *) data;

	return veth_signalevent(cnx, subtype, HvLpEvent_AckInd_NoAck,
				HvLpEvent_AckType_ImmediateAck,
				token, p[0], p[1], p[2], p[3], p[4]);
}

struct veth_allocation {
	struct completion c;
	int num;
};

static void veth_complete_allocation(void *parm, int number)
{
	struct veth_allocation *vc = (struct veth_allocation *)parm;

	vc->num = number;
	complete(&vc->c);
}
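
/* Ask the hypervisor (via mf_allocate_lp_events) for event buffers for
 * the given remote LP and sleep on a completion until the callback
 * above reports how many were actually allocated. */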
static int veth_allocate_events(HvLpIndex rlp, int number)
{
	struct veth_allocation vc = { COMPLETION_INITIALIZER(vc.c), 0 };

	mf_allocate_lp_events(rlp, HvLpEvent_Type_VirtualLan,
			      sizeof(struct veth_lpevent), number,
			      &veth_complete_allocation, &vc);
	wait_for_completion(&vc.c);

	return vc.num;
}

/*
 * sysfs support
 */

struct veth_cnx_attribute {
	struct attribute attr;
	ssize_t (*show)(struct veth_lpar_connection *, char *buf);
	ssize_t (*store)(struct veth_lpar_connection *, const char *buf);
};

static ssize_t veth_cnx_attribute_show(struct kobject *kobj,
		struct attribute *attr, char *buf)
{
	struct veth_cnx_attribute *cnx_attr;
	struct veth_lpar_connection *cnx;

	cnx_attr = container_of(attr, struct veth_cnx_attribute, attr);
	cnx = container_of(kobj, struct veth_lpar_connection, kobject);

	if (!cnx_attr->show)
		return -EIO;

	return cnx_attr->show(cnx, buf);
}

#define CUSTOM_CNX_ATTR(_name, _format, _expression)			\
static ssize_t _name##_show(struct veth_lpar_connection *cnx, char *buf)\
{									\
	return sprintf(buf, _format, _expression);			\
}									\
struct veth_cnx_attribute veth_cnx_attr_##_name = __ATTR_RO(_name)

#define SIMPLE_CNX_ATTR(_name)	\
	CUSTOM_CNX_ATTR(_name, "%lu\n", (unsigned long)cnx->_name)

SIMPLE_CNX_ATTR(outstanding_tx);
SIMPLE_CNX_ATTR(remote_lp);
SIMPLE_CNX_ATTR(num_events);
SIMPLE_CNX_ATTR(src_inst);
SIMPLE_CNX_ATTR(dst_inst);
SIMPLE_CNX_ATTR(num_pending_acks);
SIMPLE_CNX_ATTR(num_ack_events);
CUSTOM_CNX_ATTR(ack_timeout, "%d\n", jiffies_to_msecs(cnx->ack_timeout));
CUSTOM_CNX_ATTR(reset_timeout, "%d\n", jiffies_to_msecs(cnx->reset_timeout));
CUSTOM_CNX_ATTR(state, "0x%.4lX\n", cnx->state);
CUSTOM_CNX_ATTR(last_contact, "%d\n", cnx->last_contact ?
		jiffies_to_msecs(jiffies - cnx->last_contact) : 0);

#define GET_CNX_ATTR(_name)	(&veth_cnx_attr_##_name.attr)

static struct attribute *veth_cnx_default_attrs[] = {
	GET_CNX_ATTR(outstanding_tx),
	GET_CNX_ATTR(remote_lp),
	GET_CNX_ATTR(num_events),
	GET_CNX_ATTR(reset_timeout),
	GET_CNX_ATTR(last_contact),
	GET_CNX_ATTR(state),
	GET_CNX_ATTR(src_inst),
	GET_CNX_ATTR(dst_inst),
	GET_CNX_ATTR(num_pending_acks),
	GET_CNX_ATTR(num_ack_events),
	GET_CNX_ATTR(ack_timeout),
	NULL
};

static struct sysfs_ops veth_cnx_sysfs_ops = {
	.show = veth_cnx_attribute_show
};

static struct kobj_type veth_lpar_connection_ktype = {
	.release	= veth_release_connection,
	.sysfs_ops	= &veth_cnx_sysfs_ops,
	.default_attrs	= veth_cnx_default_attrs
};

struct veth_port_attribute {
	struct attribute attr;
	ssize_t (*show)(struct veth_port *, char *buf);
	ssize_t (*store)(struct veth_port *, const char *buf);
};

static ssize_t veth_port_attribute_show(struct kobject *kobj,
		struct attribute *attr, char *buf)
{
	struct veth_port_attribute *port_attr;
	struct veth_port *port;

	port_attr = container_of(attr, struct veth_port_attribute, attr);
	port = container_of(kobj, struct veth_port, kobject);

	if (!port_attr->show)
		return -EIO;

	return port_attr->show(port, buf);
}

#define CUSTOM_PORT_ATTR(_name, _format, _expression)			\
static ssize_t _name##_show(struct veth_port *port, char *buf)		\
{									\
	return sprintf(buf, _format, _expression);			\
}									\
struct veth_port_attribute veth_port_attr_##_name = __ATTR_RO(_name)

#define SIMPLE_PORT_ATTR(_name)	\
	CUSTOM_PORT_ATTR(_name, "%lu\n", (unsigned long)port->_name)

SIMPLE_PORT_ATTR(promiscuous);
SIMPLE_PORT_ATTR(num_mcast);
CUSTOM_PORT_ATTR(lpar_map, "0x%X\n", port->lpar_map);
CUSTOM_PORT_ATTR(stopped_map, "0x%X\n", port->stopped_map);
CUSTOM_PORT_ATTR(mac_addr, "0x%lX\n", port->mac_addr);

#define GET_PORT_ATTR(_name)	(&veth_port_attr_##_name.attr)
static struct attribute *veth_port_default_attrs[] = {
	GET_PORT_ATTR(mac_addr),
	GET_PORT_ATTR(lpar_map),
	GET_PORT_ATTR(stopped_map),
	GET_PORT_ATTR(promiscuous),
	GET_PORT_ATTR(num_mcast),
	NULL
};

static struct sysfs_ops veth_port_sysfs_ops = {
	.show = veth_port_attribute_show
};

static struct kobj_type veth_port_ktype = {
	.sysfs_ops	= &veth_port_sysfs_ops,
	.default_attrs	= veth_port_default_attrs
};

/*
 * LPAR connection code
 */

static inline void veth_kick_statemachine(struct veth_lpar_connection *cnx)
{
	schedule_work(&cnx->statemachine_wq);
}
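
/* Handle an incoming capabilities event: refresh the far end's
 * instance ID (it may have just come up), stash the event for the
 * state machine to process, and reject duplicates. */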
static void veth_take_cap(struct veth_lpar_connection *cnx,
			  struct veth_lpevent *event)
{
	unsigned long flags;

	spin_lock_irqsave(&cnx->lock, flags);
	/* Receiving caps may mean the other end has just come up, so
	 * we need to reload the instance ID of the far end */
	cnx->dst_inst =
		HvCallEvent_getTargetLpInstanceId(cnx->remote_lp,
						  HvLpEvent_Type_VirtualLan);

	if (cnx->state & VETH_STATE_GOTCAPS) {
		veth_error("Received a second capabilities from LPAR %d.\n",
			   cnx->remote_lp);
		event->base_event.xRc = HvLpEvent_Rc_BufferNotAvailable;
		HvCallEvent_ackLpEvent((struct HvLpEvent *) event);
	} else {
		memcpy(&cnx->cap_event, event, sizeof(cnx->cap_event));
		cnx->state |= VETH_STATE_GOTCAPS;
		veth_kick_statemachine(cnx);
	}
	spin_unlock_irqrestore(&cnx->lock, flags);
}

static void veth_take_cap_ack(struct veth_lpar_connection *cnx,
			      struct veth_lpevent *event)
{
	unsigned long flags;

	spin_lock_irqsave(&cnx->lock, flags);
	if (cnx->state & VETH_STATE_GOTCAPACK) {
		veth_error("Received a second capabilities ack from LPAR %d.\n",
			   cnx->remote_lp);
	} else {
		memcpy(&cnx->cap_ack_event, event,
		       sizeof(cnx->cap_ack_event));
		cnx->state |= VETH_STATE_GOTCAPACK;
		veth_kick_statemachine(cnx);
	}
	spin_unlock_irqrestore(&cnx->lock, flags);
}

static void veth_take_monitor_ack(struct veth_lpar_connection *cnx,
				  struct veth_lpevent *event)
{
	unsigned long flags;

	spin_lock_irqsave(&cnx->lock, flags);
	veth_debug("cnx %d: lost connection.\n", cnx->remote_lp);

	/* Avoid kicking the statemachine once we're shutdown.
	 * It's unnecessary and it could break veth_stop_connection(). */

	if (! (cnx->state & VETH_STATE_SHUTDOWN)) {
		cnx->state |= VETH_STATE_RESET;
		veth_kick_statemachine(cnx);
	}
	spin_unlock_irqrestore(&cnx->lock, flags);
}
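
/* An ack has come back from the hypervisor: dispatch on the subtype
 * of the original event we signalled. */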
static void veth_handle_ack(struct veth_lpevent *event)
{
	HvLpIndex rlp = event->base_event.xTargetLp;
	struct veth_lpar_connection *cnx = veth_cnx[rlp];

	BUG_ON(! cnx);

	switch (event->base_event.xSubtype) {
	case VETH_EVENT_CAP:
		veth_take_cap_ack(cnx, event);
		break;
	case VETH_EVENT_MONITOR:
		veth_take_monitor_ack(cnx, event);
		break;
	default:
		veth_error("Unknown ack type %d from LPAR %d.\n",
				event->base_event.xSubtype, rlp);
	}
}

static void veth_handle_int(struct veth_lpevent *event)
{
	HvLpIndex rlp = event->base_event.xSourceLp;
	struct veth_lpar_connection *cnx = veth_cnx[rlp];
	unsigned long flags;
	int i, acked = 0;

	BUG_ON(! cnx);

	switch (event->base_event.xSubtype) {
	case VETH_EVENT_CAP:
		veth_take_cap(cnx, event);
		break;
	case VETH_EVENT_MONITOR:
		/* do nothing... this'll hang out here til we're dead,
		 * and the hypervisor will return it for us. */
		break;
	case VETH_EVENT_FRAMES_ACK:
		spin_lock_irqsave(&cnx->lock, flags);

		for (i = 0; i < VETH_MAX_ACKS_PER_MSG; ++i) {
			u16 msgnum = event->u.frames_ack_data.token[i];

			if (msgnum < VETH_NUMBUFFERS) {
				veth_recycle_msg(cnx, cnx->msgs + msgnum);
				cnx->outstanding_tx--;
				acked++;
			}
		}

		if (acked > 0) {
			cnx->last_contact = jiffies;
			veth_wake_queues(cnx);
		}

		spin_unlock_irqrestore(&cnx->lock, flags);
		break;
	case VETH_EVENT_FRAMES:
		veth_receive(cnx, event);
		break;
	default:
		veth_error("Unknown interrupt type %d from LPAR %d.\n",
				event->base_event.xSubtype, rlp);
	}
}

static void veth_handle_event(struct HvLpEvent *event, struct pt_regs *regs)
{
	struct veth_lpevent *veth_event = (struct veth_lpevent *)event;

	if (hvlpevent_is_ack(event))
		veth_handle_ack(veth_event);
	else
		veth_handle_int(veth_event);
}
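
/* Validate the capabilities the remote partition sent us, and make
 * sure enough ack events are allocated to cover its buffers. */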
static int veth_process_caps(struct veth_lpar_connection *cnx)
{
	struct veth_cap_data *remote_caps = &cnx->remote_caps;
	int num_acks_needed;

	/* Convert timer to jiffies */
	cnx->ack_timeout = remote_caps->ack_timeout * HZ / 1000000;

	if ( (remote_caps->num_buffers == 0)
	     || (remote_caps->ack_threshold > VETH_MAX_ACKS_PER_MSG)
	     || (remote_caps->ack_threshold == 0)
	     || (cnx->ack_timeout == 0) ) {
		veth_error("Received incompatible capabilities from LPAR %d.\n",
			   cnx->remote_lp);
		return HvLpEvent_Rc_InvalidSubtypeData;
	}

	num_acks_needed = (remote_caps->num_buffers
			   / remote_caps->ack_threshold) + 1;

	/* FIXME: locking on num_ack_events? */
	if (cnx->num_ack_events < num_acks_needed) {
		int num;

		num = veth_allocate_events(cnx->remote_lp,
					   num_acks_needed - cnx->num_ack_events);
		if (num > 0)
			cnx->num_ack_events += num;

		if (cnx->num_ack_events < num_acks_needed) {
			veth_error("Couldn't allocate enough ack events "
				   "for LPAR %d.\n", cnx->remote_lp);

			return HvLpEvent_Rc_BufferNotAvailable;
		}
	}

	return HvLpEvent_Rc_Good;
}
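
/* The connection state machine.  Runs from a workqueue; each pass
 * first services any pending reset, then walks the connection through
 * open -> monitor sent -> caps exchanged -> ready, acking the remote
 * caps along the way. */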
/* FIXME: The gotos here are a bit dubious */
static void veth_statemachine(void *p)
{
	struct veth_lpar_connection *cnx = (struct veth_lpar_connection *)p;
	int rlp = cnx->remote_lp;
	int rc;

	spin_lock_irq(&cnx->lock);

 restart:
	if (cnx->state & VETH_STATE_RESET) {
		if (cnx->state & VETH_STATE_OPEN)
			HvCallEvent_closeLpEventPath(cnx->remote_lp,
						     HvLpEvent_Type_VirtualLan);

		/*
		 * Reset ack data. This prevents the ack_timer actually
		 * doing anything, even if it runs one more time when
		 * we drop the lock below.
		 */
		memset(&cnx->pending_acks, 0xff, sizeof (cnx->pending_acks));
		cnx->num_pending_acks = 0;

		cnx->state &= ~(VETH_STATE_RESET | VETH_STATE_SENTMON
				| VETH_STATE_OPEN | VETH_STATE_SENTCAPS
				| VETH_STATE_GOTCAPACK | VETH_STATE_GOTCAPS
				| VETH_STATE_SENTCAPACK | VETH_STATE_READY);

		/* Clean up any leftover messages */
		if (cnx->msgs) {
			int i;
			for (i = 0; i < VETH_NUMBUFFERS; ++i)
				veth_recycle_msg(cnx, cnx->msgs + i);
		}

		cnx->outstanding_tx = 0;
		veth_wake_queues(cnx);

		/* Drop the lock so we can do stuff that might sleep or
		 * take other locks. */
		spin_unlock_irq(&cnx->lock);

		del_timer_sync(&cnx->ack_timer);
		del_timer_sync(&cnx->reset_timer);

		spin_lock_irq(&cnx->lock);

		if (cnx->state & VETH_STATE_RESET)
			goto restart;

		/* Hack, wait for the other end to reset itself. */
		if (! (cnx->state & VETH_STATE_SHUTDOWN)) {
			schedule_delayed_work(&cnx->statemachine_wq, 5 * HZ);
			goto out;
		}
	}

	if (cnx->state & VETH_STATE_SHUTDOWN)
		/* It's all over, do nothing */
		goto out;

	if ( !(cnx->state & VETH_STATE_OPEN) ) {
		if (! cnx->msgs || (cnx->num_events < (2 + VETH_NUMBUFFERS)) )
			goto cant_cope;

		HvCallEvent_openLpEventPath(rlp, HvLpEvent_Type_VirtualLan);
		cnx->src_inst =
			HvCallEvent_getSourceLpInstanceId(rlp,
							  HvLpEvent_Type_VirtualLan);
		cnx->dst_inst =
			HvCallEvent_getTargetLpInstanceId(rlp,
							  HvLpEvent_Type_VirtualLan);
		cnx->state |= VETH_STATE_OPEN;
	}

	if ( (cnx->state & VETH_STATE_OPEN)
	     && !(cnx->state & VETH_STATE_SENTMON) ) {
		rc = veth_signalevent(cnx, VETH_EVENT_MONITOR,
				      HvLpEvent_AckInd_DoAck,
				      HvLpEvent_AckType_DeferredAck,
				      0, 0, 0, 0, 0, 0);

		if (rc == HvLpEvent_Rc_Good) {
			cnx->state |= VETH_STATE_SENTMON;
		} else {
			if ( (rc != HvLpEvent_Rc_PartitionDead)
			     && (rc != HvLpEvent_Rc_PathClosed) )
				veth_error("Error sending monitor to LPAR %d, "
					   "rc = %d\n", rlp, rc);

			/* Oh well, hope we get a cap from the other
			 * end and do better when that kicks us */
			goto out;
		}
	}

	if ( (cnx->state & VETH_STATE_OPEN)
	     && !(cnx->state & VETH_STATE_SENTCAPS)) {
		u64 *rawcap = (u64 *)&cnx->local_caps;

		rc = veth_signalevent(cnx, VETH_EVENT_CAP,
				      HvLpEvent_AckInd_DoAck,
				      HvLpEvent_AckType_ImmediateAck,
				      0, rawcap[0], rawcap[1], rawcap[2],
				      rawcap[3], rawcap[4]);

		if (rc == HvLpEvent_Rc_Good) {
			cnx->state |= VETH_STATE_SENTCAPS;
		} else {
			if ( (rc != HvLpEvent_Rc_PartitionDead)
			     && (rc != HvLpEvent_Rc_PathClosed) )
				veth_error("Error sending caps to LPAR %d, "
					   "rc = %d\n", rlp, rc);

			/* Oh well, hope we get a cap from the other
			 * end and do better when that kicks us */
			goto out;
		}
	}

	if ((cnx->state & VETH_STATE_GOTCAPS)
	    && !(cnx->state & VETH_STATE_SENTCAPACK)) {
		struct veth_cap_data *remote_caps = &cnx->remote_caps;

		memcpy(remote_caps, &cnx->cap_event.u.caps_data,
		       sizeof(*remote_caps));

		spin_unlock_irq(&cnx->lock);
		rc = veth_process_caps(cnx);
		spin_lock_irq(&cnx->lock);

		/* We dropped the lock, so recheck for anything which
		 * might mess us up */
		if (cnx->state & (VETH_STATE_RESET|VETH_STATE_SHUTDOWN))
			goto restart;

		cnx->cap_event.base_event.xRc = rc;
		HvCallEvent_ackLpEvent((struct HvLpEvent *)&cnx->cap_event);
		if (rc == HvLpEvent_Rc_Good)
			cnx->state |= VETH_STATE_SENTCAPACK;
		else
			goto cant_cope;
	}

	if ((cnx->state & VETH_STATE_GOTCAPACK)
	    && (cnx->state & VETH_STATE_GOTCAPS)
	    && !(cnx->state & VETH_STATE_READY)) {
		if (cnx->cap_ack_event.base_event.xRc == HvLpEvent_Rc_Good) {
			/* Start the ACK timer */
			cnx->ack_timer.expires = jiffies + cnx->ack_timeout;
			add_timer(&cnx->ack_timer);
			cnx->state |= VETH_STATE_READY;
		} else {
			veth_error("Caps rejected by LPAR %d, rc = %d\n",
				   rlp, cnx->cap_ack_event.base_event.xRc);
			goto cant_cope;
		}
	}

 out:
	spin_unlock_irq(&cnx->lock);
	return;

 cant_cope:
	/* FIXME: we get here if something happens we really can't
	 * cope with.  The link will never work once we get here, and
	 * all we can do is not lock the rest of the system up */
	veth_error("Unrecoverable error on connection to LPAR %d, shutting down"
		   " (state = 0x%04lx)\n", rlp, cnx->state);
	cnx->state |= VETH_STATE_SHUTDOWN;
	spin_unlock_irq(&cnx->lock);
}
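
/* Set up the connection to one remote LP: allocate the message pool
 * and event buffers, initialise the timers, and record our local
 * capabilities.  Called for every possible partition at module init. */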
static int veth_init_connection(u8 rlp)
{
	struct veth_lpar_connection *cnx;
	struct veth_msg *msgs;
	int i, rc;

	if ( (rlp == this_lp)
	     || ! HvLpConfig_doLpsCommunicateOnVirtualLan(this_lp, rlp) )
		return 0;

	cnx = kmalloc(sizeof(*cnx), GFP_KERNEL);
	if (! cnx)
		return -ENOMEM;
	memset(cnx, 0, sizeof(*cnx));

	cnx->remote_lp = rlp;
	spin_lock_init(&cnx->lock);
	INIT_WORK(&cnx->statemachine_wq, veth_statemachine, cnx);

	init_timer(&cnx->ack_timer);
	cnx->ack_timer.function = veth_timed_ack;
	cnx->ack_timer.data = (unsigned long) cnx;

	init_timer(&cnx->reset_timer);
	cnx->reset_timer.function = veth_timed_reset;
	cnx->reset_timer.data = (unsigned long) cnx;
	cnx->reset_timeout = 5 * HZ * (VETH_ACKTIMEOUT / 1000000);

	memset(&cnx->pending_acks, 0xff, sizeof (cnx->pending_acks));

	veth_cnx[rlp] = cnx;

	/* This gets us 1 reference, which is held on behalf of the driver
	 * infrastructure. It's released at module unload. */
	kobject_init(&cnx->kobject);
	cnx->kobject.ktype = &veth_lpar_connection_ktype;
	rc = kobject_set_name(&cnx->kobject, "cnx%.2d", rlp);
	if (rc != 0)
		return rc;

	msgs = kmalloc(VETH_NUMBUFFERS * sizeof(struct veth_msg), GFP_KERNEL);
	if (! msgs) {
		veth_error("Can't allocate buffers for LPAR %d.\n", rlp);
		return -ENOMEM;
	}

	cnx->msgs = msgs;
	memset(msgs, 0, VETH_NUMBUFFERS * sizeof(struct veth_msg));

	for (i = 0; i < VETH_NUMBUFFERS; i++) {
		msgs[i].token = i;
		veth_stack_push(cnx, msgs + i);
	}

	cnx->num_events = veth_allocate_events(rlp, 2 + VETH_NUMBUFFERS);

	if (cnx->num_events < (2 + VETH_NUMBUFFERS)) {
		veth_error("Can't allocate enough events for LPAR %d.\n", rlp);
		return -ENOMEM;
	}

	cnx->local_caps.num_buffers = VETH_NUMBUFFERS;
	cnx->local_caps.ack_threshold = ACK_THRESHOLD;
	cnx->local_caps.ack_timeout = VETH_ACKTIMEOUT;

	return 0;
}

static void veth_stop_connection(struct veth_lpar_connection *cnx)
{
	if (!cnx)
		return;

	spin_lock_irq(&cnx->lock);
	cnx->state |= VETH_STATE_RESET | VETH_STATE_SHUTDOWN;
	veth_kick_statemachine(cnx);
	spin_unlock_irq(&cnx->lock);

	/* There's a slim chance the reset code has just queued the
	 * statemachine to run in five seconds. If so we need to cancel
	 * that and requeue the work to run now. */
	if (cancel_delayed_work(&cnx->statemachine_wq)) {
		spin_lock_irq(&cnx->lock);
		veth_kick_statemachine(cnx);
		spin_unlock_irq(&cnx->lock);
	}

	/* Wait for the state machine to run. */
	flush_scheduled_work();
}

static void veth_destroy_connection(struct veth_lpar_connection *cnx)
{
	if (!cnx)
		return;

	if (cnx->num_events > 0)
		mf_deallocate_lp_events(cnx->remote_lp,
					HvLpEvent_Type_VirtualLan,
					cnx->num_events,
					NULL, NULL);
	if (cnx->num_ack_events > 0)
		mf_deallocate_lp_events(cnx->remote_lp,
					HvLpEvent_Type_VirtualLan,
					cnx->num_ack_events,
					NULL, NULL);

	kfree(cnx->msgs);
	veth_cnx[cnx->remote_lp] = NULL;
	kfree(cnx);
}

static void veth_release_connection(struct kobject *kobj)
{
	struct veth_lpar_connection *cnx;
	cnx = container_of(kobj, struct veth_lpar_connection, kobject);
	veth_stop_connection(cnx);
	veth_destroy_connection(cnx);
}

/*
 * net_device code
 */

static int veth_open(struct net_device *dev)
{
	struct veth_port *port = (struct veth_port *) dev->priv;

	memset(&port->stats, 0, sizeof (port->stats));
	netif_start_queue(dev);
	return 0;
}

static int veth_close(struct net_device *dev)
{
	netif_stop_queue(dev);
	return 0;
}

static struct net_device_stats *veth_get_stats(struct net_device *dev)
{
	struct veth_port *port = (struct veth_port *) dev->priv;

	return &port->stats;
}

static int veth_change_mtu(struct net_device *dev, int new_mtu)
{
	if ((new_mtu < 68) || (new_mtu > VETH_MAX_MTU))
		return -EINVAL;
	dev->mtu = new_mtu;
	return 0;
}

static void veth_set_multicast_list(struct net_device *dev)
{
	struct veth_port *port = (struct veth_port *) dev->priv;
	unsigned long flags;

	write_lock_irqsave(&port->mcast_gate, flags);

	if ((dev->flags & IFF_PROMISC) || (dev->flags & IFF_ALLMULTI) ||
			(dev->mc_count > VETH_MAX_MCAST)) {
		port->promiscuous = 1;
	} else {
		struct dev_mc_list *dmi = dev->mc_list;
		int i;

		port->promiscuous = 0;

		/* Update table */
		port->num_mcast = 0;

		for (i = 0; i < dev->mc_count; i++) {
			u8 *addr = dmi->dmi_addr;
			u64 xaddr = 0;

			if (addr[0] & 0x01) {/* multicast address? */
				memcpy(&xaddr, addr, ETH_ALEN);
				port->mcast_addr[port->num_mcast] = xaddr;
				port->num_mcast++;
			}
			dmi = dmi->next;
		}
	}

	write_unlock_irqrestore(&port->mcast_gate, flags);
}

static void veth_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
{
	strncpy(info->driver, DRV_NAME, sizeof(info->driver) - 1);
	info->driver[sizeof(info->driver) - 1] = '\0';
	strncpy(info->version, DRV_VERSION, sizeof(info->version) - 1);
	info->version[sizeof(info->version) - 1] = '\0';
}

static int veth_get_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
{
	ecmd->supported = (SUPPORTED_1000baseT_Full
			  | SUPPORTED_Autoneg | SUPPORTED_FIBRE);
	ecmd->advertising = (SUPPORTED_1000baseT_Full
			    | SUPPORTED_Autoneg | SUPPORTED_FIBRE);
	ecmd->port = PORT_FIBRE;
	ecmd->transceiver = XCVR_INTERNAL;
	ecmd->phy_address = 0;
	ecmd->speed = SPEED_1000;
	ecmd->duplex = DUPLEX_FULL;
	ecmd->autoneg = AUTONEG_ENABLE;
	ecmd->maxtxpkt = 120;
	ecmd->maxrxpkt = 120;
	return 0;
}

static u32 veth_get_link(struct net_device *dev)
{
	return 1;
}

static struct ethtool_ops ops = {
	.get_drvinfo = veth_get_drvinfo,
	.get_settings = veth_get_settings,
	.get_link = veth_get_link,
};

static struct net_device * __init veth_probe_one(int vlan,
		struct vio_dev *vio_dev)
{
	struct net_device *dev;
	struct veth_port *port;
	struct device *vdev = &vio_dev->dev;
	int i, rc;
	const unsigned char *mac_addr;

	mac_addr = vio_get_attribute(vio_dev, "local-mac-address", NULL);
	if (mac_addr == NULL)
		mac_addr = vio_get_attribute(vio_dev, "mac-address", NULL);
	if (mac_addr == NULL) {
		veth_error("Unable to fetch MAC address from device tree.\n");
		return NULL;
	}

	dev = alloc_etherdev(sizeof (struct veth_port));
	if (! dev) {
		veth_error("Unable to allocate net_device structure!\n");
		return NULL;
	}

	port = (struct veth_port *) dev->priv;

	spin_lock_init(&port->queue_lock);
	rwlock_init(&port->mcast_gate);
	port->stopped_map = 0;

	for (i = 0; i < HVMAXARCHITECTEDLPS; i++) {
		HvLpVirtualLanIndexMap map;

		if (i == this_lp)
			continue;
		map = HvLpConfig_getVirtualLanIndexMapForLp(i);
		if (map & (0x8000 >> vlan))
			port->lpar_map |= (1 << i);
	}
	port->dev = vdev;

	memcpy(dev->dev_addr, mac_addr, ETH_ALEN);

	dev->mtu = VETH_MAX_MTU;

	memcpy(&port->mac_addr, mac_addr, ETH_ALEN);

	dev->open = veth_open;
	dev->hard_start_xmit = veth_start_xmit;
	dev->stop = veth_close;
	dev->get_stats = veth_get_stats;
	dev->change_mtu = veth_change_mtu;
	dev->set_mac_address = NULL;
	dev->set_multicast_list = veth_set_multicast_list;
	SET_ETHTOOL_OPS(dev, &ops);

	SET_NETDEV_DEV(dev, vdev);

	rc = register_netdev(dev);
	if (rc != 0) {
		veth_error("Failed registering net device for vlan%d.\n", vlan);
		free_netdev(dev);
		return NULL;
	}

	kobject_init(&port->kobject);
	port->kobject.parent = &dev->class_dev.kobj;
	port->kobject.ktype = &veth_port_ktype;
	kobject_set_name(&port->kobject, "veth_port");
	if (0 != kobject_add(&port->kobject))
		veth_error("Failed adding port for %s to sysfs.\n", dev->name);

	veth_info("%s attached to iSeries vlan %d (LPAR map = 0x%.4X)\n",
			dev->name, vlan, port->lpar_map);

	return dev;
}

/*
 * Tx path
 */
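
/* Transmit one skb to one remote LP: take a message buffer from the
 * connection's free stack, DMA-map the frame and signal a FRAMES
 * event to the other side.  Returns non-zero if the frame had to be
 * dropped. */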
static int veth_transmit_to_one(struct sk_buff *skb, HvLpIndex rlp,
				struct net_device *dev)
{
	struct veth_lpar_connection *cnx = veth_cnx[rlp];
	struct veth_port *port = (struct veth_port *) dev->priv;
	HvLpEvent_Rc rc;
	struct veth_msg *msg = NULL;
	unsigned long flags;

	if (! cnx)
		return 0;

	spin_lock_irqsave(&cnx->lock, flags);

	if (! (cnx->state & VETH_STATE_READY))
		goto no_error;

	if ((skb->len - ETH_HLEN) > VETH_MAX_MTU)
		goto drop;

	msg = veth_stack_pop(cnx);
	if (! msg)
		goto drop;

	msg->in_use = 1;
	msg->skb = skb_get(skb);

	msg->data.addr[0] = dma_map_single(port->dev, skb->data,
				skb->len, DMA_TO_DEVICE);

	if (dma_mapping_error(msg->data.addr[0]))
		goto recycle_and_drop;

	msg->dev = port->dev;
	msg->data.len[0] = skb->len;
	msg->data.eofmask = 1 << VETH_EOF_SHIFT;

	rc = veth_signaldata(cnx, VETH_EVENT_FRAMES, msg->token, &msg->data);

	if (rc != HvLpEvent_Rc_Good)
		goto recycle_and_drop;

	/* If the timer's not already running, start it now. */
	if (0 == cnx->outstanding_tx)
		mod_timer(&cnx->reset_timer, jiffies + cnx->reset_timeout);

	cnx->last_contact = jiffies;
	cnx->outstanding_tx++;

	if (veth_stack_is_empty(cnx))
		veth_stop_queues(cnx);

 no_error:
	spin_unlock_irqrestore(&cnx->lock, flags);
	return 0;

 recycle_and_drop:
	veth_recycle_msg(cnx, msg);
 drop:
	spin_unlock_irqrestore(&cnx->lock, flags);
	return 1;
}

static void veth_transmit_to_many(struct sk_buff *skb,
				  HvLpIndexMap lpmask,
				  struct net_device *dev)
{
	struct veth_port *port = (struct veth_port *) dev->priv;
	int i, success, error;

	success = error = 0;

	for (i = 0; i < HVMAXARCHITECTEDLPS; i++) {
		if ((lpmask & (1 << i)) == 0)
			continue;

		if (veth_transmit_to_one(skb, i, dev))
			error = 1;
		else
			success = 1;
	}

	if (error)
		port->stats.tx_errors++;

	if (success) {
		port->stats.tx_packets++;
		port->stats.tx_bytes += skb->len;
	}
}

static int veth_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	unsigned char *frame = skb->data;
	struct veth_port *port = (struct veth_port *) dev->priv;
	HvLpIndexMap lpmask;

	if (! (frame[0] & 0x01)) {
		/* unicast packet */
		HvLpIndex rlp = frame[5];

		if ( ! ((1 << rlp) & port->lpar_map) ) {
			dev_kfree_skb(skb);
			return 0;
		}

		lpmask = 1 << rlp;
	} else {
		lpmask = port->lpar_map;
	}

	veth_transmit_to_many(skb, lpmask, dev);

	dev_kfree_skb(skb);

	return 0;
}

/* You must hold the connection's lock when you call this function. */
static void veth_recycle_msg(struct veth_lpar_connection *cnx,
			     struct veth_msg *msg)
{
	u32 dma_address, dma_length;

	if (msg->in_use) {
		msg->in_use = 0;
		dma_address = msg->data.addr[0];
		dma_length = msg->data.len[0];

		if (!dma_mapping_error(dma_address))
			dma_unmap_single(msg->dev, dma_address, dma_length,
					DMA_TO_DEVICE);

		if (msg->skb) {
			dev_kfree_skb_any(msg->skb);
			msg->skb = NULL;
		}

		memset(&msg->data, 0, sizeof(msg->data));
		veth_stack_push(cnx, msg);
	} else if (cnx->state & VETH_STATE_OPEN) {
		veth_error("Non-pending frame (# %d) acked by LPAR %d.\n",
				msg->token, cnx->remote_lp);
	}
}

static void veth_wake_queues(struct veth_lpar_connection *cnx)
{
	int i;

	for (i = 0; i < HVMAXARCHITECTEDVIRTUALLANS; i++) {
		struct net_device *dev = veth_dev[i];
		struct veth_port *port;
		unsigned long flags;

		if (! dev)
			continue;

		port = (struct veth_port *)dev->priv;

		if (! (port->lpar_map & (1<<cnx->remote_lp)))
			continue;

		spin_lock_irqsave(&port->queue_lock, flags);

		port->stopped_map &= ~(1 << cnx->remote_lp);

		if (0 == port->stopped_map && netif_queue_stopped(dev)) {
			veth_debug("cnx %d: woke queue for %s.\n",
					cnx->remote_lp, dev->name);
			netif_wake_queue(dev);
		}
		spin_unlock_irqrestore(&port->queue_lock, flags);
	}
}

static void veth_stop_queues(struct veth_lpar_connection *cnx)
{
	int i;

	for (i = 0; i < HVMAXARCHITECTEDVIRTUALLANS; i++) {
		struct net_device *dev = veth_dev[i];
		struct veth_port *port;

		if (! dev)
			continue;

		port = (struct veth_port *)dev->priv;

		/* If this cnx is not on the vlan for this port, continue */
		if (! (port->lpar_map & (1 << cnx->remote_lp)))
			continue;

		spin_lock(&port->queue_lock);

		netif_stop_queue(dev);
		port->stopped_map |= (1 << cnx->remote_lp);

		veth_debug("cnx %d: stopped queue for %s, map = 0x%x.\n",
				cnx->remote_lp, dev->name, port->stopped_map);

		spin_unlock(&port->queue_lock);
	}
}

static void veth_timed_reset(unsigned long ptr)
{
	struct veth_lpar_connection *cnx = (struct veth_lpar_connection *)ptr;
	unsigned long trigger_time, flags;

	/* FIXME is it possible this fires after veth_stop_connection()?
	 * That would reschedule the statemachine for 5 seconds and probably
	 * execute it after the module's been unloaded. Hmm. */

	spin_lock_irqsave(&cnx->lock, flags);

	if (cnx->outstanding_tx > 0) {
		trigger_time = cnx->last_contact + cnx->reset_timeout;

		if (trigger_time < jiffies) {
			cnx->state |= VETH_STATE_RESET;
			veth_kick_statemachine(cnx);
			veth_error("%d packets not acked by LPAR %d within %d "
					"seconds, resetting.\n",
					cnx->outstanding_tx, cnx->remote_lp,
					cnx->reset_timeout / HZ);
		} else {
			/* Reschedule the timer */
			trigger_time = jiffies + cnx->reset_timeout;
			mod_timer(&cnx->reset_timer, trigger_time);
		}
	}

	spin_unlock_irqrestore(&cnx->lock, flags);
}

/*
 * Rx path
 */

static inline int veth_frame_wanted(struct veth_port *port, u64 mac_addr)
{
	int wanted = 0;
	int i;
	unsigned long flags;

	if ( (mac_addr == port->mac_addr) || (mac_addr == 0xffffffffffff0000) )
		return 1;

	read_lock_irqsave(&port->mcast_gate, flags);

	if (port->promiscuous) {
		wanted = 1;
		goto out;
	}

	for (i = 0; i < port->num_mcast; ++i) {
		if (port->mcast_addr[i] == mac_addr) {
			wanted = 1;
			break;
		}
	}

 out:
	read_unlock_irqrestore(&port->mcast_gate, flags);

	return wanted;
}

struct dma_chunk {
	u64 addr;
	u64 size;
};

#define VETH_MAX_PAGES_PER_FRAME ( (VETH_MAX_MTU+PAGE_SIZE-2)/PAGE_SIZE + 1 )
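
/* Break a virtually contiguous buffer into a list of hypervisor
 * absolute-address chunks, splitting at page boundaries so that each
 * chunk is physically contiguous. */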
static inline void veth_build_dma_list(struct dma_chunk *list,
				       unsigned char *p, unsigned long length)
{
	unsigned long done;
	int i = 1;

	/* FIXME: skbs are contiguous in real addresses.  Do we
	 * really need to break it into PAGE_SIZE chunks, or can we do
	 * it just at the granularity of iSeries real->absolute
	 * mapping?  Indeed, given the way the allocator works, can we
	 * count on them being absolutely contiguous? */
	list[0].addr = iseries_hv_addr(p);
	list[0].size = min(length,
			   PAGE_SIZE - ((unsigned long)p & ~PAGE_MASK));

	done = list[0].size;
	while (done < length) {
		list[i].addr = iseries_hv_addr(p + done);
		list[i].size = min(length-done, PAGE_SIZE);
		done += list[i].size;
		i++;
	}
}

static void veth_flush_acks(struct veth_lpar_connection *cnx)
{
	HvLpEvent_Rc rc;

	rc = veth_signaldata(cnx, VETH_EVENT_FRAMES_ACK,
			     0, &cnx->pending_acks);

	if (rc != HvLpEvent_Rc_Good)
		veth_error("Failed acking frames from LPAR %d, rc = %d\n",
				cnx->remote_lp, (int)rc);

	cnx->num_pending_acks = 0;
	memset(&cnx->pending_acks, 0xff, sizeof(cnx->pending_acks));
}
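
/* Handle a FRAMES event from a remote LP.  For each frame described
 * in the event, DMA the chunks from the sender's buffers into a
 * freshly allocated skb, filter on destination MAC, and push the skb
 * up the stack; finally queue an ack for the event's token. */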
static void veth_receive(struct veth_lpar_connection *cnx,
			 struct veth_lpevent *event)
{
	struct veth_frames_data *senddata = &event->u.frames_data;
	int startchunk = 0;
	int nchunks;
	unsigned long flags;
	HvLpDma_Rc rc;

	do {
		u16 length = 0;
		struct sk_buff *skb;
		struct dma_chunk local_list[VETH_MAX_PAGES_PER_FRAME];
		struct dma_chunk remote_list[VETH_MAX_FRAMES_PER_MSG];
		u64 dest;
		HvLpVirtualLanIndex vlan;
		struct net_device *dev;
		struct veth_port *port;

		/* FIXME: do we need this? */
		memset(local_list, 0, sizeof(local_list));
		memset(remote_list, 0, sizeof(remote_list));

		/* a 0 address marks the end of the valid entries */
		if (senddata->addr[startchunk] == 0)
			break;

		/* make sure that we have at least 1 EOF entry in the
		 * remaining entries */
		if (! (senddata->eofmask >> (startchunk + VETH_EOF_SHIFT))) {
			veth_error("Missing EOF fragment in event "
					"eofmask = 0x%x startchunk = %d\n",
					(unsigned)senddata->eofmask,
					startchunk);
			break;
		}

		/* build list of chunks in this frame */
		nchunks = 0;
		do {
			remote_list[nchunks].addr =
				(u64) senddata->addr[startchunk+nchunks] << 32;
			remote_list[nchunks].size =
				senddata->len[startchunk+nchunks];
			length += remote_list[nchunks].size;
		} while (! (senddata->eofmask &
			    (1 << (VETH_EOF_SHIFT + startchunk + nchunks++))));

		/* length == total length of all chunks */
		/* nchunks == # of chunks in this frame */

		if ((length - ETH_HLEN) > VETH_MAX_MTU) {
			veth_error("Received oversize frame from LPAR %d "
					"(length = %d)\n",
					cnx->remote_lp, length);
			continue;
		}

		skb = alloc_skb(length, GFP_ATOMIC);
		if (!skb)
			continue;

		veth_build_dma_list(local_list, skb->data, length);

		rc = HvCallEvent_dmaBufList(HvLpEvent_Type_VirtualLan,
					    event->base_event.xSourceLp,
					    HvLpDma_Direction_RemoteToLocal,
					    cnx->src_inst,
					    cnx->dst_inst,
					    HvLpDma_AddressType_RealAddress,
					    HvLpDma_AddressType_TceIndex,
					    iseries_hv_addr(&local_list),
					    iseries_hv_addr(&remote_list),
					    length);
		if (rc != HvLpDma_Rc_Good) {
			dev_kfree_skb_irq(skb);
			continue;
		}

		vlan = skb->data[9];
		dev = veth_dev[vlan];
		if (! dev) {
			/*
			 * Some earlier versions of the driver sent
			 * broadcasts down all connections, even to lpars
			 * that weren't on the relevant vlan. So ignore
			 * packets belonging to a vlan we're not on.
			 * We can also be here if we receive packets while
			 * the driver is going down, because then dev is NULL.
			 */
			dev_kfree_skb_irq(skb);
			continue;
		}

		port = (struct veth_port *)dev->priv;
		dest = *((u64 *) skb->data) & 0xFFFFFFFFFFFF0000;

		if ((vlan > HVMAXARCHITECTEDVIRTUALLANS) || !port) {
			dev_kfree_skb_irq(skb);
			continue;
		}
		if (! veth_frame_wanted(port, dest)) {
			dev_kfree_skb_irq(skb);
			continue;
		}

		skb_put(skb, length);
		skb->dev = dev;
		skb->protocol = eth_type_trans(skb, dev);
		skb->ip_summed = CHECKSUM_NONE;
		netif_rx(skb);	/* send it up */
		port->stats.rx_packets++;
		port->stats.rx_bytes += length;
	} while (startchunk += nchunks, startchunk < VETH_MAX_FRAMES_PER_MSG);

	/* Ack it */
	spin_lock_irqsave(&cnx->lock, flags);
	BUG_ON(cnx->num_pending_acks > VETH_MAX_ACKS_PER_MSG);

	cnx->pending_acks[cnx->num_pending_acks++] =
		event->base_event.xCorrelationToken;

	if ( (cnx->num_pending_acks >= cnx->remote_caps.ack_threshold)
	     || (cnx->num_pending_acks >= VETH_MAX_ACKS_PER_MSG) )
		veth_flush_acks(cnx);

	spin_unlock_irqrestore(&cnx->lock, flags);
}

static void veth_timed_ack(unsigned long ptr)
{
	struct veth_lpar_connection *cnx = (struct veth_lpar_connection *) ptr;
	unsigned long flags;

	/* Ack all the events */
	spin_lock_irqsave(&cnx->lock, flags);
	if (cnx->num_pending_acks > 0)
		veth_flush_acks(cnx);

	/* Reschedule the timer */
	cnx->ack_timer.expires = jiffies + cnx->ack_timeout;
	add_timer(&cnx->ack_timer);
	spin_unlock_irqrestore(&cnx->lock, flags);
}
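
/* vio bus remove: drop this device's references to the connections on
 * its vlan, then tear down the port's sysfs entry and the netdev. */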
static int veth_remove(struct vio_dev *vdev)
{
	struct veth_lpar_connection *cnx;
	struct net_device *dev;
	struct veth_port *port;
	int i;

	dev = veth_dev[vdev->unit_address];

	if (! dev)
		return 0;

	port = netdev_priv(dev);

	for (i = 0; i < HVMAXARCHITECTEDLPS; i++) {
		cnx = veth_cnx[i];

		if (cnx && (port->lpar_map & (1 << i))) {
			/* Drop our reference to connections on our VLAN */
			kobject_put(&cnx->kobject);
		}
	}

	veth_dev[vdev->unit_address] = NULL;
	kobject_del(&port->kobject);
	kobject_put(&port->kobject);
	unregister_netdev(dev);
	free_netdev(dev);

	return 0;
}

static int veth_probe(struct vio_dev *vdev, const struct vio_device_id *id)
{
	int i = vdev->unit_address;
	struct net_device *dev;
	struct veth_port *port;

	dev = veth_probe_one(i, vdev);
	if (dev == NULL) {
		veth_remove(vdev);
		return 1;
	}
	veth_dev[i] = dev;

	port = (struct veth_port*)netdev_priv(dev);

	/* Start the state machine on each connection on this vlan. If we're
	 * the first dev to do so this will commence link negotiation */
	for (i = 0; i < HVMAXARCHITECTEDLPS; i++) {
		struct veth_lpar_connection *cnx;

		if (! (port->lpar_map & (1 << i)))
			continue;

		cnx = veth_cnx[i];
		if (!cnx)
			continue;

		kobject_get(&cnx->kobject);
		veth_kick_statemachine(cnx);
	}

	return 0;
}

/**
 * veth_device_table: Used by vio.c to match devices that we
 * support.
 */
static struct vio_device_id veth_device_table[] __devinitdata = {
	{ "network", "IBM,iSeries-l-lan" },
	{ "", "" }
};
MODULE_DEVICE_TABLE(vio, veth_device_table);

static struct vio_driver veth_driver = {
	.id_table = veth_device_table,
	.probe = veth_probe,
	.remove = veth_remove,
	.driver = {
		.name = DRV_NAME,
		.owner = THIS_MODULE,
	}
};

/*
 * Module initialization/cleanup
 */

void __exit veth_module_cleanup(void)
{
	int i;
	struct veth_lpar_connection *cnx;

	/* Disconnect our "irq" to stop events coming from the Hypervisor. */
	HvLpEvent_unregisterHandler(HvLpEvent_Type_VirtualLan);

	/* Make sure any work queued from Hypervisor callbacks is finished. */
	flush_scheduled_work();

	for (i = 0; i < HVMAXARCHITECTEDLPS; ++i) {
		cnx = veth_cnx[i];

		if (!cnx)
			continue;

		/* Remove the connection from sysfs */
		kobject_del(&cnx->kobject);
		/* Drop the driver's reference to the connection */
		kobject_put(&cnx->kobject);
	}

	/* Unregister the driver, which will close all the netdevs and stop
	 * the connections when they're no longer referenced. */
	vio_unregister_driver(&veth_driver);
}
module_exit(veth_module_cleanup);

int __init veth_module_init(void)
{
	int i;
	int rc;

	this_lp = HvLpConfig_getLpIndex_outline();

	for (i = 0; i < HVMAXARCHITECTEDLPS; ++i) {
		rc = veth_init_connection(i);
		if (rc != 0)
			goto error;
	}

	HvLpEvent_registerHandler(HvLpEvent_Type_VirtualLan,
				  &veth_handle_event);

	rc = vio_register_driver(&veth_driver);
	if (rc != 0)
		goto error;

	for (i = 0; i < HVMAXARCHITECTEDLPS; ++i) {
		struct kobject *kobj;

		if (!veth_cnx[i])
			continue;

		kobj = &veth_cnx[i]->kobject;
		kobj->parent = &veth_driver.driver.kobj;
		/* If the add fails, complain but otherwise continue */
		if (0 != kobject_add(kobj))
			veth_error("cnx %d: Failed adding to sysfs.\n", i);
	}

	return 0;

 error:
	for (i = 0; i < HVMAXARCHITECTEDLPS; ++i) {
		veth_destroy_connection(veth_cnx[i]);
	}

	return rc;
}
module_init(veth_module_init);