patch-2.2.4 linux/net/core/neighbour.c

Next file: linux/net/core/rtnetlink.c
Previous file: linux/net/core/filter.c
Back to the patch index
Back to the overall index

diff -u --recursive --new-file v2.2.3/linux/net/core/neighbour.c linux/net/core/neighbour.c
@@ -25,8 +25,36 @@
 #endif
 #include <net/neighbour.h>
 #include <net/dst.h>
+#include <net/sock.h>
 #include <linux/rtnetlink.h>
 
+/*
+   NOTE. The most unpleasent question is serialization of
+   accesses to resolved addresses. The problem is that addresses
+   are modified by bh, but they are referenced from normal
+   kernel thread. Before today no locking was made.
+   My reasoning was that corrupted address token will be copied
+   to packet with cosmologically small probability
+   (it is even difficult to estimate such small number)
+   and it is very silly to waste cycles in fast path to lock them.
+
+   But now I changed my mind, but not because previous statement
+   is wrong. Actually, neigh->ha MAY BE not opaque byte array,
+   but reference to some private data. In this case even neglibible
+   corruption probability becomes bug.
+
+   - hh cache is protected by rwlock. It assumes that
+     hh cache update procedure is short and fast, and that
+     read_lock is cheaper than start_bh_atomic().
+   - ha tokens, saved in neighbour entries, are protected
+     by bh_atomic().
+   - no protection is made in /proc reading. It is OK, because
+     /proc is broken by design in any case, and
+     corrupted output is normal behaviour there.
+
+     --ANK (981025)
+ */
+
 #define NEIGH_DEBUG 1
 
 #define NEIGH_PRINTK(x...) printk(x)
@@ -48,6 +76,7 @@
 #ifdef CONFIG_ARPD
 static void neigh_app_notify(struct neighbour *n);
 #endif
+static int pneigh_ifdown(struct neigh_table *tbl, struct device *dev);
 
 static int neigh_glbl_allocs;
 static struct neigh_table *neigh_tables;
@@ -83,8 +112,20 @@
 
 		np = &tbl->hash_buckets[i];
 		while ((n = *np) != NULL) {
+			/* Neighbour record may be discarded if:
+			   - nobody refers to it.
+			   - it is not premanent
+			   - (NEW and probably wrong)
+			     INCOMPLETE entries are kept at least for
+			     n->parms->retrans_time, otherwise we could
+			     flood network with resolution requests.
+			     It is not clear, what is better table overflow
+			     or flooding.
+			 */
 			if (atomic_read(&n->refcnt) == 0 &&
-			    !(n->nud_state&NUD_PERMANENT)) {
+			    !(n->nud_state&NUD_PERMANENT) &&
+			    (n->nud_state != NUD_INCOMPLETE ||
+			     jiffies - n->used > n->parms->retrans_time)) {
 				*np = n->next;
 				n->tbl = NULL;
 				tbl->entries--;
@@ -149,6 +190,7 @@
 
 	del_timer(&tbl->proxy_timer);
 	skb_queue_purge(&tbl->proxy_queue);
+	pneigh_ifdown(tbl, dev);
 	end_bh_atomic();
 	return 0;
 }
@@ -295,7 +337,9 @@
 
 	for (np = &tbl->phash_buckets[hash_val]; (n=*np) != NULL; np = &n->next) {
 		if (memcmp(n->key, pkey, key_len) == 0 && n->dev == dev) {
+			net_serialize_enter();
 			*np = n->next;
+			net_serialize_leave();
 			if (tbl->pdestructor)
 				tbl->pdestructor(n);
 			kfree(n);
@@ -305,6 +349,30 @@
 	return -ENOENT;
 }
 
+static int pneigh_ifdown(struct neigh_table *tbl, struct device *dev)
+{
+	struct pneigh_entry *n, **np;
+	u32 h;
+
+	for (h=0; h<=PNEIGH_HASHMASK; h++) {
+		np = &tbl->phash_buckets[h]; 
+		for (np = &tbl->phash_buckets[h]; (n=*np) != NULL; np = &n->next) {
+			if (n->dev == dev || dev == NULL) {
+				net_serialize_enter();
+				*np = n->next;
+				net_serialize_leave();
+				if (tbl->pdestructor)
+					tbl->pdestructor(n);
+				kfree(n);
+				continue;
+			}
+			np = &n->next;
+		}
+	}
+	return -ENOENT;
+}
+
+
 /*
  *	neighbour must already be out of the table;
  *
@@ -516,11 +584,11 @@
 		return;
 	}
 
-	neigh->probes++;
 	neigh->timer.expires = now + neigh->parms->retrans_time;
 	add_timer(&neigh->timer);
 
 	neigh->ops->solicit(neigh, skb_peek(&neigh->arp_queue));
+	neigh->probes++;
 }
 
 int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
@@ -542,6 +610,7 @@
 				add_timer(&neigh->timer);
 
 				neigh->ops->solicit(neigh, skb);
+				neigh->probes++;
 			} else {
 				neigh->nud_state = NUD_FAILED;
 				if (skb)
@@ -581,8 +650,11 @@
 		neigh->dev->header_cache_update;
 
 	if (update) {
-		for (hh=neigh->hh; hh; hh=hh->hh_next)
+		for (hh=neigh->hh; hh; hh=hh->hh_next) {
+			write_lock_irq(&hh->hh_lock);
 			update(hh, neigh->dev, neigh->ha);
+			write_unlock_irq(&hh->hh_lock);
+		}
 	}
 }
 
@@ -655,7 +727,7 @@
 		del_timer(&neigh->timer);
 	neigh->nud_state = new;
 	if (lladdr != neigh->ha) {
-		memcpy(neigh->ha, lladdr, dev->addr_len);
+		memcpy(&neigh->ha, lladdr, dev->addr_len);
 		neigh_update_hhs(neigh);
 		neigh->confirmed = jiffies - (neigh->parms->base_reachable_time<<1);
 #ifdef CONFIG_ARPD
@@ -764,14 +836,20 @@
 	__skb_pull(skb, skb->nh.raw - skb->data);
 
 	if (neigh_event_send(neigh, skb) == 0) {
+		int err;
 		struct device *dev = neigh->dev;
-		if (dev->hard_header_cache) {
+		if (dev->hard_header_cache && dst->hh == NULL) {
 			start_bh_atomic();
 			if (dst->hh == NULL)
 				neigh_hh_init(neigh, dst, dst->ops->protocol);
+			err = dev->hard_header(skb, dev, ntohs(skb->protocol), neigh->ha, NULL, skb->len);
+			end_bh_atomic();
+		} else {
+			start_bh_atomic();
+			err = dev->hard_header(skb, dev, ntohs(skb->protocol), neigh->ha, NULL, skb->len);
 			end_bh_atomic();
 		}
-		if (dev->hard_header(skb, dev, ntohs(skb->protocol), neigh->ha, NULL, skb->len) >= 0)
+		if (err >= 0)
 			return neigh->ops->queue_xmit(skb);
 		kfree_skb(skb);
 		return -EINVAL;
@@ -788,13 +866,17 @@
 
 int neigh_connected_output(struct sk_buff *skb)
 {
+	int err;
 	struct dst_entry *dst = skb->dst;
 	struct neighbour *neigh = dst->neighbour;
 	struct device *dev = neigh->dev;
 
 	__skb_pull(skb, skb->nh.raw - skb->data);
 
-	if (dev->hard_header(skb, dev, ntohs(skb->protocol), neigh->ha, NULL, skb->len) >= 0)
+	start_bh_atomic();
+	err = dev->hard_header(skb, dev, ntohs(skb->protocol), neigh->ha, NULL, skb->len);
+	end_bh_atomic();
+	if (err >= 0)
 		return neigh->ops->queue_xmit(skb);
 	kfree_skb(skb);
 	return -EINVAL;
@@ -868,7 +950,6 @@
 			}
 		}
 		p->next = tbl->parms.next;
-		/* ATOMIC_SET */
 		tbl->parms.next = p;
 	}
 	return p;
@@ -882,8 +963,9 @@
 		return;
 	for (p = &tbl->parms.next; *p; p = &(*p)->next) {
 		if (*p == parms) {
-			/* ATOMIC_SET */
+			net_serialize_enter();
 			*p = parms->next;
+			net_serialize_leave();
 #ifdef CONFIG_SYSCTL
 			neigh_sysctl_unregister(parms);
 #endif
@@ -926,14 +1008,15 @@
 	del_timer(&tbl->gc_timer);
 	del_timer(&tbl->proxy_timer);
 	skb_queue_purge(&tbl->proxy_queue);
-	if (tbl->entries)
-		neigh_ifdown(tbl, NULL);
+	neigh_ifdown(tbl, NULL);
 	end_bh_atomic();
 	if (tbl->entries)
 		printk(KERN_CRIT "neighbour leakage\n");
 	for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
 		if (*tp == tbl) {
+			net_serialize_enter();
 			*tp = tbl->next;
+			net_serialize_leave();
 			break;
 		}
 	}
@@ -976,7 +1059,7 @@
 			return -EINVAL;
 
 		start_bh_atomic();
-		n = neigh_lookup(tbl, RTA_DATA(nda[NDA_DST-1]), dev);
+		n = __neigh_lookup(tbl, RTA_DATA(nda[NDA_DST-1]), dev, 0);
 		if (n) {
 			err = neigh_update(n, NULL, NUD_FAILED, 1, 0);
 			neigh_release(n);
@@ -1020,7 +1103,7 @@
 		    nda[NDA_LLADDR-1]->rta_len != RTA_LENGTH(dev->addr_len))
 			return -EINVAL;
 		start_bh_atomic();
-		n = neigh_lookup(tbl, RTA_DATA(nda[NDA_DST-1]), dev);
+		n = __neigh_lookup(tbl, RTA_DATA(nda[NDA_DST-1]), dev, 0);
 		if (n) {
 			if (nlh->nlmsg_flags&NLM_F_EXCL)
 				err = -EEXIST;
@@ -1091,7 +1174,7 @@
 	for (h=0; h <= NEIGH_HASHMASK; h++) {
 		if (h < s_h) continue;
 		if (h > s_h)
-			memset(&cb->args[2], 0, sizeof(cb->args) - 2*sizeof(int));
+			memset(&cb->args[2], 0, sizeof(cb->args) - 2*sizeof(cb->args[0]));
 		start_bh_atomic();
 		for (n = tbl->hash_buckets[h], idx = 0; n;
 		     n = n->next, idx++) {
@@ -1125,7 +1208,7 @@
 		if (family && tbl->family != family)
 			continue;
 		if (t > s_t)
-			memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(int));
+			memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
 		if (neigh_dump_table(tbl, skb, cb) < 0) 
 			break;
 	}

FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen (who was at: slshen@lbl.gov)