patch-2.4.20 linux-2.4.20/net/ipv6/netfilter/ip6_queue.c

Next file: linux-2.4.20/net/ipv6/netfilter/ip6_tables.c
Previous file: linux-2.4.20/net/ipv6/netfilter/Makefile
Back to the patch index
Back to the overall index

diff -urN linux-2.4.19/net/ipv6/netfilter/ip6_queue.c linux-2.4.20/net/ipv6/netfilter/ip6_queue.c
@@ -15,7 +15,7 @@
  *             real coder of this.
  *             Few changes needed, mainly the hard_routing code and
  *             the netlink socket protocol (we're NETLINK_IP6_FW).
- *
+ * 2002-06-25: Code cleanup. [JM: ported cleanup over from ip_queue.c]
  */
 #include <linux/module.h>
 #include <linux/skbuff.h>
@@ -26,18 +26,12 @@
 #include <linux/netfilter.h>
 #include <linux/netlink.h>
 #include <linux/spinlock.h>
-#include <linux/rtnetlink.h>
+#include <linux/brlock.h>
 #include <linux/sysctl.h>
 #include <linux/proc_fs.h>
 #include <net/sock.h>
 #include <net/ipv6.h>
 #include <net/ip6_route.h>
-
-/* We're still usign the following structs. No need to change them: */
-/*   ipq_packet_msg                                                 */
-/*   ipq_mode_msg                                                   */
-/*   ipq_verdict_msg                                                */
-/*   ipq_peer_msg                                                   */
 #include <linux/netfilter_ipv4/ip_queue.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
 #include <linux/netfilter_ipv6/ip6_tables.h>
@@ -47,184 +41,289 @@
 #define NET_IPQ_QMAX 2088
 #define NET_IPQ_QMAX_NAME "ip6_queue_maxlen"
 
-typedef struct ip6q_rt_info {
+struct ipq_rt_info {
 	struct in6_addr daddr;
 	struct in6_addr saddr;
-} ip6q_rt_info_t;
+};
 
-typedef struct ip6q_queue_element {
-	struct list_head list;		/* Links element into queue */
-	int verdict;			/* Current verdict */
-	struct nf_info *info;		/* Extra info from netfilter */
-	struct sk_buff *skb;		/* Packet inside */
-	ip6q_rt_info_t rt_info;		/* May need post-mangle routing */
-} ip6q_queue_element_t;
-
-typedef int (*ip6q_send_cb_t)(ip6q_queue_element_t *e);
-
-typedef struct ip6q_peer {
-	pid_t pid;			/* PID of userland peer */
-	unsigned char died;		/* We think the peer died */
-	unsigned char copy_mode;	/* Copy packet as well as metadata? */
-	size_t copy_range;		/* Range past metadata to copy */
-	ip6q_send_cb_t send;		/* Callback for sending data to peer */
-} ip6q_peer_t;
-
-typedef struct ip6q_queue {
- 	int len;			/* Current queue len */
- 	int *maxlen;			/* Maximum queue len, via sysctl */
- 	unsigned char flushing;		/* If queue is being flushed */
- 	unsigned char terminate;	/* If the queue is being terminated */
- 	struct list_head list;		/* Head of packet queue */
- 	spinlock_t lock;		/* Queue spinlock */
- 	ip6q_peer_t peer;		/* Userland peer */
-} ip6q_queue_t;
+struct ipq_queue_entry {
+	struct list_head list;
+	struct nf_info *info;
+	struct sk_buff *skb;
+	struct ipq_rt_info rt_info;
+};
 
-/****************************************************************************
- *
- * Packet queue
- *
- ****************************************************************************/
-/* Dequeue a packet if matched by cmp, or the next available if cmp is NULL */
-static ip6q_queue_element_t *
-ip6q_dequeue(ip6q_queue_t *q,
-            int (*cmp)(ip6q_queue_element_t *, unsigned long),
-            unsigned long data)
-{
-	struct list_head *i;
-
-	spin_lock_bh(&q->lock);
-	for (i = q->list.prev; i != &q->list; i = i->prev) {
-		ip6q_queue_element_t *e = (ip6q_queue_element_t *)i;
-		
-		if (!cmp || cmp(e, data)) {
-			list_del(&e->list);
-			q->len--;
-			spin_unlock_bh(&q->lock);
-			return e;
-		}
+typedef int (*ipq_cmpfn)(struct ipq_queue_entry *, unsigned long);
+
+static unsigned char copy_mode = IPQ_COPY_NONE;
+static unsigned int queue_maxlen = IPQ_QMAX_DEFAULT;
+static rwlock_t queue_lock = RW_LOCK_UNLOCKED;
+static int peer_pid;
+static unsigned int copy_range;
+static unsigned int queue_total;
+static struct sock *ipqnl;
+static LIST_HEAD(queue_list);
+static DECLARE_MUTEX(ipqnl_sem);
+
+static void
+ipq_issue_verdict(struct ipq_queue_entry *entry, int verdict)
+{	/* Return the packet held by entry to netfilter with the given verdict. */
+	nf_reinject(entry->skb, entry->info, verdict);
+	kfree(entry);	/* the entry wrapper is gone after this; callers must not touch it again */
+}
+
+static inline int
+__ipq_enqueue_entry(struct ipq_queue_entry *entry)
+{	/* Link entry into queue_list; returns 0, or -ENOSPC once queue_maxlen is reached. */
+       if (queue_total >= queue_maxlen) {
+               if (net_ratelimit()) 
+                       printk(KERN_WARNING "ip6_queue: full at %d entries, "
+                              "dropping packet(s).\n", queue_total);
+               return -ENOSPC;	/* entry is not linked; it stays owned by the caller */
+       }
+       list_add(&entry->list, &queue_list);
+       queue_total++;	/* ipq_enqueue_packet() calls this with queue_lock write-held */
+       return 0;
+}
+
+/*
+ * Find and return a queued entry matched by cmpfn, or return the last
+ * entry if cmpfn is NULL.
+ */
+static inline struct ipq_queue_entry *
+__ipq_find_entry(ipq_cmpfn cmpfn, unsigned long data)
+{
+	struct list_head *p;
+
+	list_for_each_prev(p, &queue_list) {
+		struct ipq_queue_entry *entry = (struct ipq_queue_entry *)p;
+		
+		if (!cmpfn || cmpfn(entry, data))
+			return entry;
 	}
-	spin_unlock_bh(&q->lock);
 	return NULL;
 }
 
-/* Flush all packets */
-static void ip6q_flush(ip6q_queue_t *q)
+static inline void
+__ipq_dequeue_entry(struct ipq_queue_entry *entry)
 {
-	ip6q_queue_element_t *e;
-	
-	spin_lock_bh(&q->lock);
-	q->flushing = 1;
-	spin_unlock_bh(&q->lock);
-	while ((e = ip6q_dequeue(q, NULL, 0))) {
-		e->verdict = NF_DROP;
-		nf_reinject(e->skb, e->info, e->verdict);
-		kfree(e);
-	}
-	spin_lock_bh(&q->lock);
-	q->flushing = 0;
-	spin_unlock_bh(&q->lock);
-}
-
-static ip6q_queue_t *ip6q_create_queue(nf_queue_outfn_t outfn,
-                                     ip6q_send_cb_t send_cb,
-                                     int *errp, int *sysctl_qmax)
+	list_del(&entry->list);
+	queue_total--;
+}
+
+static inline struct ipq_queue_entry *
+__ipq_find_dequeue_entry(ipq_cmpfn cmpfn, unsigned long data)
 {
-	int status;
-	ip6q_queue_t *q;
+	struct ipq_queue_entry *entry;
 
-	*errp = 0;
-	q = kmalloc(sizeof(ip6q_queue_t), GFP_KERNEL);
-	if (q == NULL) {
-		*errp = -ENOMEM;
+	entry = __ipq_find_entry(cmpfn, data);
+	if (entry == NULL)
 		return NULL;
+
+	__ipq_dequeue_entry(entry);
+	return entry;
+}
+
+
+static inline void
+__ipq_flush(int verdict)
+{
+	struct ipq_queue_entry *entry;
+	
+	while ((entry = __ipq_find_dequeue_entry(NULL, 0)))
+		ipq_issue_verdict(entry, verdict);
+}
+
+static inline int
+__ipq_set_mode(unsigned char mode, unsigned int range)
+{
+	int status = 0;
+	
+	switch(mode) {
+	case IPQ_COPY_NONE:
+	case IPQ_COPY_META:
+		copy_mode = mode;
+		copy_range = 0;
+		break;
+		
+	case IPQ_COPY_PACKET:
+		copy_mode = mode;
+		copy_range = range;
+		if (copy_range > 0xFFFF)
+			copy_range = 0xFFFF;
+		break;
+		
+	default:
+		status = -EINVAL;
+
 	}
-	q->peer.pid = 0;
-	q->peer.died = 0;
-	q->peer.copy_mode = IPQ_COPY_NONE;
-	q->peer.copy_range = 0;
-	q->peer.send = send_cb;
-	q->len = 0;
-	q->maxlen = sysctl_qmax;
-	q->flushing = 0;
-	q->terminate = 0;
-	INIT_LIST_HEAD(&q->list);
-	spin_lock_init(&q->lock);
-	status = nf_register_queue_handler(PF_INET6, outfn, q);
-	if (status < 0) {
-		*errp = -EBUSY;
-		kfree(q);
+	return status;
+}
+
+static inline void
+__ipq_reset(void)
+{
+	peer_pid = 0;
+	__ipq_set_mode(IPQ_COPY_NONE, 0);
+	__ipq_flush(NF_DROP);
+}
+
+static struct ipq_queue_entry *
+ipq_find_dequeue_entry(ipq_cmpfn cmpfn, unsigned long data)
+{
+	struct ipq_queue_entry *entry;
+	
+	write_lock_bh(&queue_lock);
+	entry = __ipq_find_dequeue_entry(cmpfn, data);
+	write_unlock_bh(&queue_lock);
+	return entry;
+}
+
+static void
+ipq_flush(int verdict)
+{
+	write_lock_bh(&queue_lock);
+	__ipq_flush(verdict);
+	write_unlock_bh(&queue_lock);
+}
+
+static struct sk_buff *
+ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
+{
+	unsigned char *old_tail;
+	size_t size = 0;
+	size_t data_len = 0;
+	struct sk_buff *skb;
+	struct ipq_packet_msg *pmsg;
+	struct nlmsghdr *nlh;
+
+	read_lock_bh(&queue_lock);
+	
+	switch (copy_mode) {
+	case IPQ_COPY_META:
+	case IPQ_COPY_NONE:
+		size = NLMSG_SPACE(sizeof(*pmsg));
+		data_len = 0;
+		break;
+	
+	case IPQ_COPY_PACKET:
+		if (copy_range == 0 || copy_range > entry->skb->len)
+			data_len = entry->skb->len;
+		else
+			data_len = copy_range;
+		
+		size = NLMSG_SPACE(sizeof(*pmsg) + data_len);
+		break;
+	
+	default:
+		*errp = -EINVAL;
+		read_unlock_bh(&queue_lock);
 		return NULL;
 	}
-	return q;
+
+	read_unlock_bh(&queue_lock);
+
+	skb = alloc_skb(size, GFP_ATOMIC);
+	if (!skb)
+		goto nlmsg_failure;
+		
+	old_tail= skb->tail;
+	nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh));
+	pmsg = NLMSG_DATA(nlh);
+	memset(pmsg, 0, sizeof(*pmsg));
+
+	pmsg->packet_id       = (unsigned long )entry;
+	pmsg->data_len        = data_len;
+	pmsg->timestamp_sec   = entry->skb->stamp.tv_sec;
+	pmsg->timestamp_usec  = entry->skb->stamp.tv_usec;
+	pmsg->mark            = entry->skb->nfmark;
+	pmsg->hook            = entry->info->hook;
+	pmsg->hw_protocol     = entry->skb->protocol;
+	
+	if (entry->info->indev)
+		strcpy(pmsg->indev_name, entry->info->indev->name);
+	else
+		pmsg->indev_name[0] = '\0';
+	
+	if (entry->info->outdev)
+		strcpy(pmsg->outdev_name, entry->info->outdev->name);
+	else
+		pmsg->outdev_name[0] = '\0';
+	
+	if (entry->info->indev && entry->skb->dev) {
+		pmsg->hw_type = entry->skb->dev->type;
+		if (entry->skb->dev->hard_header_parse)
+			pmsg->hw_addrlen =
+				entry->skb->dev->hard_header_parse(entry->skb,
+				                                   pmsg->hw_addr);
+	}
+	
+	if (data_len)
+		memcpy(pmsg->payload, entry->skb->data, data_len);
+		
+	nlh->nlmsg_len = skb->tail - old_tail;
+	return skb;
+
+nlmsg_failure:
+	if (skb)
+		kfree_skb(skb);
+	*errp = -EINVAL;
+	printk(KERN_ERR "ip6_queue: error creating packet message\n");
+	return NULL;
 }
 
-static int ip6q_enqueue(ip6q_queue_t *q,
-                       struct sk_buff *skb, struct nf_info *info)
+static int
+ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info, void *data)
 {
-	ip6q_queue_element_t *e;
-	int status;
-	
-	e = kmalloc(sizeof(*e), GFP_ATOMIC);
-	if (e == NULL) {
-		printk(KERN_ERR "ip6_queue: OOM in enqueue\n");
+	int status = -EINVAL;
+	struct sk_buff *nskb;
+	struct ipq_queue_entry *entry;
+	/* Queue handler: tell the peer about skb, then park it until a verdict; <0 on error. */
+	if (copy_mode == IPQ_COPY_NONE)
+		return -EAGAIN;
+
+	entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
+	if (entry == NULL) {
+		printk(KERN_ERR "ip6_queue: OOM in ipq_enqueue_packet()\n");
 		return -ENOMEM;
 	}
 
-	e->verdict = NF_DROP;
-	e->info = info;
-	e->skb = skb;
+	entry->info = info;
+	entry->skb = skb;
 
-	if (e->info->hook == NF_IP_LOCAL_OUT) {
+	if (entry->info->hook == NF_IP_LOCAL_OUT) {
 		struct ipv6hdr *iph = skb->nh.ipv6h;
 
-		e->rt_info.daddr = iph->daddr;
-		e->rt_info.saddr = iph->saddr;
+		entry->rt_info.daddr = iph->daddr;
+		entry->rt_info.saddr = iph->saddr;
 	}
 
-	spin_lock_bh(&q->lock);
-	if (q->len >= *q->maxlen) {
-		spin_unlock_bh(&q->lock);
-		if (net_ratelimit()) 
-			printk(KERN_WARNING "ip6_queue: full at %d entries, "
-			       "dropping packet(s).\n", q->len);
-		goto free_drop;
-	}
-	if (q->flushing || q->peer.copy_mode == IPQ_COPY_NONE
-	    || q->peer.pid == 0 || q->peer.died || q->terminate) {
-		spin_unlock_bh(&q->lock);
-		goto free_drop;
-	}
-	status = q->peer.send(e);
-	if (status > 0) {
-		list_add(&e->list, &q->list);
-		q->len++;
-		spin_unlock_bh(&q->lock);
-		return status;
-	}
-	spin_unlock_bh(&q->lock);
-	if (status == -ECONNREFUSED) {
-		printk(KERN_INFO "ip6_queue: peer %d died, "
-		       "resetting state and flushing queue\n", q->peer.pid);
-			q->peer.died = 1;
-			q->peer.pid = 0;
-			q->peer.copy_mode = IPQ_COPY_NONE;
-			q->peer.copy_range = 0;
-			ip6q_flush(q);
-	}
-free_drop:
-	kfree(e);
-	return -EBUSY;
-}
+	nskb = ipq_build_packet_message(entry, &status);
+	if (nskb == NULL)
+		goto err_out_free;
+
+	write_lock_bh(&queue_lock);
+	/* No peer bound: nskb was never handed to netlink, so it must be freed here. */
+	if (!peer_pid)
+		goto err_out_free_nskb;
 
-static void ip6q_destroy_queue(ip6q_queue_t *q)
-{
-	nf_unregister_queue_handler(PF_INET6);
-	spin_lock_bh(&q->lock);
-	q->terminate = 1;
-	spin_unlock_bh(&q->lock);
-	ip6q_flush(q);
-	kfree(q);
+	/* From here on netlink owns nskb; on failure netlink_unicast() frees it itself. */
+	status = netlink_unicast(ipqnl, nskb, peer_pid, MSG_DONTWAIT);
+	if (status < 0)
+		goto err_out_unlock;
+	status = __ipq_enqueue_entry(entry);
+	if (status < 0)
+		goto err_out_unlock;
+	write_unlock_bh(&queue_lock);
+	return status;
+
+err_out_free_nskb:
+	kfree_skb(nskb);
+err_out_unlock:
+	write_unlock_bh(&queue_lock);
+err_out_free:
+	kfree(entry);
+	return status;
 }
 
 /*
@@ -236,7 +335,8 @@
  *
  * If that one is modified, this one should be modified too.
  */
-static int route6_me_harder(struct sk_buff *skb)
+static int
+route6_me_harder(struct sk_buff *skb)
 {
 	struct ipv6hdr *iph = skb->nh.ipv6h;
 	struct dst_entry *dst;
@@ -264,7 +364,9 @@
 	skb->dst = dst;
 	return 0;
 }
-static int ip6q_mangle_ipv6(ipq_verdict_msg_t *v, ip6q_queue_element_t *e)
+
+static int
+ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct ipq_queue_entry *e)
 {
 	int diff;
 	struct ipv6hdr *user_iph = (struct ipv6hdr *)v->payload;
@@ -306,357 +408,262 @@
 	 */
 	if (e->info->hook == NF_IP_LOCAL_OUT) {
 		struct ipv6hdr *iph = e->skb->nh.ipv6h;
-		if (!(   iph->daddr.in6_u.u6_addr32[0] == e->rt_info.daddr.in6_u.u6_addr32[0]
-                      && iph->daddr.in6_u.u6_addr32[1] == e->rt_info.daddr.in6_u.u6_addr32[1]
-                      && iph->daddr.in6_u.u6_addr32[2] == e->rt_info.daddr.in6_u.u6_addr32[2]
-                      && iph->daddr.in6_u.u6_addr32[3] == e->rt_info.daddr.in6_u.u6_addr32[3]
-		      && iph->saddr.in6_u.u6_addr32[0] == e->rt_info.saddr.in6_u.u6_addr32[0]
-		      && iph->saddr.in6_u.u6_addr32[1] == e->rt_info.saddr.in6_u.u6_addr32[1]
-		      && iph->saddr.in6_u.u6_addr32[2] == e->rt_info.saddr.in6_u.u6_addr32[2]
-		      && iph->saddr.in6_u.u6_addr32[3] == e->rt_info.saddr.in6_u.u6_addr32[3]))
+		if (ipv6_addr_cmp(&iph->daddr, &e->rt_info.daddr) ||
+		    ipv6_addr_cmp(&iph->saddr, &e->rt_info.saddr))
 			return route6_me_harder(e->skb);
 	}
 	return 0;
 }
 
-static inline int id_cmp(ip6q_queue_element_t *e, unsigned long id)
+static inline int
+id_cmp(struct ipq_queue_entry *e, unsigned long id)
 {
 	return (id == (unsigned long )e);
 }
 
-static int ip6q_set_verdict(ip6q_queue_t *q,
-                           ipq_verdict_msg_t *v, unsigned int len)
+static int
+ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len)
 {
-	ip6q_queue_element_t *e;
+	struct ipq_queue_entry *entry;
 
-	if (v->value > NF_MAX_VERDICT)
+	if (vmsg->value > NF_MAX_VERDICT)
 		return -EINVAL;
-	e = ip6q_dequeue(q, id_cmp, v->id);
-	if (e == NULL)
+
+	entry = ipq_find_dequeue_entry(id_cmp, vmsg->id);
+	if (entry == NULL)
 		return -ENOENT;
 	else {
-		e->verdict = v->value;
-		if (v->data_len && v->data_len == len)
-			if (ip6q_mangle_ipv6(v, e) < 0)
-				e->verdict = NF_DROP;
-		nf_reinject(e->skb, e->info, e->verdict);
-		kfree(e);
+		int verdict = vmsg->value;
+		
+		if (vmsg->data_len && vmsg->data_len == len)
+			if (ipq_mangle_ipv6(vmsg, entry) < 0)
+				verdict = NF_DROP;
+		
+		ipq_issue_verdict(entry, verdict);
 		return 0;
 	}
 }
 
-static int ip6q_receive_peer(ip6q_queue_t* q, ipq_peer_msg_t *m,
-                            unsigned char type, unsigned int len)
+static int
+ipq_set_mode(unsigned char mode, unsigned int range)
 {
+	int status;
+
+	write_lock_bh(&queue_lock);
+	status = __ipq_set_mode(mode, range);
+	write_unlock_bh(&queue_lock);
+	return status;
+}
 
+static int
+ipq_receive_peer(struct ipq_peer_msg *pmsg,
+                 unsigned char type, unsigned int len)
+{
 	int status = 0;
-	int busy;
-		
-	spin_lock_bh(&q->lock);
-	busy = (q->terminate || q->flushing);
-	spin_unlock_bh(&q->lock);
-	if (busy)
-		return -EBUSY;
-	if (len < sizeof(ipq_peer_msg_t))
+
+	if (len < sizeof(*pmsg))
 		return -EINVAL;
+
 	switch (type) {
-		case IPQM_MODE:
-			switch (m->msg.mode.value) {
-				case IPQ_COPY_META:
-					q->peer.copy_mode = IPQ_COPY_META;
-					q->peer.copy_range = 0;
-					break;
-				case IPQ_COPY_PACKET:
-					q->peer.copy_mode = IPQ_COPY_PACKET;
-					q->peer.copy_range = m->msg.mode.range;
-					if (q->peer.copy_range > 0xFFFF)
-						q->peer.copy_range = 0xFFFF;
-					break;
-				default:
-					status = -EINVAL;
-			}
-			break;
-		case IPQM_VERDICT:
-			if (m->msg.verdict.value > NF_MAX_VERDICT)
-				status = -EINVAL;
-			else
-				status = ip6q_set_verdict(q,
-				                         &m->msg.verdict,
-				                         len - sizeof(*m));
+	case IPQM_MODE:
+		status = ipq_set_mode(pmsg->msg.mode.value,
+		                      pmsg->msg.mode.range);
+		break;
+		
+	case IPQM_VERDICT:
+		if (pmsg->msg.verdict.value > NF_MAX_VERDICT)
+			status = -EINVAL;
+		else
+			status = ipq_set_verdict(&pmsg->msg.verdict,
+			                         len - sizeof(*pmsg));
 			break;
-		default:
-			 status = -EINVAL;
+	default:
+		status = -EINVAL;
 	}
 	return status;
 }
 
-static inline int dev_cmp(ip6q_queue_element_t *e, unsigned long ifindex)
+static int
+dev_cmp(struct ipq_queue_entry *entry, unsigned long ifindex)
 {
-	if (e->info->indev)
-		if (e->info->indev->ifindex == ifindex)
+	if (entry->info->indev)
+		if (entry->info->indev->ifindex == ifindex)
 			return 1;
-	if (e->info->outdev)
-		if (e->info->outdev->ifindex == ifindex)
+			
+	if (entry->info->outdev)
+		if (entry->info->outdev->ifindex == ifindex)
 			return 1;
+
 	return 0;
 }
 
-/* Drop any queued packets associated with device ifindex */
-static void ip6q_dev_drop(ip6q_queue_t *q, int ifindex)
+static void
+ipq_dev_drop(int ifindex)
 {
-	ip6q_queue_element_t *e;
+	struct ipq_queue_entry *entry;
 	
-	while ((e = ip6q_dequeue(q, dev_cmp, ifindex))) {
-		e->verdict = NF_DROP;
-		nf_reinject(e->skb, e->info, e->verdict);
-		kfree(e);
-	}
-}
-
-/****************************************************************************
- *
- * Netfilter interface
- *
- ****************************************************************************/
-
-/*
- * Packets arrive here from netfilter for queuing to userspace.
- * All of them must be fed back via nf_reinject() or Alexey will kill Rusty.
- */
-static int netfilter6_receive(struct sk_buff *skb,
-                             struct nf_info *info, void *data)
-{
-	return ip6q_enqueue((ip6q_queue_t *)data, skb, info);
-}
-
-/****************************************************************************
- *
- * Netlink interface.
- *
- ****************************************************************************/
-
-static struct sock *nfnl = NULL;
-/* This is not a static one, so we should not repeat its name */
-ip6q_queue_t *nlq6 = NULL;
-
-static struct sk_buff *netlink_build_message(ip6q_queue_element_t *e, int *errp)
-{
-	unsigned char *old_tail;
-	size_t size = 0;
-	size_t data_len = 0;
-	struct sk_buff *skb;
-	ipq_packet_msg_t *pm;
-	struct nlmsghdr *nlh;
-
-	switch (nlq6->peer.copy_mode) {
-		size_t copy_range;
-
-		case IPQ_COPY_META:
-			size = NLMSG_SPACE(sizeof(*pm));
-			data_len = 0;
-			break;
-		case IPQ_COPY_PACKET:
-			copy_range = nlq6->peer.copy_range;
-			if (copy_range == 0 || copy_range > e->skb->len)
-				data_len = e->skb->len;
-			else
-				data_len = copy_range;
-			size = NLMSG_SPACE(sizeof(*pm) + data_len);
-			
-			break;
-		case IPQ_COPY_NONE:
-		default:
-			*errp = -EINVAL;
-			return NULL;
-	}
-	skb = alloc_skb(size, GFP_ATOMIC);
-	if (!skb)
-		goto nlmsg_failure;
-	old_tail = skb->tail;
-	nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh));
-	pm = NLMSG_DATA(nlh);
-	memset(pm, 0, sizeof(*pm));
-	pm->packet_id = (unsigned long )e;
-	pm->data_len = data_len;
-	pm->timestamp_sec = e->skb->stamp.tv_sec;
-	pm->timestamp_usec = e->skb->stamp.tv_usec;
-	pm->mark = e->skb->nfmark;
-	pm->hook = e->info->hook;
-	if (e->info->indev) strcpy(pm->indev_name, e->info->indev->name);
-	else pm->indev_name[0] = '\0';
-	if (e->info->outdev) strcpy(pm->outdev_name, e->info->outdev->name);
-	else pm->outdev_name[0] = '\0';
-	pm->hw_protocol = e->skb->protocol;
-	if (e->info->indev && e->skb->dev) {
-		pm->hw_type = e->skb->dev->type;
-		if (e->skb->dev->hard_header_parse)
-			pm->hw_addrlen =
-				e->skb->dev->hard_header_parse(e->skb,
-				                               pm->hw_addr);
-	}
-	if (data_len)
-		memcpy(pm->payload, e->skb->data, data_len);
-	nlh->nlmsg_len = skb->tail - old_tail;
-	NETLINK_CB(skb).dst_groups = 0;
-	return skb;
-nlmsg_failure:
-	if (skb)
-		kfree_skb(skb);
-	*errp = 0;
-	printk(KERN_ERR "ip6_queue: error creating netlink message\n");
-	return NULL;
-}
-
-static int netlink_send_peer(ip6q_queue_element_t *e)
-{
-	int status = 0;
-	struct sk_buff *skb;
-
-	skb = netlink_build_message(e, &status);
-	if (skb == NULL)
-		return status;
-	return netlink_unicast(nfnl, skb, nlq6->peer.pid, MSG_DONTWAIT);
+	while ((entry = ipq_find_dequeue_entry(dev_cmp, ifindex)) != NULL)
+		ipq_issue_verdict(entry, NF_DROP);
 }
 
 #define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)
 
-static __inline__ void netlink_receive_user_skb(struct sk_buff *skb)
+static inline void
+ipq_rcv_skb(struct sk_buff *skb)
 {
-	int status, type;
+	int status, type, pid, flags, nlmsglen, skblen;
 	struct nlmsghdr *nlh;
 
-	if (skb->len < sizeof(struct nlmsghdr))
+	skblen = skb->len;
+	if (skblen < sizeof(*nlh))
 		return;
 
 	nlh = (struct nlmsghdr *)skb->data;
-	if (nlh->nlmsg_len < sizeof(struct nlmsghdr)
-	    || skb->len < nlh->nlmsg_len)
-	    	return;
-
-	if(nlh->nlmsg_pid <= 0
-	    || !(nlh->nlmsg_flags & NLM_F_REQUEST)
-	    || nlh->nlmsg_flags & NLM_F_MULTI)
+	nlmsglen = nlh->nlmsg_len;
+	if (nlmsglen < sizeof(*nlh) || skblen < nlmsglen)
+		return;
+
+	pid = nlh->nlmsg_pid;
+	flags = nlh->nlmsg_flags;
+	
+	if(pid <= 0 || !(flags & NLM_F_REQUEST) || flags & NLM_F_MULTI)
 		RCV_SKB_FAIL(-EINVAL);
-	if (nlh->nlmsg_flags & MSG_TRUNC)
+		
+	if (flags & MSG_TRUNC)
 		RCV_SKB_FAIL(-ECOMM);
+		
 	type = nlh->nlmsg_type;
 	if (type < NLMSG_NOOP || type >= IPQM_MAX)
 		RCV_SKB_FAIL(-EINVAL);
+		
 	if (type <= IPQM_BASE)
 		return;
+		
 	if(!cap_raised(NETLINK_CB(skb).eff_cap, CAP_NET_ADMIN))
 		RCV_SKB_FAIL(-EPERM);
-	if (nlq6->peer.pid && !nlq6->peer.died
-	    && (nlq6->peer.pid != nlh->nlmsg_pid)) {
-	    	printk(KERN_WARNING "ip6_queue: peer pid changed from %d to "
-	    	      "%d, flushing queue\n", nlq6->peer.pid, nlh->nlmsg_pid);
-		ip6q_flush(nlq6);
-	}	
-	nlq6->peer.pid = nlh->nlmsg_pid;
-	nlq6->peer.died = 0;
-	status = ip6q_receive_peer(nlq6, NLMSG_DATA(nlh),
-	                          type, skb->len - NLMSG_LENGTH(0));
+	
+	write_lock_bh(&queue_lock);
+	
+	if (peer_pid) {
+		if (peer_pid != pid) {
+			write_unlock_bh(&queue_lock);
+			RCV_SKB_FAIL(-EBUSY);
+		}
+	}
+	else
+		peer_pid = pid;
+		
+	write_unlock_bh(&queue_lock);
+	
+	status = ipq_receive_peer(NLMSG_DATA(nlh), type,
+	                          skblen - NLMSG_LENGTH(0));
 	if (status < 0)
 		RCV_SKB_FAIL(status);
-	if (nlh->nlmsg_flags & NLM_F_ACK)
+		
+	if (flags & NLM_F_ACK)
 		netlink_ack(skb, nlh, 0);
         return;
 }
 
-/* Note: we are only dealing with single part messages at the moment. */
-static void netlink_receive_user_sk(struct sock *sk, int len)
+static void
+ipq_rcv_sk(struct sock *sk, int len)
 {
 	do {
 		struct sk_buff *skb;
 
-		if (rtnl_shlock_nowait())
+		if (down_trylock(&ipqnl_sem))
 			return;
+			
 		while ((skb = skb_dequeue(&sk->receive_queue)) != NULL) {
-			netlink_receive_user_skb(skb);
+			ipq_rcv_skb(skb);
 			kfree_skb(skb);
 		}
-		up(&rtnl_sem);
-	} while (nfnl && nfnl->receive_queue.qlen);
-}
+		
+		up(&ipqnl_sem);
 
-/****************************************************************************
- *
- * System events
- *
- ****************************************************************************/
+	} while (ipqnl && ipqnl->receive_queue.qlen);
+}
 
-static int receive_event(struct notifier_block *this,
-                         unsigned long event, void *ptr)
+static int
+ipq_rcv_dev_event(struct notifier_block *this,
+                  unsigned long event, void *ptr)
 {
 	struct net_device *dev = ptr;
 
 	/* Drop any packets associated with the downed device */
 	if (event == NETDEV_DOWN)
-		ip6q_dev_drop(nlq6, dev->ifindex);
+		ipq_dev_drop(dev->ifindex);
 	return NOTIFY_DONE;
 }
 
-struct notifier_block ip6q_dev_notifier = {
-	receive_event,
+static struct notifier_block ipq_dev_notifier = {
+	ipq_rcv_dev_event,
 	NULL,
 	0
 };
 
-/****************************************************************************
- *
- * Sysctl - queue tuning.
- *
- ****************************************************************************/
+static int
+ipq_rcv_nl_event(struct notifier_block *this,
+                 unsigned long event, void *ptr)
+{	/* Netlink notifier callback, registered through ipq_nl_notifier. */
+	struct netlink_notify *n = ptr;
+
+	if (event == NETLINK_URELEASE &&
+	    n->protocol == NETLINK_IP6_FW && n->pid) {
+		write_lock_bh(&queue_lock);
+		if (n->pid == peer_pid)	/* the notification is about our current peer */
+			__ipq_reset();	/* clears peer_pid, resets the copy mode, drops queued packets */
+		write_unlock_bh(&queue_lock);
+	}
+	return NOTIFY_DONE;
+}
 
-static int sysctl_maxlen = IPQ_QMAX_DEFAULT;
+static struct notifier_block ipq_nl_notifier = {
+	ipq_rcv_nl_event,
+	NULL,
+	0
+};
 
-static struct ctl_table_header *ip6q_sysctl_header;
+#define sysctl_maxlen queue_maxlen	/* ipq_table must tune the limit __ipq_enqueue_entry() enforces */
+static struct ctl_table_header *ipq_sysctl_header;
 
-static ctl_table ip6q_table[] = {
+static ctl_table ipq_table[] = {
 	{ NET_IPQ_QMAX, NET_IPQ_QMAX_NAME, &sysctl_maxlen,
 	  sizeof(sysctl_maxlen), 0644,  NULL, proc_dointvec },
  	{ 0 }
 };
 
-static ctl_table ip6q_dir_table[] = {
-	{NET_IPV6, "ipv6", NULL, 0, 0555, ip6q_table, 0, 0, 0, 0, 0},
+static ctl_table ipq_dir_table[] = {
+	{NET_IPV6, "ipv6", NULL, 0, 0555, ipq_table, 0, 0, 0, 0, 0},
 	{ 0 }
 };
 
-static ctl_table ip6q_root_table[] = {
-	{CTL_NET, "net", NULL, 0, 0555, ip6q_dir_table, 0, 0, 0, 0, 0},
+static ctl_table ipq_root_table[] = {
+	{CTL_NET, "net", NULL, 0, 0555, ipq_dir_table, 0, 0, 0, 0, 0},
 	{ 0 }
 };
 
-/****************************************************************************
- *
- * Procfs - debugging info.
- *
- ****************************************************************************/
-
-static int ip6q_get_info(char *buffer, char **start, off_t offset, int length)
+static int
+ipq_get_info(char *buffer, char **start, off_t offset, int length)
 {
 	int len;
 
-	spin_lock_bh(&nlq6->lock);
+	read_lock_bh(&queue_lock);
+	
 	len = sprintf(buffer,
-	              "Peer pid            : %d\n"
-	              "Peer died           : %d\n"
-	              "Peer copy mode      : %d\n"
-	              "Peer copy range     : %Zu\n"
-	              "Queue length        : %d\n"
-	              "Queue max. length   : %d\n"
-	              "Queue flushing      : %d\n"
-	              "Queue terminate     : %d\n",
-	              nlq6->peer.pid,
-	              nlq6->peer.died,
-	              nlq6->peer.copy_mode,
-	              nlq6->peer.copy_range,
-	              nlq6->len,
-	              *nlq6->maxlen,
-	              nlq6->flushing,
-	              nlq6->terminate);
-	spin_unlock_bh(&nlq6->lock);
+	              "Peer PID          : %d\n"
+	              "Copy mode         : %hu\n"
+	              "Copy range        : %u\n"
+	              "Queue length      : %u\n"
+	              "Queue max. length : %u\n",
+	              peer_pid,
+	              copy_mode,
+	              copy_range,
+	              queue_total,
+	              queue_maxlen);
+
+	read_unlock_bh(&queue_lock);
+	
 	*start = buffer + offset;
 	len -= offset;
 	if (len > length)
@@ -666,52 +673,70 @@
 	return len;
 }
 
-/****************************************************************************
- *
- * Module stuff.
- *
- ****************************************************************************/
-
-static int __init init(void)
+static int
+init_or_cleanup(int init)
 {
-	int status = 0;
+	int status = -ENOMEM;
 	struct proc_dir_entry *proc;
 	
-        /* We must create the NETLINK_IP6_FW protocol service */
-	nfnl = netlink_kernel_create(NETLINK_IP6_FW, netlink_receive_user_sk);
-	if (nfnl == NULL) {
-		printk(KERN_ERR "ip6_queue: initialisation failed: unable to "
-		       "create kernel netlink socket\n");
-		return -ENOMEM;
+	if (!init)
+		goto cleanup;
+
+	netlink_register_notifier(&ipq_nl_notifier);
+	ipqnl = netlink_kernel_create(NETLINK_IP6_FW, ipq_rcv_sk);
+	if (ipqnl == NULL) {
+		printk(KERN_ERR "ip6_queue: failed to create netlink socket\n");
+		goto cleanup_netlink_notifier;
 	}
-	nlq6 = ip6q_create_queue(netfilter6_receive,
-	                       netlink_send_peer, &status, &sysctl_maxlen);
-	if (nlq6 == NULL) {
-		printk(KERN_ERR "ip6_queue: initialisation failed: unable to "
-		       "create queue\n");
-		sock_release(nfnl->socket);
-		return status;
-	}
-        /* The file will be /proc/net/ip6_queue */
-	proc = proc_net_create(IPQ_PROC_FS_NAME, 0, ip6q_get_info);
-	if (proc) proc->owner = THIS_MODULE;
+
+	proc = proc_net_create(IPQ_PROC_FS_NAME, 0, ipq_get_info);
+	if (proc)
+		proc->owner = THIS_MODULE;
 	else {
-		ip6q_destroy_queue(nlq6);
-		sock_release(nfnl->socket);
-		return -ENOMEM;
+		printk(KERN_ERR "ip6_queue: failed to create proc entry\n");
+		goto cleanup_ipqnl;
+	}
+	
+	register_netdevice_notifier(&ipq_dev_notifier);
+	ipq_sysctl_header = register_sysctl_table(ipq_root_table, 0);
+	
+	status = nf_register_queue_handler(PF_INET6, ipq_enqueue_packet, NULL);
+	if (status < 0) {
+		printk(KERN_ERR "ip6_queue: failed to register queue handler\n");
+		goto cleanup_sysctl;
 	}
-	register_netdevice_notifier(&ip6q_dev_notifier);
-	ip6q_sysctl_header = register_sysctl_table(ip6q_root_table, 0);
+	return status;
+
+cleanup:
+	nf_unregister_queue_handler(PF_INET6);
+	br_write_lock_bh(BR_NETPROTO_LOCK);
+	br_write_unlock_bh(BR_NETPROTO_LOCK);
+	ipq_flush(NF_DROP);
+	
+cleanup_sysctl:
+	unregister_sysctl_table(ipq_sysctl_header);
+	unregister_netdevice_notifier(&ipq_dev_notifier);
+	proc_net_remove(IPQ_PROC_FS_NAME);
+	
+cleanup_ipqnl:
+	sock_release(ipqnl->socket);
+	down(&ipqnl_sem);
+	up(&ipqnl_sem);
+	
+cleanup_netlink_notifier:
+	netlink_unregister_notifier(&ipq_nl_notifier);
 	return status;
 }
 
+static int __init init(void)
+{
+	
+	return init_or_cleanup(1);
+}
+
 static void __exit fini(void)
 {
-	unregister_sysctl_table(ip6q_sysctl_header);
-	proc_net_remove(IPQ_PROC_FS_NAME);
-	unregister_netdevice_notifier(&ip6q_dev_notifier);
-	ip6q_destroy_queue(nlq6);
-	sock_release(nfnl->socket);
+	init_or_cleanup(0);
 }
 
 MODULE_DESCRIPTION("IPv6 packet queue handler");

FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen (who was at: slshen@lbl.gov)