[Drbd-dev] DRBD on container, lxc

Ken-ichirou MATSUZAWA chamas at h4.dion.ne.jp
Mon Mar 21 13:27:27 CET 2011


 Hello Evgeniy

Thank you for your quick and valuable advice.

At Mon, 21 Mar 2011 08:27:00 +0300,
Evgeniy Polyakov wrote:
> You are holding rcu lock during skb allocation, which is not permitted
> for GFP_KERNEL, which may lead to a deadlock. Also netlink_broadcast()

I tried using a mutex at first, but it failed with
Documentation/connector/cn_test, which calls cn_netlink_send() from a
timer, i.e. softirq context. Consequently I used the RCU lock, but I did
not know that this is not permitted with a GFP_KERNEL allocation.

While thinking about the locking issue, it occurred to me not to broadcast
but to specify the netns when calling cn_netlink_send(); patch appended.

But this approach will not work with multiple containers, and I was
wondering what to do... copy the list locally in the function?
Then I received this mail:

> In the upcoming drbd 8.4 and further,
> we abandoned connector, and switched to generic netlink,

So my original problem will be solved.

Once again, thank you for your valuable advice.

---
 Documentation/connector/cn_test.c      |    9 ++-
 drivers/block/drbd/drbd_nl.c           |   13 ++--
 drivers/connector/cn_proc.c            |   12 ++--
 drivers/connector/connector.c          |  136 +++++++++++++++++++++++++++++++-
 drivers/md/dm-log-userspace-transfer.c |    2 +-
 drivers/staging/pohmelfs/config.c      |    2 +-
 drivers/video/uvesafb.c                |    4 +-
 drivers/w1/w1_netlink.c                |   14 ++--
 include/linux/connector.h              |    8 ++-
 9 files changed, 171 insertions(+), 29 deletions(-)

diff --git a/Documentation/connector/cn_test.c b/Documentation/connector/cn_test.c
index 7764594..72fe63e 100644
--- a/Documentation/connector/cn_test.c
+++ b/Documentation/connector/cn_test.c
@@ -30,6 +30,9 @@
 
 #include <linux/connector.h>
 
+#include <net/net_namespace.h>
+#include <linux/nsproxy.h>
+
 static struct cb_id cn_test_id = { CN_NETLINK_USERS + 3, 0x456 };
 static char cn_test_name[] = "cn_test";
 static struct sock *nls;
@@ -130,6 +133,7 @@ static void cn_test_timer_func(unsigned long __data)
 {
 	struct cn_msg *m;
 	char data[32];
+	struct net *net = (struct net *) __data;
 
 	pr_debug("%s: timer fired with data %lu\n", __func__, __data);
 
@@ -146,7 +150,7 @@ static void cn_test_timer_func(unsigned long __data)
 
 		memcpy(m + 1, data, m->len);
 
-		cn_netlink_send(m, 0, GFP_ATOMIC);
+		cn_netlink_send(m, 0, GFP_ATOMIC, net);
 		kfree(m);
 	}
 
@@ -169,7 +173,8 @@ static int cn_test_init(void)
 		goto err_out;
 	}
 
-	setup_timer(&cn_test_timer, cn_test_timer_func, 0);
+	setup_timer(&cn_test_timer, cn_test_timer_func,
+		    (unsigned long) current->nsproxy->net_ns);
 	mod_timer(&cn_test_timer, jiffies + msecs_to_jiffies(1000));
 
 	pr_info("initialized with id={%u.%u}\n",
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 8cbfaa6..f8a921a 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -40,6 +40,7 @@
 #include <linux/drbd_limits.h>
 #include <linux/compiler.h>
 #include <linux/kthread.h>
+#include <linux/nsproxy.h>
 
 static unsigned short *tl_add_blob(unsigned short *, enum drbd_tags, const void *, int);
 static unsigned short *tl_add_str(unsigned short *, enum drbd_tags, const char *);
@@ -2226,7 +2227,7 @@ static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms
 	cn_reply->len = sizeof(struct drbd_nl_cfg_reply) + rr;
 	cn_reply->flags = 0;
 
-	rr = cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_KERNEL);
+	rr = cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_KERNEL, &init_net);
 	if (rr && rr != -ESRCH)
 		printk(KERN_INFO "drbd: cn_netlink_send()=%d\n", rr);
 
@@ -2319,7 +2320,7 @@ void drbd_bcast_state(struct drbd_conf *mdev, union drbd_state state)
 	reply->minor = mdev_to_minor(mdev);
 	reply->ret_code = NO_ERROR;
 
-	cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO);
+	cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO,  &init_net);
 }
 
 void drbd_bcast_ev_helper(struct drbd_conf *mdev, char *helper_name)
@@ -2351,7 +2352,7 @@ void drbd_bcast_ev_helper(struct drbd_conf *mdev, char *helper_name)
 	reply->minor = mdev_to_minor(mdev);
 	reply->ret_code = NO_ERROR;
 
-	cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO);
+	cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO, &init_net);
 }
 
 void drbd_bcast_ee(struct drbd_conf *mdev,
@@ -2426,7 +2427,7 @@ void drbd_bcast_ee(struct drbd_conf *mdev,
 	reply->minor = mdev_to_minor(mdev);
 	reply->ret_code = NO_ERROR;
 
-	cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO);
+	cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO, &init_net);
 	kfree(cn_reply);
 }
 
@@ -2465,7 +2466,7 @@ void drbd_bcast_sync_progress(struct drbd_conf *mdev)
 	reply->minor = mdev_to_minor(mdev);
 	reply->ret_code = NO_ERROR;
 
-	cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO);
+	cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO, &init_net);
 }
 
 int __init drbd_nl_init(void)
@@ -2518,7 +2519,7 @@ void drbd_nl_send_reply(struct cn_msg *req, int ret_code)
 	reply->minor = ((struct drbd_nl_cfg_req *)req->data)->drbd_minor;
 	reply->ret_code = ret_code;
 
-	rr = cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO);
+	rr = cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO, &init_net);
 	if (rr && rr != -ESRCH)
 		printk(KERN_INFO "drbd: cn_netlink_send()=%d\n", rr);
 }
diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c
index 2b46a7e..09715a9 100644
--- a/drivers/connector/cn_proc.c
+++ b/drivers/connector/cn_proc.c
@@ -74,7 +74,7 @@ void proc_fork_connector(struct task_struct *task)
 	msg->ack = 0; /* not used */
 	msg->len = sizeof(*ev);
 	/*  If cn_netlink_send() failed, the data is not sent */
-	cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL);
+	cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL, &init_net);
 }
 
 void proc_exec_connector(struct task_struct *task)
@@ -99,7 +99,7 @@ void proc_exec_connector(struct task_struct *task)
 	memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id));
 	msg->ack = 0; /* not used */
 	msg->len = sizeof(*ev);
-	cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL);
+	cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL, &init_net);
 }
 
 void proc_id_connector(struct task_struct *task, int which_id)
@@ -138,7 +138,7 @@ void proc_id_connector(struct task_struct *task, int which_id)
 	memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id));
 	msg->ack = 0; /* not used */
 	msg->len = sizeof(*ev);
-	cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL);
+	cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL, &init_net);
 }
 
 void proc_sid_connector(struct task_struct *task)
@@ -163,7 +163,7 @@ void proc_sid_connector(struct task_struct *task)
 	memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id));
 	msg->ack = 0; /* not used */
 	msg->len = sizeof(*ev);
-	cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL);
+	cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL, &init_net);
 }
 
 void proc_exit_connector(struct task_struct *task)
@@ -190,7 +190,7 @@ void proc_exit_connector(struct task_struct *task)
 	memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id));
 	msg->ack = 0; /* not used */
 	msg->len = sizeof(*ev);
-	cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL);
+	cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL, &init_net);
 }
 
 /*
@@ -222,7 +222,7 @@ static void cn_proc_ack(int err, int rcvd_seq, int rcvd_ack)
 	memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id));
 	msg->ack = rcvd_ack + 1;
 	msg->len = sizeof(*ev);
-	cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL);
+	cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL, &init_net);
 }
 
 /**
diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c
index 05117f1..8485ac0 100644
--- a/drivers/connector/connector.c
+++ b/drivers/connector/connector.c
@@ -32,6 +32,7 @@
 #include <linux/spinlock.h>
 
 #include <net/sock.h>
+#include <net/net_namespace.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Evgeniy Polyakov <zbr at ioremap.net>");
@@ -42,6 +43,38 @@ static struct cn_dev cdev;
 
 static int cn_already_initialized;
 
+#ifdef CONFIG_NET
+struct cn_sock {
+	struct list_head list;
+	struct sock *sk;
+};
+static DEFINE_SPINLOCK(cn_sock_lock);
+
+static struct sock *find_sock(struct net *src_net)
+{
+	struct cn_dev *dev = &cdev;
+	struct cn_sock *cn_sk;
+	struct sock *nls = NULL;;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(cn_sk, &dev->nls_list, list) {
+		if (sock_net(cn_sk->sk) == src_net) {
+			nls = cn_sk->sk;
+			break;
+		}
+	}
+	rcu_read_unlock();
+
+	return nls;
+}
+
+#else /* CONFIG_NET */
+static struct sock *find_sock(struct net *src_net)
+{
+	return cdev.nls;
+}
+#endif /* CONFIG_NET */
+
 /*
  * msg->seq and msg->ack are used to determine message genealogy.
  * When someone sends message it puts there locally unique sequence
@@ -63,7 +96,7 @@ static int cn_already_initialized;
  * a new message.
  *
  */
-int cn_netlink_send(struct cn_msg *msg, u32 __group, gfp_t gfp_mask)
+int cn_netlink_send(struct cn_msg *msg, u32 __group, gfp_t gfp_mask, struct net *src_net)
 {
 	struct cn_callback_entry *__cbq;
 	unsigned int size;
@@ -71,6 +104,8 @@ int cn_netlink_send(struct cn_msg *msg, u32 __group, gfp_t gfp_mask)
 	struct nlmsghdr *nlh;
 	struct cn_msg *data;
 	struct cn_dev *dev = &cdev;
+	struct sock *nls;
+
 	u32 group = 0;
 	int found = 0;
 
@@ -92,7 +127,11 @@ int cn_netlink_send(struct cn_msg *msg, u32 __group, gfp_t gfp_mask)
 		group = __group;
 	}
 
-	if (!netlink_has_listeners(dev->nls, group))
+	nls = find_sock(src_net);
+	if (!nls)
+		return -ESRCH;
+
+	if (!netlink_has_listeners(nls, group))
 		return -ESRCH;
 
 	size = NLMSG_SPACE(sizeof(*msg) + msg->len);
@@ -109,7 +148,7 @@ int cn_netlink_send(struct cn_msg *msg, u32 __group, gfp_t gfp_mask)
 
 	NETLINK_CB(skb).dst_group = group;
 
-	return netlink_broadcast(dev->nls, skb, 0, group, gfp_mask);
+	return netlink_broadcast(nls, skb, 0, group, gfp_mask);
 
 nlmsg_failure:
 	kfree_skb(skb);
@@ -272,6 +311,96 @@ static const struct file_operations cn_file_ops = {
 	.release = single_release
 };
 
+#ifdef CONFIG_NET
+static int cn_net_init(struct net *net)
+{
+	struct cn_dev *dev = &cdev;
+	struct cn_sock *cn_sk;
+
+	cn_sk = kzalloc(sizeof(struct cn_sock), GFP_KERNEL);
+	if (!cn_sk)
+		return -ENOMEM;
+
+	cn_sk->sk = netlink_kernel_create(net, NETLINK_CONNECTOR,
+					 CN_NETLINK_USERS + 0xf,
+					 dev->input, NULL, THIS_MODULE);
+	if (!cn_sk->sk) {
+		printk(KERN_ERR
+		       "connector: unable to create netlink socket!\n");
+		kfree(cn_sk);
+		return -EIO;
+	}
+
+	spin_lock(&cn_sock_lock);
+	list_add_tail_rcu(&cn_sk->list, &dev->nls_list);
+	spin_unlock(&cn_sock_lock);
+
+	proc_net_fops_create(net, "connector", S_IRUGO, &cn_file_ops);
+
+ 	return 0;
+}
+
+static void cn_net_exit(struct net *net) {
+	struct cn_dev *dev = &cdev;
+	struct cn_sock *cn_sk;
+	int found = 0;
+
+	spin_lock(&cn_sock_lock);
+	list_for_each_entry(cn_sk, &dev->nls_list, list) {
+		if (sock_net(cn_sk->sk) == net) {
+			list_del_rcu(&cn_sk->list);
+			found = 1;
+			break;
+		}
+	}
+	spin_unlock(&cn_sock_lock);
+
+	if (!found)
+		return;
+
+	proc_net_remove(net, "connector");
+	netlink_kernel_release(cn_sk->sk);
+	kfree(cn_sk);
+}
+
+static struct pernet_operations cn_net_ops = {
+	.init = cn_net_init,
+	.exit = cn_net_exit,
+};
+
+static int __devinit cn_init(void)
+{
+	int err = 0;
+	struct cn_dev *dev = &cdev;
+
+	INIT_LIST_HEAD(&dev->nls_list);
+	dev->input = cn_rx_skb;
+
+	dev->cbdev = cn_queue_alloc_dev("cqueue", NULL);
+	if (!dev->cbdev)
+		return -EINVAL;
+
+	err = register_pernet_subsys(&cn_net_ops);
+	if (err) {
+		cn_queue_free_dev(dev->cbdev);
+		return err;
+	}
+
+	cn_already_initialized = 1;
+	
+	return 0;
+}
+
+static void __devexit cn_fini(void)
+{
+	struct cn_dev *dev = &cdev;
+
+	cn_already_initialized = 0;
+	unregister_pernet_subsys(&cn_net_ops);
+	cn_queue_free_dev(dev->cbdev);
+}
+	
+#else /* CONFIG_NET */
 static int __devinit cn_init(void)
 {
 	struct cn_dev *dev = &cdev;
@@ -308,6 +437,7 @@ static void __devexit cn_fini(void)
 	cn_queue_free_dev(dev->cbdev);
 	netlink_kernel_release(dev->nls);
 }
+#endif /* CONFIG_NET */
 
 subsys_initcall(cn_init);
 module_exit(cn_fini);
diff --git a/drivers/md/dm-log-userspace-transfer.c b/drivers/md/dm-log-userspace-transfer.c
index 049eaf1..3995ff5 100644
--- a/drivers/md/dm-log-userspace-transfer.c
+++ b/drivers/md/dm-log-userspace-transfer.c
@@ -66,7 +66,7 @@ static int dm_ulog_sendto_server(struct dm_ulog_request *tfr)
 	msg->seq = tfr->seq;
 	msg->len = sizeof(struct dm_ulog_request) + tfr->data_size;
 
-	r = cn_netlink_send(msg, 0, gfp_any());
+	r = cn_netlink_send(msg, 0, gfp_any(), &init_net);
 
 	return r;
 }
diff --git a/drivers/staging/pohmelfs/config.c b/drivers/staging/pohmelfs/config.c
index 89279ba..64189d7 100644
--- a/drivers/staging/pohmelfs/config.c
+++ b/drivers/staging/pohmelfs/config.c
@@ -252,7 +252,7 @@ static int pohmelfs_send_reply(int err, int msg_num, int action, struct cn_msg *
 	ack->error = err;
 	ack->msg_num = msg_num;
 
-	cn_netlink_send(&ack->msg, 0, GFP_KERNEL);
+	cn_netlink_send(&ack->msg, 0, GFP_KERNEL, &init_net);
 	kfree(ack);
 	return 0;
 }
diff --git a/drivers/video/uvesafb.c b/drivers/video/uvesafb.c
index 52ec095..350a979 100644
--- a/drivers/video/uvesafb.c
+++ b/drivers/video/uvesafb.c
@@ -193,7 +193,7 @@ static int uvesafb_exec(struct uvesafb_ktask *task)
 	uvfb_tasks[seq] = task;
 	mutex_unlock(&uvfb_lock);
 
-	err = cn_netlink_send(m, 0, GFP_KERNEL);
+	err = cn_netlink_send(m, 0, GFP_KERNEL, &init_net);
 	if (err == -ESRCH) {
 		/*
 		 * Try to start the userspace helper if sending
@@ -207,7 +207,7 @@ static int uvesafb_exec(struct uvesafb_ktask *task)
 					"helper is installed and executable\n");
 		} else {
 			v86d_started = 1;
-			err = cn_netlink_send(m, 0, gfp_any());
+			err = cn_netlink_send(m, 0, gfp_any(), &init_net);
 			if (err == -ENOBUFS)
 				err = 0;
 		}
diff --git a/drivers/w1/w1_netlink.c b/drivers/w1/w1_netlink.c
index 7e667bc..e0f196e 100644
--- a/drivers/w1/w1_netlink.c
+++ b/drivers/w1/w1_netlink.c
@@ -45,7 +45,7 @@ void w1_netlink_send(struct w1_master *dev, struct w1_netlink_msg *msg)
 
 	memcpy(w, msg, sizeof(struct w1_netlink_msg));
 
-	cn_netlink_send(m, 0, GFP_KERNEL);
+	cn_netlink_send(m, 0, GFP_KERNEL, &init_net);
 }
 
 static void w1_send_slave(struct w1_master *dev, u64 rn)
@@ -68,7 +68,7 @@ static void w1_send_slave(struct w1_master *dev, u64 rn)
 	}
 
 	msg->ack++;
-	cn_netlink_send(msg, 0, GFP_KERNEL);
+	cn_netlink_send(msg, 0, GFP_KERNEL, &init_net);
 
 	msg->len = sizeof(struct w1_netlink_msg) + sizeof(struct w1_netlink_cmd);
 	hdr->len = sizeof(struct w1_netlink_cmd);
@@ -88,7 +88,7 @@ static int w1_process_search_command(struct w1_master *dev, struct cn_msg *msg,
 	w1_search_devices(dev, search_type, w1_send_slave);
 
 	msg->ack = 0;
-	cn_netlink_send(msg, 0, GFP_KERNEL);
+	cn_netlink_send(msg, 0, GFP_KERNEL, &init_net);
 
 	dev->priv = NULL;
 	dev->priv_size = 0;
@@ -128,7 +128,7 @@ static int w1_send_read_reply(struct cn_msg *msg, struct w1_netlink_msg *hdr,
 
 	memcpy(c->data, cmd->data, c->len);
 
-	err = cn_netlink_send(cm, 0, GFP_KERNEL);
+	err = cn_netlink_send(cm, 0, GFP_KERNEL, &init_net);
 
 	kfree(data);
 
@@ -253,7 +253,7 @@ static int w1_process_command_root(struct cn_msg *msg, struct w1_netlink_msg *mc
 	mutex_lock(&w1_mlock);
 	list_for_each_entry(m, &w1_masters, w1_master_entry) {
 		if (cn->len + sizeof(*id) > PAGE_SIZE - sizeof(struct cn_msg)) {
-			cn_netlink_send(cn, 0, GFP_KERNEL);
+			cn_netlink_send(cn, 0, GFP_KERNEL, &init_net);
 			cn->ack++;
 			cn->len = sizeof(struct w1_netlink_msg);
 			w->len = 0;
@@ -266,7 +266,7 @@ static int w1_process_command_root(struct cn_msg *msg, struct w1_netlink_msg *mc
 		id++;
 	}
 	cn->ack = 0;
-	cn_netlink_send(cn, 0, GFP_KERNEL);
+	cn_netlink_send(cn, 0, GFP_KERNEL, &init_net);
 	mutex_unlock(&w1_mlock);
 
 	kfree(cn);
@@ -301,7 +301,7 @@ static int w1_netlink_send_error(struct cn_msg *rcmsg, struct w1_netlink_msg *rm
 		cmsg->len += sizeof(*cmd);
 	}
 
-	error = cn_netlink_send(cmsg, 0, GFP_KERNEL);
+	error = cn_netlink_send(cmsg, 0, GFP_KERNEL, &init_net);
 	kfree(cmsg);
 
 	return error;
diff --git a/include/linux/connector.h b/include/linux/connector.h
index 7e8ca75..dd3ebd6 100644
--- a/include/linux/connector.h
+++ b/include/linux/connector.h
@@ -122,7 +122,13 @@ struct cn_dev {
 	struct cb_id id;
 
 	u32 seq, groups;
+
+#ifdef CONFIG_NET
+	struct list_head nls_list;
+#else /* CONFIG_NET */
 	struct sock *nls;
+#endif /* CONFIG_NET */
+
 	void (*input) (struct sk_buff *skb);
 
 	struct cn_queue_dev *cbdev;
@@ -130,7 +136,7 @@ struct cn_dev {
 
 int cn_add_callback(struct cb_id *, char *, void (*callback) (struct cn_msg *, struct netlink_skb_parms *));
 void cn_del_callback(struct cb_id *);
-int cn_netlink_send(struct cn_msg *, u32, gfp_t);
+int cn_netlink_send(struct cn_msg *, u32, gfp_t, struct net *);
 
 int cn_queue_add_callback(struct cn_queue_dev *dev, char *name, struct cb_id *id, void (*callback)(struct cn_msg *, struct netlink_skb_parms *));
 void cn_queue_del_callback(struct cn_queue_dev *dev, struct cb_id *id);
-- 
1.7.2.5




More information about the drbd-dev mailing list