[Drbd-dev] DRBD on container, lxc
Ken-ichirou MATSUZAWA
chamas at h4.dion.ne.jp
Mon Mar 21 13:27:27 CET 2011
Hello Evgeniy
Thank you for quick and valuable advice.
At Mon, 21 Mar 2011 08:27:00 +0300,
Evgeniy Polyakov wrote:
> You are holding rcu lock during skb allocation, which is not permitted
> for GFP_KERNEL, which may lead to a deadlock. Also netlink_broadcast()
I had tried using a mutex at first, but it failed with
Documentation/connector/cn_test, which calls cn_netlink_send() from a
timer, i.e. in softirq context. Consequently I used the rcu lock, but I
did not know that it is not permitted for GFP_KERNEL allocation.
While thinking about the lock issue, a non-broadcasting approach occurred
to me: specifying the netns when calling cn_netlink_send() (patch appended).
But it will not work with multiple containers this way; I am still
thinking about what to do... copy the list locally in the function?
Then I received this mail:
> In the upcoming drbd 8.4 and further,
> we abandoned connector, and switched to generic netlink,
So my original problem will be solved.
Once again, thank you for valuable advice.
---
Documentation/connector/cn_test.c | 9 ++-
drivers/block/drbd/drbd_nl.c | 13 ++--
drivers/connector/cn_proc.c | 12 ++--
drivers/connector/connector.c | 136 +++++++++++++++++++++++++++++++-
drivers/md/dm-log-userspace-transfer.c | 2 +-
drivers/staging/pohmelfs/config.c | 2 +-
drivers/video/uvesafb.c | 4 +-
drivers/w1/w1_netlink.c | 14 ++--
include/linux/connector.h | 8 ++-
9 files changed, 171 insertions(+), 29 deletions(-)
diff --git a/Documentation/connector/cn_test.c b/Documentation/connector/cn_test.c
index 7764594..72fe63e 100644
--- a/Documentation/connector/cn_test.c
+++ b/Documentation/connector/cn_test.c
@@ -30,6 +30,9 @@
#include <linux/connector.h>
+#include <net/net_namespace.h>
+#include <linux/nsproxy.h>
+
static struct cb_id cn_test_id = { CN_NETLINK_USERS + 3, 0x456 };
static char cn_test_name[] = "cn_test";
static struct sock *nls;
@@ -130,6 +133,7 @@ static void cn_test_timer_func(unsigned long __data)
{
struct cn_msg *m;
char data[32];
+ struct net *net = (struct net *) __data;
pr_debug("%s: timer fired with data %lu\n", __func__, __data);
@@ -146,7 +150,7 @@ static void cn_test_timer_func(unsigned long __data)
memcpy(m + 1, data, m->len);
- cn_netlink_send(m, 0, GFP_ATOMIC);
+ cn_netlink_send(m, 0, GFP_ATOMIC, net);
kfree(m);
}
@@ -169,7 +173,8 @@ static int cn_test_init(void)
goto err_out;
}
- setup_timer(&cn_test_timer, cn_test_timer_func, 0);
+ setup_timer(&cn_test_timer, cn_test_timer_func,
+ (unsigned long) current->nsproxy->net_ns);
mod_timer(&cn_test_timer, jiffies + msecs_to_jiffies(1000));
pr_info("initialized with id={%u.%u}\n",
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 8cbfaa6..f8a921a 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -40,6 +40,7 @@
#include <linux/drbd_limits.h>
#include <linux/compiler.h>
#include <linux/kthread.h>
+#include <linux/nsproxy.h>
static unsigned short *tl_add_blob(unsigned short *, enum drbd_tags, const void *, int);
static unsigned short *tl_add_str(unsigned short *, enum drbd_tags, const char *);
@@ -2226,7 +2227,7 @@ static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms
cn_reply->len = sizeof(struct drbd_nl_cfg_reply) + rr;
cn_reply->flags = 0;
- rr = cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_KERNEL);
+ rr = cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_KERNEL, &init_net);
if (rr && rr != -ESRCH)
printk(KERN_INFO "drbd: cn_netlink_send()=%d\n", rr);
@@ -2319,7 +2320,7 @@ void drbd_bcast_state(struct drbd_conf *mdev, union drbd_state state)
reply->minor = mdev_to_minor(mdev);
reply->ret_code = NO_ERROR;
- cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO);
+ cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO, &init_net);
}
void drbd_bcast_ev_helper(struct drbd_conf *mdev, char *helper_name)
@@ -2351,7 +2352,7 @@ void drbd_bcast_ev_helper(struct drbd_conf *mdev, char *helper_name)
reply->minor = mdev_to_minor(mdev);
reply->ret_code = NO_ERROR;
- cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO);
+ cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO, &init_net);
}
void drbd_bcast_ee(struct drbd_conf *mdev,
@@ -2426,7 +2427,7 @@ void drbd_bcast_ee(struct drbd_conf *mdev,
reply->minor = mdev_to_minor(mdev);
reply->ret_code = NO_ERROR;
- cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO);
+ cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO, &init_net);
kfree(cn_reply);
}
@@ -2465,7 +2466,7 @@ void drbd_bcast_sync_progress(struct drbd_conf *mdev)
reply->minor = mdev_to_minor(mdev);
reply->ret_code = NO_ERROR;
- cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO);
+ cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO, &init_net);
}
int __init drbd_nl_init(void)
@@ -2518,7 +2519,7 @@ void drbd_nl_send_reply(struct cn_msg *req, int ret_code)
reply->minor = ((struct drbd_nl_cfg_req *)req->data)->drbd_minor;
reply->ret_code = ret_code;
- rr = cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO);
+ rr = cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO, &init_net);
if (rr && rr != -ESRCH)
printk(KERN_INFO "drbd: cn_netlink_send()=%d\n", rr);
}
diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c
index 2b46a7e..09715a9 100644
--- a/drivers/connector/cn_proc.c
+++ b/drivers/connector/cn_proc.c
@@ -74,7 +74,7 @@ void proc_fork_connector(struct task_struct *task)
msg->ack = 0; /* not used */
msg->len = sizeof(*ev);
/* If cn_netlink_send() failed, the data is not sent */
- cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL);
+ cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL, &init_net);
}
void proc_exec_connector(struct task_struct *task)
@@ -99,7 +99,7 @@ void proc_exec_connector(struct task_struct *task)
memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id));
msg->ack = 0; /* not used */
msg->len = sizeof(*ev);
- cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL);
+ cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL, &init_net);
}
void proc_id_connector(struct task_struct *task, int which_id)
@@ -138,7 +138,7 @@ void proc_id_connector(struct task_struct *task, int which_id)
memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id));
msg->ack = 0; /* not used */
msg->len = sizeof(*ev);
- cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL);
+ cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL, &init_net);
}
void proc_sid_connector(struct task_struct *task)
@@ -163,7 +163,7 @@ void proc_sid_connector(struct task_struct *task)
memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id));
msg->ack = 0; /* not used */
msg->len = sizeof(*ev);
- cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL);
+ cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL, &init_net);
}
void proc_exit_connector(struct task_struct *task)
@@ -190,7 +190,7 @@ void proc_exit_connector(struct task_struct *task)
memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id));
msg->ack = 0; /* not used */
msg->len = sizeof(*ev);
- cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL);
+ cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL, &init_net);
}
/*
@@ -222,7 +222,7 @@ static void cn_proc_ack(int err, int rcvd_seq, int rcvd_ack)
memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id));
msg->ack = rcvd_ack + 1;
msg->len = sizeof(*ev);
- cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL);
+ cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL, &init_net);
}
/**
diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c
index 05117f1..8485ac0 100644
--- a/drivers/connector/connector.c
+++ b/drivers/connector/connector.c
@@ -32,6 +32,7 @@
#include <linux/spinlock.h>
#include <net/sock.h>
+#include <net/net_namespace.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Evgeniy Polyakov <zbr at ioremap.net>");
@@ -42,6 +43,38 @@ static struct cn_dev cdev;
static int cn_already_initialized;
+#ifdef CONFIG_NET
+struct cn_sock {
+ struct list_head list;
+ struct sock *sk;
+};
+static DEFINE_SPINLOCK(cn_sock_lock);
+
+static struct sock *find_sock(struct net *src_net)
+{
+ struct cn_dev *dev = &cdev;
+ struct cn_sock *cn_sk;
+ struct sock *nls = NULL;;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(cn_sk, &dev->nls_list, list) {
+ if (sock_net(cn_sk->sk) == src_net) {
+ nls = cn_sk->sk;
+ break;
+ }
+ }
+ rcu_read_unlock();
+
+ return nls;
+}
+
+#else /* CONFIG_NET */
+static struct sock *find_sock(struct net *src_net)
+{
+ return cdev.nls;
+}
+#endif /* CONFIG_NET */
+
/*
* msg->seq and msg->ack are used to determine message genealogy.
* When someone sends message it puts there locally unique sequence
@@ -63,7 +96,7 @@ static int cn_already_initialized;
* a new message.
*
*/
-int cn_netlink_send(struct cn_msg *msg, u32 __group, gfp_t gfp_mask)
+int cn_netlink_send(struct cn_msg *msg, u32 __group, gfp_t gfp_mask, struct net *src_net)
{
struct cn_callback_entry *__cbq;
unsigned int size;
@@ -71,6 +104,8 @@ int cn_netlink_send(struct cn_msg *msg, u32 __group, gfp_t gfp_mask)
struct nlmsghdr *nlh;
struct cn_msg *data;
struct cn_dev *dev = &cdev;
+ struct sock *nls;
+
u32 group = 0;
int found = 0;
@@ -92,7 +127,11 @@ int cn_netlink_send(struct cn_msg *msg, u32 __group, gfp_t gfp_mask)
group = __group;
}
- if (!netlink_has_listeners(dev->nls, group))
+ nls = find_sock(src_net);
+ if (!nls)
+ return -ESRCH;
+
+ if (!netlink_has_listeners(nls, group))
return -ESRCH;
size = NLMSG_SPACE(sizeof(*msg) + msg->len);
@@ -109,7 +148,7 @@ int cn_netlink_send(struct cn_msg *msg, u32 __group, gfp_t gfp_mask)
NETLINK_CB(skb).dst_group = group;
- return netlink_broadcast(dev->nls, skb, 0, group, gfp_mask);
+ return netlink_broadcast(nls, skb, 0, group, gfp_mask);
nlmsg_failure:
kfree_skb(skb);
@@ -272,6 +311,96 @@ static const struct file_operations cn_file_ops = {
.release = single_release
};
+#ifdef CONFIG_NET
+static int cn_net_init(struct net *net)
+{
+ struct cn_dev *dev = &cdev;
+ struct cn_sock *cn_sk;
+
+ cn_sk = kzalloc(sizeof(struct cn_sock), GFP_KERNEL);
+ if (!cn_sk)
+ return -ENOMEM;
+
+ cn_sk->sk = netlink_kernel_create(net, NETLINK_CONNECTOR,
+ CN_NETLINK_USERS + 0xf,
+ dev->input, NULL, THIS_MODULE);
+ if (!cn_sk->sk) {
+ printk(KERN_ERR
+ "connector: unable to create netlink socket!\n");
+ kfree(cn_sk);
+ return -EIO;
+ }
+
+ spin_lock(&cn_sock_lock);
+ list_add_tail_rcu(&cn_sk->list, &dev->nls_list);
+ spin_unlock(&cn_sock_lock);
+
+ proc_net_fops_create(net, "connector", S_IRUGO, &cn_file_ops);
+
+ return 0;
+}
+
+static void cn_net_exit(struct net *net) {
+ struct cn_dev *dev = &cdev;
+ struct cn_sock *cn_sk;
+ int found = 0;
+
+ spin_lock(&cn_sock_lock);
+ list_for_each_entry(cn_sk, &dev->nls_list, list) {
+ if (sock_net(cn_sk->sk) == net) {
+ list_del_rcu(&cn_sk->list);
+ found = 1;
+ break;
+ }
+ }
+ spin_unlock(&cn_sock_lock);
+
+ if (!found)
+ return;
+
+ proc_net_remove(net, "connector");
+ netlink_kernel_release(cn_sk->sk);
+ kfree(cn_sk);
+}
+
+static struct pernet_operations cn_net_ops = {
+ .init = cn_net_init,
+ .exit = cn_net_exit,
+};
+
+static int __devinit cn_init(void)
+{
+ int err = 0;
+ struct cn_dev *dev = &cdev;
+
+ INIT_LIST_HEAD(&dev->nls_list);
+ dev->input = cn_rx_skb;
+
+ dev->cbdev = cn_queue_alloc_dev("cqueue", NULL);
+ if (!dev->cbdev)
+ return -EINVAL;
+
+ err = register_pernet_subsys(&cn_net_ops);
+ if (err) {
+ cn_queue_free_dev(dev->cbdev);
+ return err;
+ }
+
+ cn_already_initialized = 1;
+
+ return 0;
+}
+
+static void __devexit cn_fini(void)
+{
+ struct cn_dev *dev = &cdev;
+
+ cn_already_initialized = 0;
+ unregister_pernet_subsys(&cn_net_ops);
+ cn_queue_free_dev(dev->cbdev);
+}
+
+#else /* CONFIG_NET */
static int __devinit cn_init(void)
{
struct cn_dev *dev = &cdev;
@@ -308,6 +437,7 @@ static void __devexit cn_fini(void)
cn_queue_free_dev(dev->cbdev);
netlink_kernel_release(dev->nls);
}
+#endif /* CONFIG_NET */
subsys_initcall(cn_init);
module_exit(cn_fini);
diff --git a/drivers/md/dm-log-userspace-transfer.c b/drivers/md/dm-log-userspace-transfer.c
index 049eaf1..3995ff5 100644
--- a/drivers/md/dm-log-userspace-transfer.c
+++ b/drivers/md/dm-log-userspace-transfer.c
@@ -66,7 +66,7 @@ static int dm_ulog_sendto_server(struct dm_ulog_request *tfr)
msg->seq = tfr->seq;
msg->len = sizeof(struct dm_ulog_request) + tfr->data_size;
- r = cn_netlink_send(msg, 0, gfp_any());
+ r = cn_netlink_send(msg, 0, gfp_any(), &init_net);
return r;
}
diff --git a/drivers/staging/pohmelfs/config.c b/drivers/staging/pohmelfs/config.c
index 89279ba..64189d7 100644
--- a/drivers/staging/pohmelfs/config.c
+++ b/drivers/staging/pohmelfs/config.c
@@ -252,7 +252,7 @@ static int pohmelfs_send_reply(int err, int msg_num, int action, struct cn_msg *
ack->error = err;
ack->msg_num = msg_num;
- cn_netlink_send(&ack->msg, 0, GFP_KERNEL);
+ cn_netlink_send(&ack->msg, 0, GFP_KERNEL, &init_net);
kfree(ack);
return 0;
}
diff --git a/drivers/video/uvesafb.c b/drivers/video/uvesafb.c
index 52ec095..350a979 100644
--- a/drivers/video/uvesafb.c
+++ b/drivers/video/uvesafb.c
@@ -193,7 +193,7 @@ static int uvesafb_exec(struct uvesafb_ktask *task)
uvfb_tasks[seq] = task;
mutex_unlock(&uvfb_lock);
- err = cn_netlink_send(m, 0, GFP_KERNEL);
+ err = cn_netlink_send(m, 0, GFP_KERNEL, &init_net);
if (err == -ESRCH) {
/*
* Try to start the userspace helper if sending
@@ -207,7 +207,7 @@ static int uvesafb_exec(struct uvesafb_ktask *task)
"helper is installed and executable\n");
} else {
v86d_started = 1;
- err = cn_netlink_send(m, 0, gfp_any());
+ err = cn_netlink_send(m, 0, gfp_any(), &init_net);
if (err == -ENOBUFS)
err = 0;
}
diff --git a/drivers/w1/w1_netlink.c b/drivers/w1/w1_netlink.c
index 7e667bc..e0f196e 100644
--- a/drivers/w1/w1_netlink.c
+++ b/drivers/w1/w1_netlink.c
@@ -45,7 +45,7 @@ void w1_netlink_send(struct w1_master *dev, struct w1_netlink_msg *msg)
memcpy(w, msg, sizeof(struct w1_netlink_msg));
- cn_netlink_send(m, 0, GFP_KERNEL);
+ cn_netlink_send(m, 0, GFP_KERNEL, &init_net);
}
static void w1_send_slave(struct w1_master *dev, u64 rn)
@@ -68,7 +68,7 @@ static void w1_send_slave(struct w1_master *dev, u64 rn)
}
msg->ack++;
- cn_netlink_send(msg, 0, GFP_KERNEL);
+ cn_netlink_send(msg, 0, GFP_KERNEL, &init_net);
msg->len = sizeof(struct w1_netlink_msg) + sizeof(struct w1_netlink_cmd);
hdr->len = sizeof(struct w1_netlink_cmd);
@@ -88,7 +88,7 @@ static int w1_process_search_command(struct w1_master *dev, struct cn_msg *msg,
w1_search_devices(dev, search_type, w1_send_slave);
msg->ack = 0;
- cn_netlink_send(msg, 0, GFP_KERNEL);
+ cn_netlink_send(msg, 0, GFP_KERNEL, &init_net);
dev->priv = NULL;
dev->priv_size = 0;
@@ -128,7 +128,7 @@ static int w1_send_read_reply(struct cn_msg *msg, struct w1_netlink_msg *hdr,
memcpy(c->data, cmd->data, c->len);
- err = cn_netlink_send(cm, 0, GFP_KERNEL);
+ err = cn_netlink_send(cm, 0, GFP_KERNEL, &init_net);
kfree(data);
@@ -253,7 +253,7 @@ static int w1_process_command_root(struct cn_msg *msg, struct w1_netlink_msg *mc
mutex_lock(&w1_mlock);
list_for_each_entry(m, &w1_masters, w1_master_entry) {
if (cn->len + sizeof(*id) > PAGE_SIZE - sizeof(struct cn_msg)) {
- cn_netlink_send(cn, 0, GFP_KERNEL);
+ cn_netlink_send(cn, 0, GFP_KERNEL, &init_net);
cn->ack++;
cn->len = sizeof(struct w1_netlink_msg);
w->len = 0;
@@ -266,7 +266,7 @@ static int w1_process_command_root(struct cn_msg *msg, struct w1_netlink_msg *mc
id++;
}
cn->ack = 0;
- cn_netlink_send(cn, 0, GFP_KERNEL);
+ cn_netlink_send(cn, 0, GFP_KERNEL, &init_net);
mutex_unlock(&w1_mlock);
kfree(cn);
@@ -301,7 +301,7 @@ static int w1_netlink_send_error(struct cn_msg *rcmsg, struct w1_netlink_msg *rm
cmsg->len += sizeof(*cmd);
}
- error = cn_netlink_send(cmsg, 0, GFP_KERNEL);
+ error = cn_netlink_send(cmsg, 0, GFP_KERNEL, &init_net);
kfree(cmsg);
return error;
diff --git a/include/linux/connector.h b/include/linux/connector.h
index 7e8ca75..dd3ebd6 100644
--- a/include/linux/connector.h
+++ b/include/linux/connector.h
@@ -122,7 +122,13 @@ struct cn_dev {
struct cb_id id;
u32 seq, groups;
+
+#ifdef CONFIG_NET
+ struct list_head nls_list;
+#else /* CONFIG_NET */
struct sock *nls;
+#endif /* CONFIG_NET */
+
void (*input) (struct sk_buff *skb);
struct cn_queue_dev *cbdev;
@@ -130,7 +136,7 @@ struct cn_dev {
int cn_add_callback(struct cb_id *, char *, void (*callback) (struct cn_msg *, struct netlink_skb_parms *));
void cn_del_callback(struct cb_id *);
-int cn_netlink_send(struct cn_msg *, u32, gfp_t);
+int cn_netlink_send(struct cn_msg *, u32, gfp_t, struct net *);
int cn_queue_add_callback(struct cn_queue_dev *dev, char *name, struct cb_id *id, void (*callback)(struct cn_msg *, struct netlink_skb_parms *));
void cn_queue_del_callback(struct cn_queue_dev *dev, struct cb_id *id);
--
1.7.2.5
More information about the drbd-dev
mailing list