| author | Felix Blyakher <felixb@sgi.com> | 2009-06-10 17:07:47 -0500 |
|---|---|---|
| committer | Felix Blyakher <felixb@sgi.com> | 2009-06-10 17:07:47 -0500 |
| commit | 4e73e0eb633f8a1b5cbf20e7f42c6dbfec1d1ca7 | (patch) |
| tree | 0cea46e43f0625244c3d06a71d6559e5ec5419ca | /net |
| parent | 4156e735d3abde8e9243b5d22f7999dd3fffab2e | (diff) |
| parent | 07a2039b8eb0af4ff464efd3dfd95de5c02648c6 | (diff) |
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6
Diffstat (limited to 'net')
123 files changed, 2012 insertions, 1496 deletions
diff --git a/net/802/fddi.c b/net/802/fddi.c index f1611a1e06a7..539e6064e6d4 100644 --- a/net/802/fddi.c +++ b/net/802/fddi.c @@ -215,3 +215,5 @@ struct net_device *alloc_fddidev(int sizeof_priv) return alloc_netdev(sizeof_priv, "fddi%d", fddi_setup); } EXPORT_SYMBOL(alloc_fddidev); + +MODULE_LICENSE("GPL"); diff --git a/net/802/tr.c b/net/802/tr.c index e7eb13084d71..e874447ad144 100644 --- a/net/802/tr.c +++ b/net/802/tr.c @@ -561,6 +561,9 @@ static int rif_seq_show(struct seq_file *seq, void *v) } seq_putc(seq, '\n'); } + + if (dev) + dev_put(dev); } return 0; } diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 2b7390e377b3..d1e10546eb85 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -492,6 +492,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, continue; dev_change_flags(vlandev, flgs & ~IFF_UP); + vlan_transfer_operstate(dev, vlandev); } break; @@ -507,6 +508,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, continue; dev_change_flags(vlandev, flgs | IFF_UP); + vlan_transfer_operstate(dev, vlandev); } break; diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 654e45f5719d..c67fe6f75653 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -121,8 +121,10 @@ int vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp, if (!skb) return NET_RX_DROP; - if (netpoll_rx_on(skb)) + if (netpoll_rx_on(skb)) { + skb->protocol = eth_type_trans(skb, skb->dev); return vlan_hwaccel_receive_skb(skb, grp, vlan_tci); + } return napi_frags_finish(napi, skb, vlan_gro_common(napi, grp, vlan_tci, skb)); diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 1b34135cf990..b4b9068e55a7 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -462,6 +462,7 @@ static int vlan_dev_open(struct net_device *dev) if (vlan->flags & VLAN_FLAG_GVRP) vlan_gvrp_request_join(dev); + netif_carrier_on(dev); return 0; clear_allmulti: @@ -471,6 +472,7 @@ del_unicast: if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr)) dev_unicast_delete(real_dev, dev->dev_addr, ETH_ALEN); out: + netif_carrier_off(dev); return err; } @@ -492,6 +494,7 @@ static int vlan_dev_stop(struct net_device *dev) if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr)) dev_unicast_delete(real_dev, dev->dev_addr, dev->addr_len); + netif_carrier_off(dev); return 0; } @@ -612,6 +615,8 @@ static int vlan_dev_init(struct net_device *dev) struct net_device *real_dev = vlan_dev_info(dev)->real_dev; int subclass = 0; + netif_carrier_off(dev); + /* IFF_BROADCAST|IFF_MULTICAST; ??? 
*/ dev->flags = real_dev->flags & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI); dev->iflink = real_dev->ifindex; @@ -668,7 +673,8 @@ static int vlan_ethtool_get_settings(struct net_device *dev, const struct vlan_dev_info *vlan = vlan_dev_info(dev); struct net_device *real_dev = vlan->real_dev; - if (!real_dev->ethtool_ops->get_settings) + if (!real_dev->ethtool_ops || + !real_dev->ethtool_ops->get_settings) return -EOPNOTSUPP; return real_dev->ethtool_ops->get_settings(real_dev, cmd); diff --git a/net/9p/client.c b/net/9p/client.c index 1eb580c38fbb..dd43a8289b0d 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -203,7 +203,6 @@ static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag) p9pdu_reset(req->tc); p9pdu_reset(req->rc); - req->flush_tag = 0; req->tc->tag = tag-1; req->status = REQ_STATUS_ALLOC; @@ -324,35 +323,9 @@ static void p9_free_req(struct p9_client *c, struct p9_req_t *r) */ void p9_client_cb(struct p9_client *c, struct p9_req_t *req) { - struct p9_req_t *other_req; - unsigned long flags; - P9_DPRINTK(P9_DEBUG_MUX, " tag %d\n", req->tc->tag); - - if (req->status == REQ_STATUS_ERROR) - wake_up(req->wq); - - if (req->flush_tag) { /* flush receive path */ - P9_DPRINTK(P9_DEBUG_9P, "<<< RFLUSH %d\n", req->tc->tag); - spin_lock_irqsave(&c->lock, flags); - other_req = p9_tag_lookup(c, req->flush_tag); - if (other_req->status != REQ_STATUS_FLSH) /* stale flush */ - spin_unlock_irqrestore(&c->lock, flags); - else { - other_req->status = REQ_STATUS_FLSHD; - spin_unlock_irqrestore(&c->lock, flags); - wake_up(other_req->wq); - } - p9_free_req(c, req); - } else { /* normal receive path */ - P9_DPRINTK(P9_DEBUG_MUX, "normal: tag %d\n", req->tc->tag); - spin_lock_irqsave(&c->lock, flags); - if (req->status != REQ_STATUS_FLSHD) - req->status = REQ_STATUS_RCVD; - spin_unlock_irqrestore(&c->lock, flags); - wake_up(req->wq); - P9_DPRINTK(P9_DEBUG_MUX, "wakeup: %d\n", req->tc->tag); - } + wake_up(req->wq); + P9_DPRINTK(P9_DEBUG_MUX, "wakeup: %d\n", req->tc->tag); } EXPORT_SYMBOL(p9_client_cb); @@ -486,9 +459,15 @@ static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq) if (IS_ERR(req)) return PTR_ERR(req); - req->flush_tag = oldtag; - /* we don't free anything here because RPC isn't complete */ + /* if we haven't received a response for oldreq, + remove it from the list. */ + spin_lock(&c->lock); + if (oldreq->status == REQ_STATUS_FLSH) + list_del(&oldreq->req_list); + spin_unlock(&c->lock); + + p9_free_req(c, req); return 0; } @@ -509,7 +488,6 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) struct p9_req_t *req; unsigned long flags; int sigpending; - int flushed = 0; P9_DPRINTK(P9_DEBUG_MUX, "client %p op %d\n", c, type); @@ -546,42 +524,28 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) 
goto reterr; } - /* if it was a flush we just transmitted, return our tag */ - if (type == P9_TFLUSH) - return req; -again: P9_DPRINTK(P9_DEBUG_MUX, "wait %p tag: %d\n", req->wq, tag); err = wait_event_interruptible(*req->wq, req->status >= REQ_STATUS_RCVD); - P9_DPRINTK(P9_DEBUG_MUX, "wait %p tag: %d returned %d (flushed=%d)\n", - req->wq, tag, err, flushed); + P9_DPRINTK(P9_DEBUG_MUX, "wait %p tag: %d returned %d\n", + req->wq, tag, err); if (req->status == REQ_STATUS_ERROR) { P9_DPRINTK(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err); err = req->t_err; - } else if (err == -ERESTARTSYS && flushed) { - P9_DPRINTK(P9_DEBUG_MUX, "flushed - going again\n"); - goto again; - } else if (req->status == REQ_STATUS_FLSHD) { - P9_DPRINTK(P9_DEBUG_MUX, "flushed - erestartsys\n"); - err = -ERESTARTSYS; } - if ((err == -ERESTARTSYS) && (c->status == Connected) && (!flushed)) { + if ((err == -ERESTARTSYS) && (c->status == Connected)) { P9_DPRINTK(P9_DEBUG_MUX, "flushing\n"); - spin_lock_irqsave(&c->lock, flags); - if (req->status == REQ_STATUS_SENT) - req->status = REQ_STATUS_FLSH; - spin_unlock_irqrestore(&c->lock, flags); sigpending = 1; - flushed = 1; clear_thread_flag(TIF_SIGPENDING); - if (c->trans_mod->cancel(c, req)) { - err = p9_client_flush(c, req); - if (err == 0) - goto again; - } + if (c->trans_mod->cancel(c, req)) + p9_client_flush(c, req); + + /* if we received the response anyway, don't signal error */ + if (req->status == REQ_STATUS_RCVD) + err = 0; } if (sigpending) { @@ -1244,19 +1208,53 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid) ret->name, ret->uid, ret->gid, ret->muid, ret->extension, ret->n_uid, ret->n_gid, ret->n_muid); + p9_free_req(clnt, req); + return ret; + free_and_error: p9_free_req(clnt, req); error: - return ret; + kfree(ret); + return ERR_PTR(err); } EXPORT_SYMBOL(p9_client_stat); +static int p9_client_statsize(struct p9_wstat *wst, int optional) +{ + int ret; + + /* size[2] type[2] dev[4] qid[13] */ + /* mode[4] atime[4] mtime[4] length[8]*/ + /* name[s] uid[s] gid[s] muid[s] */ + ret = 2+2+4+13+4+4+4+8+2+2+2+2; + + if (wst->name) + ret += strlen(wst->name); + if (wst->uid) + ret += strlen(wst->uid); + if (wst->gid) + ret += strlen(wst->gid); + if (wst->muid) + ret += strlen(wst->muid); + + if (optional) { + ret += 2+4+4+4; /* extension[s] n_uid[4] n_gid[4] n_muid[4] */ + if (wst->extension) + ret += strlen(wst->extension); + } + + return ret; +} + int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst) { int err; struct p9_req_t *req; struct p9_client *clnt; + err = 0; + clnt = fid->clnt; + wst->size = p9_client_statsize(wst, clnt->dotu); P9_DPRINTK(P9_DEBUG_9P, ">>> TWSTAT fid %d\n", fid->fid); P9_DPRINTK(P9_DEBUG_9P, " sz=%x type=%x dev=%x qid=%x.%llx.%x\n" @@ -1268,10 +1266,8 @@ int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst) wst->atime, wst->mtime, (unsigned long long)wst->length, wst->name, wst->uid, wst->gid, wst->muid, wst->extension, wst->n_uid, wst->n_gid, wst->n_muid); - err = 0; - clnt = fid->clnt; - req = p9_client_rpc(clnt, P9_TWSTAT, "dwS", fid->fid, 0, wst); + req = p9_client_rpc(clnt, P9_TWSTAT, "dwS", fid->fid, wst->size, wst); if (IS_ERR(req)) { err = PTR_ERR(req); goto error; diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index c613ed08a5ee..a2a1814c7a8d 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -213,8 +213,8 @@ static void p9_conn_cancel(struct p9_conn *m, int err) spin_unlock_irqrestore(&m->client->lock, flags); list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) { - 
list_del(&req->req_list); P9_DPRINTK(P9_DEBUG_ERROR, "call back req %p\n", req); + list_del(&req->req_list); p9_client_cb(m->client, req); } } @@ -336,7 +336,8 @@ static void p9_read_work(struct work_struct *work) "mux %p pkt: size: %d bytes tag: %d\n", m, n, tag); m->req = p9_tag_lookup(m->client, tag); - if (!m->req) { + if (!m->req || (m->req->status != REQ_STATUS_SENT && + m->req->status != REQ_STATUS_FLSH)) { P9_DPRINTK(P9_DEBUG_ERROR, "Unexpected packet tag %d\n", tag); err = -EIO; @@ -361,10 +362,11 @@ static void p9_read_work(struct work_struct *work) if ((m->req) && (m->rpos == m->rsize)) { /* packet is read in */ P9_DPRINTK(P9_DEBUG_TRANS, "got new packet\n"); spin_lock(&m->client->lock); + if (m->req->status != REQ_STATUS_ERROR) + m->req->status = REQ_STATUS_RCVD; list_del(&m->req->req_list); spin_unlock(&m->client->lock); p9_client_cb(m->client, m->req); - m->rbuf = NULL; m->rpos = 0; m->rsize = 0; @@ -454,6 +456,7 @@ static void p9_write_work(struct work_struct *work) req = list_entry(m->unsent_req_list.next, struct p9_req_t, req_list); req->status = REQ_STATUS_SENT; + P9_DPRINTK(P9_DEBUG_TRANS, "move req %p\n", req); list_move_tail(&req->req_list, &m->req_list); m->wbuf = req->tc->sdata; @@ -683,12 +686,13 @@ static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req) P9_DPRINTK(P9_DEBUG_TRANS, "client %p req %p\n", client, req); spin_lock(&client->lock); - list_del(&req->req_list); if (req->status == REQ_STATUS_UNSENT) { + list_del(&req->req_list); req->status = REQ_STATUS_FLSHD; ret = 0; - } + } else if (req->status == REQ_STATUS_SENT) + req->status = REQ_STATUS_FLSH; spin_unlock(&client->lock); diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index 7fa0eb20b2f6..ac4990041ebb 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -295,6 +295,7 @@ handle_recv(struct p9_client *client, struct p9_trans_rdma *rdma, goto err_out; req->rc = c->rc; + req->status = REQ_STATUS_RCVD; p9_client_cb(client, req); return; diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index 2d7781ec663b..bb8579a141a8 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -134,6 +134,7 @@ static void req_done(struct virtqueue *vq) P9_DPRINTK(P9_DEBUG_TRANS, ": rc %p\n", rc); P9_DPRINTK(P9_DEBUG_TRANS, ": lookup tag %d\n", rc->tag); req = p9_tag_lookup(chan->client, rc->tag); + req->status = REQ_STATUS_RCVD; p9_client_cb(chan->client, req); } } diff --git a/net/Kconfig b/net/Kconfig index ec93e7e38b38..c19f549c8e74 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -119,12 +119,6 @@ menuconfig NETFILTER <file:Documentation/Changes> under "iptables" for the location of these packages. - Make sure to say N to "Fast switching" below if you intend to say Y - here, as Fast switching currently bypasses netfilter. - - Chances are that you should say Y here if you compile a kernel which - will run as a router and N for regular hosts. If unsure, say N. - if NETFILTER config NETFILTER_DEBUG @@ -140,7 +134,7 @@ config NETFILTER_ADVANCED default y help If you say Y here you can select between all the netfilter modules. - If you say N the more ununsual ones will not be shown and the + If you say N the more unusual ones will not be shown and the basic ones needed by most people will default to 'M'. If unsure, say Y. 
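Aside: the net/9p hunks above drop the old flush_tag plumbing in favor of a per-request status machine, where the transport's cancel hook decides whether a TFLUSH must go on the wire and the read side completes whichever of the reply or the flush wins the race. The sketch below is a minimal user-space model of that machine, not kernel code: the REQ_STATUS_* names mirror the patch, but the enum values, struct, cancel() and main() are invented for illustration.

```c
#include <stdio.h>

/* Illustrative request states. The names mirror the patch's
 * REQ_STATUS_* constants; the ordering here is an assumption,
 * chosen so that "status >= RCVD" means "done waiting". */
enum req_status {
	REQ_STATUS_ALLOC,
	REQ_STATUS_UNSENT,
	REQ_STATUS_SENT,
	REQ_STATUS_FLSH,	/* TFLUSH issued, reply still possible */
	REQ_STATUS_RCVD,
	REQ_STATUS_FLSHD,	/* flushed before any reply arrived */
	REQ_STATUS_ERROR,
};

struct req {
	int tag;
	enum req_status status;
};

/* Modelled on p9_fd_cancel() after the patch: an unsent request is
 * dropped outright (no TFLUSH needed); a sent one is only marked
 * FLSH so the read path still accepts a late reply.
 * Returns nonzero when the caller must transmit a TFLUSH. */
static int cancel(struct req *r)
{
	if (r->status == REQ_STATUS_UNSENT) {
		r->status = REQ_STATUS_FLSHD;
		return 0;
	}
	if (r->status == REQ_STATUS_SENT)
		r->status = REQ_STATUS_FLSH;
	return 1;
}

int main(void)
{
	struct req r = { .tag = 7, .status = REQ_STATUS_SENT };

	if (cancel(&r))
		printf("tag %d: send TFLUSH, state=%d\n", r.tag, r.status);

	/* The reply can still win the race with the flush; the client
	 * then suppresses the error, as p9_client_rpc() does above. */
	r.status = REQ_STATUS_RCVD;
	printf("tag %d: completed ok=%d\n", r.tag,
	       r.status >= REQ_STATUS_RCVD && r.status != REQ_STATUS_ERROR);
	return 0;
}
```

The design point visible in p9_client_rpc() above: the waiter blocks until status >= REQ_STATUS_RCVD, so a response that lands after cancel() has marked the request REQ_STATUS_FLSH still completes it normally, and the -ERESTARTSYS is cleared.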
diff --git a/net/atm/br2684.c b/net/atm/br2684.c index 334fcd4a4ea4..3100a8940afc 100644 --- a/net/atm/br2684.c +++ b/net/atm/br2684.c @@ -549,6 +549,7 @@ static void br2684_setup(struct net_device *netdev) struct br2684_dev *brdev = BRPRIV(netdev); ether_setup(netdev); + brdev->net_dev = netdev; netdev->netdev_ops = &br2684_netdev_ops; diff --git a/net/ax25/ax25_uid.c b/net/ax25/ax25_uid.c index 57aeba729bae..832bcf092a01 100644 --- a/net/ax25/ax25_uid.c +++ b/net/ax25/ax25_uid.c @@ -148,9 +148,13 @@ static void *ax25_uid_seq_start(struct seq_file *seq, loff_t *pos) { struct ax25_uid_assoc *pt; struct hlist_node *node; - int i = 0; + int i = 1; read_lock(&ax25_uid_lock); + + if (*pos == 0) + return SEQ_START_TOKEN; + ax25_uid_for_each(pt, node, &ax25_uid_list) { if (i == *pos) return pt; @@ -162,8 +166,10 @@ static void *ax25_uid_seq_start(struct seq_file *seq, loff_t *pos) static void *ax25_uid_seq_next(struct seq_file *seq, void *v, loff_t *pos) { ++*pos; - - return hlist_entry(((ax25_uid_assoc *)v)->uid_node.next, + if (v == SEQ_START_TOKEN) + return ax25_uid_list.first; + else + return hlist_entry(((ax25_uid_assoc *)v)->uid_node.next, ax25_uid_assoc, uid_node); } diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 1181db08d9de..fa47d5d84f5c 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -171,10 +171,8 @@ static void hci_conn_timeout(unsigned long arg) switch (conn->state) { case BT_CONNECT: case BT_CONNECT2: - if (conn->type == ACL_LINK) + if (conn->type == ACL_LINK && conn->out) hci_acl_connect_cancel(conn); - else - hci_acl_disconn(conn, 0x13); break; case BT_CONFIG: case BT_CONNECTED: @@ -215,6 +213,7 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst) conn->state = BT_OPEN; conn->power_save = 1; + conn->disc_timeout = HCI_DISCONN_TIMEOUT; switch (type) { case ACL_LINK: @@ -247,6 +246,8 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst) if (hdev->notify) hdev->notify(hdev, HCI_NOTIFY_CONN_ADD); + hci_conn_init_sysfs(conn); + tasklet_enable(&hdev->tx_task); return conn; @@ -289,6 +290,8 @@ int hci_conn_del(struct hci_conn *conn) hci_conn_del_sysfs(conn); + hci_dev_put(hdev); + return 0; } @@ -424,12 +427,9 @@ int hci_conn_security(struct hci_conn *conn, __u8 sec_level, __u8 auth_type) if (sec_level == BT_SECURITY_SDP) return 1; - if (sec_level == BT_SECURITY_LOW) { - if (conn->ssp_mode > 0 && conn->hdev->ssp_mode > 0) - return hci_conn_auth(conn, sec_level, auth_type); - else - return 1; - } + if (sec_level == BT_SECURITY_LOW && + (!conn->ssp_mode || !conn->hdev->ssp_mode)) + return 1; if (conn->link_mode & HCI_LM_ENCRYPT) return hci_conn_auth(conn, sec_level, auth_type); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 55534244c3a0..184ba0a88ec0 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -866,8 +866,16 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s hci_dev_lock(hdev); conn = hci_conn_hash_lookup_ba(hdev, ev->link_type, &ev->bdaddr); - if (!conn) - goto unlock; + if (!conn) { + if (ev->link_type != SCO_LINK) + goto unlock; + + conn = hci_conn_hash_lookup_ba(hdev, ESCO_LINK, &ev->bdaddr); + if (!conn) + goto unlock; + + conn->type = SCO_LINK; + } if (!ev->status) { conn->handle = __le16_to_cpu(ev->handle); @@ -875,6 +883,7 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s if (conn->type == ACL_LINK) { conn->state = BT_CONFIG; hci_conn_hold(conn); + 
conn->disc_timeout = HCI_DISCONN_TIMEOUT; } else conn->state = BT_CONNECTED; @@ -1055,9 +1064,14 @@ static inline void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *s hci_proto_connect_cfm(conn, ev->status); hci_conn_put(conn); } - } else + } else { hci_auth_cfm(conn, ev->status); + hci_conn_hold(conn); + conn->disc_timeout = HCI_DISCONN_TIMEOUT; + hci_conn_put(conn); + } + if (test_bit(HCI_CONN_ENCRYPT_PEND, &conn->pend)) { if (!ev->status) { struct hci_cp_set_conn_encrypt cp; @@ -1471,7 +1485,21 @@ static inline void hci_mode_change_evt(struct hci_dev *hdev, struct sk_buff *skb static inline void hci_pin_code_request_evt(struct hci_dev *hdev, struct sk_buff *skb) { + struct hci_ev_pin_code_req *ev = (void *) skb->data; + struct hci_conn *conn; + BT_DBG("%s", hdev->name); + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); + if (conn && conn->state == BT_CONNECTED) { + hci_conn_hold(conn); + conn->disc_timeout = HCI_PAIRING_TIMEOUT; + hci_conn_put(conn); + } + + hci_dev_unlock(hdev); } static inline void hci_link_key_request_evt(struct hci_dev *hdev, struct sk_buff *skb) @@ -1481,7 +1509,21 @@ static inline void hci_link_key_request_evt(struct hci_dev *hdev, struct sk_buff static inline void hci_link_key_notify_evt(struct hci_dev *hdev, struct sk_buff *skb) { + struct hci_ev_link_key_notify *ev = (void *) skb->data; + struct hci_conn *conn; + BT_DBG("%s", hdev->name); + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); + if (conn) { + hci_conn_hold(conn); + conn->disc_timeout = HCI_DISCONN_TIMEOUT; + hci_conn_put(conn); + } + + hci_dev_unlock(hdev); } static inline void hci_clock_offset_evt(struct hci_dev *hdev, struct sk_buff *skb) @@ -1646,20 +1688,28 @@ static inline void hci_sync_conn_complete_evt(struct hci_dev *hdev, struct sk_bu conn->type = SCO_LINK; } - if (conn->out && ev->status == 0x1c && conn->attempt < 2) { - conn->pkt_type = (hdev->esco_type & SCO_ESCO_MASK) | - (hdev->esco_type & EDR_ESCO_MASK); - hci_setup_sync(conn, conn->link->handle); - goto unlock; - } - - if (!ev->status) { + switch (ev->status) { + case 0x00: conn->handle = __le16_to_cpu(ev->handle); conn->state = BT_CONNECTED; hci_conn_add_sysfs(conn); - } else + break; + + case 0x1c: /* SCO interval rejected */ + case 0x1f: /* Unspecified error */ + if (conn->out && conn->attempt < 2) { + conn->pkt_type = (hdev->esco_type & SCO_ESCO_MASK) | + (hdev->esco_type & EDR_ESCO_MASK); + hci_setup_sync(conn, conn->link->handle); + goto unlock; + } + /* fall through */ + + default: conn->state = BT_CLOSED; + break; + } hci_proto_connect_cfm(conn, ev->status); if (ev->status) diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index ed82796d4a0f..95f7a7a544b4 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -9,8 +9,7 @@ struct class *bt_class = NULL; EXPORT_SYMBOL_GPL(bt_class); -static struct workqueue_struct *btaddconn; -static struct workqueue_struct *btdelconn; +static struct workqueue_struct *bt_workq; static inline char *link_typetostr(int type) { @@ -88,35 +87,17 @@ static struct device_type bt_link = { static void add_conn(struct work_struct *work) { - struct hci_conn *conn = container_of(work, struct hci_conn, work); + struct hci_conn *conn = container_of(work, struct hci_conn, work_add); + struct hci_dev *hdev = conn->hdev; - flush_workqueue(btdelconn); + dev_set_name(&conn->dev, "%s:%d", hdev->name, conn->handle); if (device_add(&conn->dev) < 0) { BT_ERR("Failed to register connection 
device"); return; } -} - -void hci_conn_add_sysfs(struct hci_conn *conn) -{ - struct hci_dev *hdev = conn->hdev; - - BT_DBG("conn %p", conn); - - conn->dev.type = &bt_link; - conn->dev.class = bt_class; - conn->dev.parent = &hdev->dev; - dev_set_name(&conn->dev, "%s:%d", hdev->name, conn->handle); - - dev_set_drvdata(&conn->dev, conn); - - device_initialize(&conn->dev); - - INIT_WORK(&conn->work, add_conn); - - queue_work(btaddconn, &conn->work); + hci_dev_hold(hdev); } /* @@ -131,9 +112,12 @@ static int __match_tty(struct device *dev, void *data) static void del_conn(struct work_struct *work) { - struct hci_conn *conn = container_of(work, struct hci_conn, work); + struct hci_conn *conn = container_of(work, struct hci_conn, work_del); struct hci_dev *hdev = conn->hdev; + if (!device_is_registered(&conn->dev)) + return; + while (1) { struct device *dev; @@ -146,19 +130,40 @@ static void del_conn(struct work_struct *work) device_del(&conn->dev); put_device(&conn->dev); + hci_dev_put(hdev); } -void hci_conn_del_sysfs(struct hci_conn *conn) +void hci_conn_init_sysfs(struct hci_conn *conn) { + struct hci_dev *hdev = conn->hdev; + BT_DBG("conn %p", conn); - if (!device_is_registered(&conn->dev)) - return; + conn->dev.type = &bt_link; + conn->dev.class = bt_class; + conn->dev.parent = &hdev->dev; - INIT_WORK(&conn->work, del_conn); + dev_set_drvdata(&conn->dev, conn); + + device_initialize(&conn->dev); - queue_work(btdelconn, &conn->work); + INIT_WORK(&conn->work_add, add_conn); + INIT_WORK(&conn->work_del, del_conn); +} + +void hci_conn_add_sysfs(struct hci_conn *conn) +{ + BT_DBG("conn %p", conn); + + queue_work(bt_workq, &conn->work_add); +} + +void hci_conn_del_sysfs(struct hci_conn *conn) +{ + BT_DBG("conn %p", conn); + + queue_work(bt_workq, &conn->work_del); } static inline char *host_typetostr(int type) @@ -435,20 +440,13 @@ void hci_unregister_sysfs(struct hci_dev *hdev) int __init bt_sysfs_init(void) { - btaddconn = create_singlethread_workqueue("btaddconn"); - if (!btaddconn) + bt_workq = create_singlethread_workqueue("bluetooth"); + if (!bt_workq) return -ENOMEM; - btdelconn = create_singlethread_workqueue("btdelconn"); - if (!btdelconn) { - destroy_workqueue(btaddconn); - return -ENOMEM; - } - bt_class = class_create(THIS_MODULE, "bluetooth"); if (IS_ERR(bt_class)) { - destroy_workqueue(btdelconn); - destroy_workqueue(btaddconn); + destroy_workqueue(bt_workq); return PTR_ERR(bt_class); } @@ -457,8 +455,7 @@ int __init bt_sysfs_init(void) void bt_sysfs_cleanup(void) { - destroy_workqueue(btaddconn); - destroy_workqueue(btdelconn); + destroy_workqueue(bt_workq); class_destroy(bt_class); } diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 1d0fb0f23c63..374536e050aa 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -1194,6 +1194,8 @@ void rfcomm_dlc_accept(struct rfcomm_dlc *d) rfcomm_send_ua(d->session, d->dlci); + rfcomm_dlc_clear_timer(d); + rfcomm_dlc_lock(d); d->state = BT_CONNECTED; d->state_change(d, 0); diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index abdc703a11d2..cab71ea2796d 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -1093,11 +1093,6 @@ static void rfcomm_tty_hangup(struct tty_struct *tty) } } -static int rfcomm_tty_read_proc(char *buf, char **start, off_t offset, int len, int *eof, void *unused) -{ - return 0; -} - static int rfcomm_tty_tiocmget(struct tty_struct *tty, struct file *filp) { struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data; @@ -1156,7 
+1151,6 @@ static const struct tty_operations rfcomm_ops = { .send_xchar = rfcomm_tty_send_xchar, .hangup = rfcomm_tty_hangup, .wait_until_sent = rfcomm_tty_wait_until_sent, - .read_proc = rfcomm_tty_read_proc, .tiocmget = rfcomm_tty_tiocmget, .tiocmset = rfcomm_tty_tiocmset, }; diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 30b88777c3df..5ee1a3682bf2 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -134,6 +134,10 @@ struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb) if (skb->protocol == htons(ETH_P_PAUSE)) goto drop; + /* If STP is turned off, then forward */ + if (p->br->stp_enabled == BR_NO_STP && dest[5] == 0) + goto forward; + if (NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev, NULL, br_handle_local_finish)) return NULL; /* frame consumed by filter */ @@ -141,6 +145,7 @@ struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb) return skb; /* continue processing */ } +forward: switch (p->state) { case BR_STATE_FORWARDING: rhook = rcu_dereference(br_should_route_hook); diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 3953ac4214c8..e4a418fcb35b 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -788,15 +788,23 @@ static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff *skb, return NF_STOLEN; } +#if defined(CONFIG_NF_CONNTRACK_IPV4) || defined(CONFIG_NF_CONNTRACK_IPV4_MODULE) static int br_nf_dev_queue_xmit(struct sk_buff *skb) { - if (skb->protocol == htons(ETH_P_IP) && + if (skb->nfct != NULL && + (skb->protocol == htons(ETH_P_IP) || IS_VLAN_IP(skb)) && skb->len > skb->dev->mtu && !skb_is_gso(skb)) return ip_fragment(skb, br_dev_queue_push_xmit); else return br_dev_queue_push_xmit(skb); } +#else +static int br_nf_dev_queue_xmit(struct sk_buff *skb) +{ + return br_dev_queue_push_xmit(skb); +} +#endif /* PF_BRIDGE/POST_ROUTING ********************************************/ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb, diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index 6e63ec3f1fcf..0660515f3992 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -297,6 +297,9 @@ void br_topology_change_detection(struct net_bridge *br) { int isroot = br_is_root_bridge(br); + if (br->stp_enabled != BR_KERNEL_STP) + return; + pr_info("%s: topology change detected, %s\n", br->dev->name, isroot ? "propagating" : "sending tcn bpdu"); diff --git a/net/can/af_can.c b/net/can/af_can.c index 547bafc79e28..10f0528c3bf5 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -674,8 +674,8 @@ static int can_rcv(struct sk_buff *skb, struct net_device *dev, rcu_read_unlock(); - /* free the skbuff allocated by the netdevice driver */ - kfree_skb(skb); + /* consume the skbuff allocated by the netdevice driver */ + consume_skb(skb); if (matches > 0) { can_stats.matches++; diff --git a/net/core/datagram.c b/net/core/datagram.c index d0de644b378d..b01a76abe1d2 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -64,13 +64,25 @@ static inline int connection_based(struct sock *sk) return sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM; } +static int receiver_wake_function(wait_queue_t *wait, unsigned mode, int sync, + void *key) +{ + unsigned long bits = (unsigned long)key; + + /* + * Avoid a wakeup if event not interesting for us + */ + if (bits && !(bits & (POLLIN | POLLERR))) + return 0; + return autoremove_wake_function(wait, mode, sync, key); +} /* * Wait for a packet.. 
*/ static int wait_for_packet(struct sock *sk, int *err, long *timeo_p) { int error; - DEFINE_WAIT(wait); + DEFINE_WAIT_FUNC(wait, receiver_wake_function); prepare_to_wait_exclusive(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); diff --git a/net/core/dev.c b/net/core/dev.c index 52fea5b28ca6..e2e9e4af3ace 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1336,7 +1336,12 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) { struct packet_type *ptype; +#ifdef CONFIG_NET_CLS_ACT + if (!(skb->tstamp.tv64 && (G_TC_FROM(skb->tc_verd) & AT_INGRESS))) + net_timestamp(skb); +#else net_timestamp(skb); +#endif rcu_read_lock(); list_for_each_entry_rcu(ptype, &ptype_all, list) { @@ -1430,7 +1435,7 @@ void netif_device_detach(struct net_device *dev) { if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) && netif_running(dev)) { - netif_stop_queue(dev); + netif_tx_stop_all_queues(dev); } } EXPORT_SYMBOL(netif_device_detach); @@ -1445,7 +1450,7 @@ void netif_device_attach(struct net_device *dev) { if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) && netif_running(dev)) { - netif_wake_queue(dev); + netif_tx_wake_all_queues(dev); __netdev_watchdog_up(dev); } } @@ -1730,11 +1735,12 @@ u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb) { u32 hash; - if (skb_rx_queue_recorded(skb)) { - hash = skb_get_rx_queue(skb); - } else if (skb->sk && skb->sk->sk_hash) { + if (skb_rx_queue_recorded(skb)) + return skb_get_rx_queue(skb) % dev->real_num_tx_queues; + + if (skb->sk && skb->sk->sk_hash) hash = skb->sk->sk_hash; - } else + else hash = skb->protocol; hash = jhash_1word(hash, skb_tx_hashrnd); @@ -2328,8 +2334,10 @@ static int napi_gro_complete(struct sk_buff *skb) struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK]; int err = -ENOENT; - if (NAPI_GRO_CB(skb)->count == 1) + if (NAPI_GRO_CB(skb)->count == 1) { + skb_shinfo(skb)->gso_size = 0; goto out; + } rcu_read_lock(); list_for_each_entry_rcu(ptype, head, list) { @@ -2348,7 +2356,6 @@ static int napi_gro_complete(struct sk_buff *skb) } out: - skb_shinfo(skb)->gso_size = 0; return netif_receive_skb(skb); } @@ -2472,8 +2479,9 @@ static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) return GRO_NORMAL; for (p = napi->gro_list; p; p = p->next) { - NAPI_GRO_CB(p)->same_flow = !compare_ether_header( - skb_mac_header(p), skb_gro_mac_header(skb)); + NAPI_GRO_CB(p)->same_flow = (p->dev == skb->dev) + && !compare_ether_header(skb_mac_header(p), + skb_gro_mac_header(skb)); NAPI_GRO_CB(p)->flush = 0; } @@ -2538,9 +2546,9 @@ struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi, } BUG_ON(info->nr_frags > MAX_SKB_FRAGS); - frag = &info->frags[info->nr_frags - 1]; + frag = info->frags; - for (i = skb_shinfo(skb)->nr_frags; i < info->nr_frags; i++) { + for (i = 0; i < info->nr_frags; i++) { skb_fill_page_desc(skb, i, frag->page, frag->page_offset, frag->size); frag++; @@ -4398,7 +4406,7 @@ int register_netdevice(struct net_device *dev) dev->iflink = -1; #ifdef CONFIG_COMPAT_NET_DEV_OPS - /* Netdevice_ops API compatiability support. + /* Netdevice_ops API compatibility support. * This is temporary until all network devices are converted. */ if (dev->netdev_ops) { @@ -4409,7 +4417,7 @@ int register_netdevice(struct net_device *dev) dev->name, netdev_drivername(dev, drivername, 64)); /* This works only because net_device_ops and the - compatiablity structure are the same. */ + compatibility structure are the same. 
*/ dev->netdev_ops = (void *) &(dev->init); } #endif diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 244ca56dffac..d9d5160610d5 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -261,8 +261,7 @@ static int ethtool_get_rxnfc(struct net_device *dev, void __user *useraddr) ret = 0; err_out: - if (rule_buf) - kfree(rule_buf); + kfree(rule_buf); return ret; } diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index 9cc9f95b109e..6d62d4618cfc 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -66,9 +66,9 @@ NOTES. - * The stored value for avbps is scaled by 2^5, so that maximal - rate is ~1Gbit, avpps is scaled by 2^10. - + * avbps is scaled by 2^5, avpps is scaled by 2^10. + * both values are reported as 32 bit unsigned values. bps can + overflow for fast links : max speed being 34360Mbit/sec * Minimal interval is HZ/4=250msec (it is the greatest common divisor for HZ=100 and HZ=1024 8)), maximal interval is (HZ*2^EST_MAX_INTERVAL)/4 = 8sec. Shorter intervals @@ -86,9 +86,9 @@ struct gen_estimator spinlock_t *stats_lock; int ewma_log; u64 last_bytes; + u64 avbps; u32 last_packets; u32 avpps; - u32 avbps; struct rcu_head e_rcu; struct rb_node node; }; @@ -115,6 +115,7 @@ static void est_timer(unsigned long arg) rcu_read_lock(); list_for_each_entry_rcu(e, &elist[idx].list, list) { u64 nbytes; + u64 brate; u32 npackets; u32 rate; @@ -125,9 +126,9 @@ static void est_timer(unsigned long arg) nbytes = e->bstats->bytes; npackets = e->bstats->packets; - rate = (nbytes - e->last_bytes)<<(7 - idx); + brate = (nbytes - e->last_bytes)<<(7 - idx); e->last_bytes = nbytes; - e->avbps += ((long)rate - (long)e->avbps) >> e->ewma_log; + e->avbps += ((s64)(brate - e->avbps)) >> e->ewma_log; e->rate_est->bps = (e->avbps+0xF)>>5; rate = (npackets - e->last_packets)<<(12 - idx); diff --git a/net/core/netpoll.c b/net/core/netpoll.c index b5873bdff612..64f51eec6576 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -175,9 +175,13 @@ static void service_arp_queue(struct netpoll_info *npi) void netpoll_poll(struct netpoll *np) { struct net_device *dev = np->dev; - const struct net_device_ops *ops = dev->netdev_ops; + const struct net_device_ops *ops; + + if (!dev || !netif_running(dev)) + return; - if (!dev || !netif_running(dev) || !ops->ndo_poll_controller) + ops = dev->netdev_ops; + if (!ops->ndo_poll_controller) return; /* Process pending work on NIC */ diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 3779c1438c11..0666a827bc62 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2447,7 +2447,7 @@ static inline void free_SAs(struct pktgen_dev *pkt_dev) if (pkt_dev->cflows) { /* let go of the SAs if we have them */ int i = 0; - for (; i < pkt_dev->nflows; i++){ + for (; i < pkt_dev->cflows; i++) { struct xfrm_state *x = pkt_dev->flows[i].x; if (x) { xfrm_state_put(x); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index ce6356cd9f71..e505b5392e1e 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -502,7 +502,9 @@ int skb_recycle_check(struct sk_buff *skb, int skb_size) shinfo->gso_segs = 0; shinfo->gso_type = 0; shinfo->ip6_frag_id = 0; + shinfo->tx_flags.flags = 0; shinfo->frag_list = NULL; + memset(&shinfo->hwtstamps, 0, sizeof(shinfo->hwtstamps)); memset(skb, 0, offsetof(struct sk_buff, tail)); skb->data = skb->head + NET_SKB_PAD; @@ -1365,9 +1367,8 @@ static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i) static inline struct page *linear_to_page(struct page *page, unsigned int *len, unsigned int *offset, 
- struct sk_buff *skb) + struct sk_buff *skb, struct sock *sk) { - struct sock *sk = skb->sk; struct page *p = sk->sk_sndmsg_page; unsigned int off; @@ -1405,13 +1406,14 @@ new_page: */ static inline int spd_fill_page(struct splice_pipe_desc *spd, struct page *page, unsigned int *len, unsigned int offset, - struct sk_buff *skb, int linear) + struct sk_buff *skb, int linear, + struct sock *sk) { if (unlikely(spd->nr_pages == PIPE_BUFFERS)) return 1; if (linear) { - page = linear_to_page(page, len, &offset, skb); + page = linear_to_page(page, len, &offset, skb, sk); if (!page) return 1; } else @@ -1442,7 +1444,8 @@ static inline void __segment_seek(struct page **page, unsigned int *poff, static inline int __splice_segment(struct page *page, unsigned int poff, unsigned int plen, unsigned int *off, unsigned int *len, struct sk_buff *skb, - struct splice_pipe_desc *spd, int linear) + struct splice_pipe_desc *spd, int linear, + struct sock *sk) { if (!*len) return 1; @@ -1465,7 +1468,7 @@ static inline int __splice_segment(struct page *page, unsigned int poff, /* the linear region may spread across several pages */ flen = min_t(unsigned int, flen, PAGE_SIZE - poff); - if (spd_fill_page(spd, page, &flen, poff, skb, linear)) + if (spd_fill_page(spd, page, &flen, poff, skb, linear, sk)) return 1; __segment_seek(&page, &poff, &plen, flen); @@ -1481,8 +1484,8 @@ static inline int __splice_segment(struct page *page, unsigned int poff, * pipe is full or if we already spliced the requested length. */ static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset, - unsigned int *len, - struct splice_pipe_desc *spd) + unsigned int *len, struct splice_pipe_desc *spd, + struct sock *sk) { int seg; @@ -1492,7 +1495,7 @@ static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset, if (__splice_segment(virt_to_page(skb->data), (unsigned long) skb->data & (PAGE_SIZE - 1), skb_headlen(skb), - offset, len, skb, spd, 1)) + offset, len, skb, spd, 1, sk)) return 1; /* @@ -1502,7 +1505,7 @@ static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset, const skb_frag_t *f = &skb_shinfo(skb)->frags[seg]; if (__splice_segment(f->page, f->page_offset, f->size, - offset, len, skb, spd, 0)) + offset, len, skb, spd, 0, sk)) return 1; } @@ -1528,12 +1531,13 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset, .ops = &sock_pipe_buf_ops, .spd_release = sock_spd_release, }; + struct sock *sk = skb->sk; /* * __skb_splice_bits() only fails if the output has no room left, * so no point in going over the frag_list for the error case. 
*/ - if (__skb_splice_bits(skb, &offset, &tlen, &spd)) + if (__skb_splice_bits(skb, &offset, &tlen, &spd, sk)) goto done; else if (!tlen) goto done; @@ -1545,14 +1549,13 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset, struct sk_buff *list = skb_shinfo(skb)->frag_list; for (; list && tlen; list = list->next) { - if (__skb_splice_bits(list, &offset, &tlen, &spd)) + if (__skb_splice_bits(list, &offset, &tlen, &spd, sk)) break; } } done: if (spd.nr_pages) { - struct sock *sk = skb->sk; int ret; /* @@ -2285,7 +2288,7 @@ unsigned int skb_seq_read(unsigned int consumed, const u8 **data, next_skb: block_limit = skb_headlen(st->cur_skb) + st->stepped_offset; - if (abs_offset < block_limit) { + if (abs_offset < block_limit && !st->frag_data) { *data = st->cur_skb->data + (abs_offset - st->stepped_offset); return block_limit - abs_offset; } diff --git a/net/core/sock.c b/net/core/sock.c index 0620046e4eba..7dbf3ffb35cc 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1677,7 +1677,7 @@ static void sock_def_error_report(struct sock *sk) { read_lock(&sk->sk_callback_lock); if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) - wake_up_interruptible(sk->sk_sleep); + wake_up_interruptible_poll(sk->sk_sleep, POLLERR); sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR); read_unlock(&sk->sk_callback_lock); } @@ -1686,7 +1686,8 @@ static void sock_def_readable(struct sock *sk, int len) { read_lock(&sk->sk_callback_lock); if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) - wake_up_interruptible_sync(sk->sk_sleep); + wake_up_interruptible_sync_poll(sk->sk_sleep, POLLIN | + POLLRDNORM | POLLRDBAND); sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); read_unlock(&sk->sk_callback_lock); } @@ -1700,7 +1701,8 @@ static void sock_def_write_space(struct sock *sk) */ if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) { if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) - wake_up_interruptible_sync(sk->sk_sleep); + wake_up_interruptible_sync_poll(sk->sk_sleep, POLLOUT | + POLLWRNORM | POLLWRBAND); /* Should agree with poll, otherwise some programs break */ if (sock_writeable(sk)) diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index b2cf91e4ccaa..5b919f7b45db 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -407,8 +407,8 @@ config INET_XFRM_MODE_BEET If unsure, say Y. config INET_LRO - tristate "Large Receive Offload (ipv4/tcp)" - + bool "Large Receive Offload (ipv4/tcp)" + default y ---help--- Support for Large Receive Offload (ipv4/tcp). 
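Aside: the net/core hunks above (datagram.c and sock.c) pair keyed wakeups with a filtering wake function: sk_sleep waiters are woken with a poll-event bitmask, and a datagram receiver installs a callback that ignores events it cannot act on. A minimal user-space sketch, assuming simplified poll-bit values; receiver_wake() stands in for the patch's receiver_wake_function(), and the kernel's autoremove_wake_function() fallback is only noted in a comment.

```c
#include <stdio.h>

/* Simplified poll bits; in the kernel these come from <linux/poll.h>. */
#define POLLIN  0x001
#define POLLOUT 0x004
#define POLLERR 0x008

/* Modelled on the patch's receiver_wake_function(): the wakeup key
 * carries the poll events that caused it, and a datagram receiver
 * only cares about POLLIN/POLLERR. Returns nonzero when the waiter
 * would actually be woken (the kernel then defers to
 * autoremove_wake_function()); key == 0 is an unkeyed wakeup and
 * must always wake. */
static int receiver_wake(unsigned long key)
{
	if (key && !(key & (POLLIN | POLLERR)))
		return 0;
	return 1;
}

int main(void)
{
	/* sock_def_write_space() now wakes with POLLOUT|... : filtered. */
	printf("POLLOUT wakeup wakes receiver? %d\n", receiver_wake(POLLOUT));
	/* sock_def_readable() wakes with POLLIN|... : passes through. */
	printf("POLLIN  wakeup wakes receiver? %d\n", receiver_wake(POLLIN));
	/* Legacy unkeyed wake_up() paths still wake everyone. */
	printf("unkeyed wakeup wakes receiver? %d\n", receiver_wake(0));
	return 0;
}
```

The payoff is fewer spurious context switches: a receiver sleeping in wait_for_packet() is no longer woken by write-space or other events it has no interest in.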
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index ec0ae490f0b6..33c7c85dfe40 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -986,9 +986,12 @@ fib_find_node(struct trie *t, u32 key) static struct node *trie_rebalance(struct trie *t, struct tnode *tn) { int wasfull; - t_key cindex, key = tn->key; + t_key cindex, key; struct tnode *tp; + preempt_disable(); + key = tn->key; + while (tn != NULL && (tp = node_parent((struct node *)tn)) != NULL) { cindex = tkey_extract_bits(key, tp->pos, tp->bits); wasfull = tnode_full(tp, tnode_get_child(tp, cindex)); @@ -1007,6 +1010,7 @@ static struct node *trie_rebalance(struct trie *t, struct tnode *tn) if (IS_TNODE(tn)) tn = (struct tnode *)resize(t, (struct tnode *)tn); + preempt_enable(); return (struct node *)tn; } diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 90d22ae0a419..88bf051d0cbb 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -139,6 +139,8 @@ __be32 ic_servaddr = NONE; /* Boot server IP address */ __be32 root_server_addr = NONE; /* Address of NFS server */ u8 root_server_path[256] = { 0, }; /* Path to mount as root */ +u32 ic_dev_xid; /* Device under configuration */ + /* vendor class identifier */ static char vendor_class_identifier[253] __initdata; @@ -932,6 +934,13 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str goto drop_unlock; } + /* Is it a reply for the device we are configuring? */ + if (b->xid != ic_dev_xid) { + if (net_ratelimit()) + printk(KERN_ERR "DHCP/BOOTP: Ignoring delayed packet \n"); + goto drop_unlock; + } + /* Parse extensions */ if (ext_len >= 4 && !memcmp(b->exten, ic_bootp_cookie, 4)) { /* Check magic cookie */ @@ -1115,6 +1124,9 @@ static int __init ic_dynamic(void) get_random_bytes(&timeout, sizeof(timeout)); timeout = CONF_BASE_TIMEOUT + (timeout % (unsigned) CONF_TIMEOUT_RANDOM); for (;;) { + /* Track the device we are configuring */ + ic_dev_xid = d->xid; + #ifdef IPCONFIG_BOOTP if (do_bootp && (d->able & IC_BOOTP)) ic_bootp_send_if(d, jiffies - start_jiffies); diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 35c5f6a5cb7c..831fe1879dc0 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -253,9 +253,9 @@ unsigned int arpt_do_table(struct sk_buff *skb, indev = in ? in->name : nulldevname; outdev = out ? out->name : nulldevname; - rcu_read_lock(); - private = rcu_dereference(table->private); - table_base = rcu_dereference(private->entries[smp_processor_id()]); + xt_info_rdlock_bh(); + private = table->private; + table_base = private->entries[smp_processor_id()]; e = get_entry(table_base, private->hook_entry[hook]); back = get_entry(table_base, private->underflow[hook]); @@ -273,6 +273,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, hdr_len = sizeof(*arp) + (2 * sizeof(struct in_addr)) + (2 * skb->dev->addr_len); + ADD_COUNTER(e->counters, hdr_len, 1); t = arpt_get_target(e); @@ -328,8 +329,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, e = (void *)e + e->next_offset; } } while (!hotdrop); - - rcu_read_unlock(); + xt_info_rdunlock_bh(); if (hotdrop) return NF_DROP; @@ -711,9 +711,12 @@ static void get_counters(const struct xt_table_info *t, /* Instead of clearing (by a previous call to memset()) * the counters and using adds, we set the counters * with data used by 'current' CPU - * We dont care about preemption here. 
+ * + * Bottom half has to be disabled to prevent deadlock + * if new softirq were to run and call ipt_do_table */ - curcpu = raw_smp_processor_id(); + local_bh_disable(); + curcpu = smp_processor_id(); i = 0; ARPT_ENTRY_ITERATE(t->entries[curcpu], @@ -726,73 +729,22 @@ static void get_counters(const struct xt_table_info *t, if (cpu == curcpu) continue; i = 0; + xt_info_wrlock(cpu); ARPT_ENTRY_ITERATE(t->entries[cpu], t->size, add_entry_to_counter, counters, &i); + xt_info_wrunlock(cpu); } -} - - -/* We're lazy, and add to the first CPU; overflow works its fey magic - * and everything is OK. */ -static int -add_counter_to_entry(struct arpt_entry *e, - const struct xt_counters addme[], - unsigned int *i) -{ - ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); - - (*i)++; - return 0; -} - -/* Take values from counters and add them back onto the current cpu */ -static void put_counters(struct xt_table_info *t, - const struct xt_counters counters[]) -{ - unsigned int i, cpu; - - local_bh_disable(); - cpu = smp_processor_id(); - i = 0; - ARPT_ENTRY_ITERATE(t->entries[cpu], - t->size, - add_counter_to_entry, - counters, - &i); local_bh_enable(); } -static inline int -zero_entry_counter(struct arpt_entry *e, void *arg) -{ - e->counters.bcnt = 0; - e->counters.pcnt = 0; - return 0; -} - -static void -clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info) -{ - unsigned int cpu; - const void *loc_cpu_entry = info->entries[raw_smp_processor_id()]; - - memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); - for_each_possible_cpu(cpu) { - memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size); - ARPT_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size, - zero_entry_counter, NULL); - } -} - static struct xt_counters *alloc_counters(struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; struct xt_table_info *private = table->private; - struct xt_table_info *info; /* We need atomic snapshot of counters: rest doesn't change * (other than comefrom, which userspace doesn't care @@ -802,30 +754,11 @@ static struct xt_counters *alloc_counters(struct xt_table *table) counters = vmalloc_node(countersize, numa_node_id()); if (counters == NULL) - goto nomem; - - info = xt_alloc_table_info(private->size); - if (!info) - goto free_counters; - - clone_counters(info, private); - - mutex_lock(&table->lock); - xt_table_entry_swap_rcu(private, info); - synchronize_net(); /* Wait until smoke has cleared */ + return ERR_PTR(-ENOMEM); - get_counters(info, counters); - put_counters(private, counters); - mutex_unlock(&table->lock); - - xt_free_table_info(info); + get_counters(private, counters); return counters; - - free_counters: - vfree(counters); - nomem: - return ERR_PTR(-ENOMEM); } static int copy_entries_to_user(unsigned int total_size, @@ -1094,8 +1027,9 @@ static int __do_replace(struct net *net, const char *name, (newinfo->number <= oldinfo->initial_entries)) module_put(t->me); - /* Get the old counters. */ + /* Get the old counters, and synchronize with replace */ get_counters(oldinfo, counters); + /* Decrease module usage counts and free resource */ loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; ARPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry, @@ -1165,10 +1099,23 @@ static int do_replace(struct net *net, void __user *user, unsigned int len) return ret; } +/* We're lazy, and add to the first CPU; overflow works its fey magic + * and everything is OK. 
*/ +static int +add_counter_to_entry(struct arpt_entry *e, + const struct xt_counters addme[], + unsigned int *i) +{ + ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); + + (*i)++; + return 0; +} + static int do_add_counters(struct net *net, void __user *user, unsigned int len, int compat) { - unsigned int i; + unsigned int i, curcpu; struct xt_counters_info tmp; struct xt_counters *paddc; unsigned int num_counters; @@ -1224,26 +1171,26 @@ static int do_add_counters(struct net *net, void __user *user, unsigned int len, goto free; } - mutex_lock(&t->lock); + local_bh_disable(); private = t->private; if (private->number != num_counters) { ret = -EINVAL; goto unlock_up_free; } - preempt_disable(); i = 0; /* Choose the copy that is on our node */ - loc_cpu_entry = private->entries[smp_processor_id()]; + curcpu = smp_processor_id(); + loc_cpu_entry = private->entries[curcpu]; + xt_info_wrlock(curcpu); ARPT_ENTRY_ITERATE(loc_cpu_entry, private->size, add_counter_to_entry, paddc, &i); - preempt_enable(); + xt_info_wrunlock(curcpu); unlock_up_free: - mutex_unlock(&t->lock); - + local_bh_enable(); xt_table_unlock(t); module_put(t->me); free: diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 82ee7c9049ff..2ec8d7290c40 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -338,10 +338,9 @@ ipt_do_table(struct sk_buff *skb, tgpar.hooknum = hook; IP_NF_ASSERT(table->valid_hooks & (1 << hook)); - - rcu_read_lock(); - private = rcu_dereference(table->private); - table_base = rcu_dereference(private->entries[smp_processor_id()]); + xt_info_rdlock_bh(); + private = table->private; + table_base = private->entries[smp_processor_id()]; e = get_entry(table_base, private->hook_entry[hook]); @@ -436,8 +435,7 @@ ipt_do_table(struct sk_buff *skb, e = (void *)e + e->next_offset; } } while (!hotdrop); - - rcu_read_unlock(); + xt_info_rdunlock_bh(); #ifdef DEBUG_ALLOW_ALL return NF_ACCEPT; @@ -896,10 +894,13 @@ get_counters(const struct xt_table_info *t, /* Instead of clearing (by a previous call to memset()) * the counters and using adds, we set the counters - * with data used by 'current' CPU - * We dont care about preemption here. + * with data used by 'current' CPU. + * + * Bottom half has to be disabled to prevent deadlock + * if new softirq were to run and call ipt_do_table */ - curcpu = raw_smp_processor_id(); + local_bh_disable(); + curcpu = smp_processor_id(); i = 0; IPT_ENTRY_ITERATE(t->entries[curcpu], @@ -912,74 +913,22 @@ get_counters(const struct xt_table_info *t, if (cpu == curcpu) continue; i = 0; + xt_info_wrlock(cpu); IPT_ENTRY_ITERATE(t->entries[cpu], t->size, add_entry_to_counter, counters, &i); + xt_info_wrunlock(cpu); } - -} - -/* We're lazy, and add to the first CPU; overflow works its fey magic - * and everything is OK. 
*/ -static int -add_counter_to_entry(struct ipt_entry *e, - const struct xt_counters addme[], - unsigned int *i) -{ - ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); - - (*i)++; - return 0; -} - -/* Take values from counters and add them back onto the current cpu */ -static void put_counters(struct xt_table_info *t, - const struct xt_counters counters[]) -{ - unsigned int i, cpu; - - local_bh_disable(); - cpu = smp_processor_id(); - i = 0; - IPT_ENTRY_ITERATE(t->entries[cpu], - t->size, - add_counter_to_entry, - counters, - &i); local_bh_enable(); } - -static inline int -zero_entry_counter(struct ipt_entry *e, void *arg) -{ - e->counters.bcnt = 0; - e->counters.pcnt = 0; - return 0; -} - -static void -clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info) -{ - unsigned int cpu; - const void *loc_cpu_entry = info->entries[raw_smp_processor_id()]; - - memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); - for_each_possible_cpu(cpu) { - memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size); - IPT_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size, - zero_entry_counter, NULL); - } -} - static struct xt_counters * alloc_counters(struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; struct xt_table_info *private = table->private; - struct xt_table_info *info; /* We need atomic snapshot of counters: rest doesn't change (other than comefrom, which userspace doesn't care @@ -988,30 +937,11 @@ static struct xt_counters * alloc_counters(struct xt_table *table) counters = vmalloc_node(countersize, numa_node_id()); if (counters == NULL) - goto nomem; + return ERR_PTR(-ENOMEM); - info = xt_alloc_table_info(private->size); - if (!info) - goto free_counters; - - clone_counters(info, private); - - mutex_lock(&table->lock); - xt_table_entry_swap_rcu(private, info); - synchronize_net(); /* Wait until smoke has cleared */ - - get_counters(info, counters); - put_counters(private, counters); - mutex_unlock(&table->lock); - - xt_free_table_info(info); + get_counters(private, counters); return counters; - - free_counters: - vfree(counters); - nomem: - return ERR_PTR(-ENOMEM); } static int @@ -1306,8 +1236,9 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, (newinfo->number <= oldinfo->initial_entries)) module_put(t->me); - /* Get the old counters. */ + /* Get the old counters, and synchronize with replace */ get_counters(oldinfo, counters); + /* Decrease module usage counts and free resource */ loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry, @@ -1377,11 +1308,23 @@ do_replace(struct net *net, void __user *user, unsigned int len) return ret; } +/* We're lazy, and add to the first CPU; overflow works its fey magic + * and everything is OK. 
*/ +static int +add_counter_to_entry(struct ipt_entry *e, + const struct xt_counters addme[], + unsigned int *i) +{ + ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); + + (*i)++; + return 0; +} static int do_add_counters(struct net *net, void __user *user, unsigned int len, int compat) { - unsigned int i; + unsigned int i, curcpu; struct xt_counters_info tmp; struct xt_counters *paddc; unsigned int num_counters; @@ -1437,25 +1380,26 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, int compat goto free; } - mutex_lock(&t->lock); + local_bh_disable(); private = t->private; if (private->number != num_counters) { ret = -EINVAL; goto unlock_up_free; } - preempt_disable(); i = 0; /* Choose the copy that is on our node */ - loc_cpu_entry = private->entries[raw_smp_processor_id()]; + curcpu = smp_processor_id(); + loc_cpu_entry = private->entries[curcpu]; + xt_info_wrlock(curcpu); IPT_ENTRY_ITERATE(loc_cpu_entry, private->size, add_counter_to_entry, paddc, &i); - preempt_enable(); + xt_info_wrunlock(curcpu); unlock_up_free: - mutex_unlock(&t->lock); + local_bh_enable(); xt_table_unlock(t); module_put(t->me); free: diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index fe65187810f0..3229e0a81ba6 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -211,7 +211,8 @@ find_best_ips_proto(struct nf_conntrack_tuple *tuple, minip = ntohl(range->min_ip); maxip = ntohl(range->max_ip); j = jhash_2words((__force u32)tuple->src.u3.ip, - (__force u32)tuple->dst.u3.ip, 0); + range->flags & IP_NAT_RANGE_PERSISTENT ? + (__force u32)tuple->dst.u3.ip : 0, 0); j = ((u64)j * (maxip - minip + 1)) >> 32; *var_ipp = htonl(minip + j); } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index c40debe51b38..28205e5bfa9b 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -784,8 +784,8 @@ static void rt_check_expire(void) { static unsigned int rover; unsigned int i = rover, goal; - struct rtable *rth, **rthp; - unsigned long length = 0, samples = 0; + struct rtable *rth, *aux, **rthp; + unsigned long samples = 0; unsigned long sum = 0, sum2 = 0; u64 mult; @@ -795,9 +795,9 @@ static void rt_check_expire(void) goal = (unsigned int)mult; if (goal > rt_hash_mask) goal = rt_hash_mask + 1; - length = 0; for (; goal > 0; goal--) { unsigned long tmo = ip_rt_gc_timeout; + unsigned long length; i = (i + 1) & rt_hash_mask; rthp = &rt_hash_table[i].chain; @@ -809,8 +809,10 @@ static void rt_check_expire(void) if (*rthp == NULL) continue; + length = 0; spin_lock_bh(rt_hash_lock_addr(i)); while ((rth = *rthp) != NULL) { + prefetch(rth->u.dst.rt_next); if (rt_is_expired(rth)) { *rthp = rth->u.dst.rt_next; rt_free(rth); @@ -819,33 +821,30 @@ static void rt_check_expire(void) if (rth->u.dst.expires) { /* Entry is expired even if it is in use */ if (time_before_eq(jiffies, rth->u.dst.expires)) { +nofree: tmo >>= 1; rthp = &rth->u.dst.rt_next; /* - * Only bump our length if the hash - * inputs on entries n and n+1 are not - * the same, we only count entries on + * We only count entries on * a chain with equal hash inputs once * so that entries for different QOS * levels, and other non-hash input * attributes don't unfairly skew * the length computation */ - if ((*rthp == NULL) || - !compare_hash_inputs(&(*rthp)->fl, - &rth->fl)) - length += ONE; + for (aux = rt_hash_table[i].chain;;) { + if (aux == rth) { + length += ONE; + break; + } + if (compare_hash_inputs(&aux->fl, &rth->fl)) + break; + aux = aux->u.dst.rt_next; + } continue; } - } else 
if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout)) { - tmo >>= 1; - rthp = &rth->u.dst.rt_next; - if ((*rthp == NULL) || - !compare_hash_inputs(&(*rthp)->fl, - &rth->fl)) - length += ONE; - continue; - } + } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout)) + goto nofree; /* Cleanup aged off entries. */ *rthp = rth->u.dst.rt_next; @@ -1068,7 +1067,6 @@ out: return 0; static int rt_intern_hash(unsigned hash, struct rtable *rt, struct rtable **rp) { struct rtable *rth, **rthp; - struct rtable *rthi; unsigned long now; struct rtable *cand, **candp; u32 min_score; @@ -1088,7 +1086,6 @@ restart: } rthp = &rt_hash_table[hash].chain; - rthi = NULL; spin_lock_bh(rt_hash_lock_addr(hash)); while ((rth = *rthp) != NULL) { @@ -1134,17 +1131,6 @@ restart: chain_length++; rthp = &rth->u.dst.rt_next; - - /* - * check to see if the next entry in the chain - * contains the same hash input values as rt. If it does - * This is where we will insert into the list, instead of - * at the head. This groups entries that differ by aspects not - * relvant to the hash function together, which we use to adjust - * our chain length - */ - if (*rthp && compare_hash_inputs(&(*rthp)->fl, &rt->fl)) - rthi = rth; } if (cand) { @@ -1205,10 +1191,7 @@ restart: } } - if (rthi) - rt->u.dst.rt_next = rthi->u.dst.rt_next; - else - rt->u.dst.rt_next = rt_hash_table[hash].chain; + rt->u.dst.rt_next = rt_hash_table[hash].chain; #if RT_CACHE_DEBUG >= 2 if (rt->u.dst.rt_next) { @@ -1224,10 +1207,7 @@ restart: * previous writes to rt are comitted to memory * before making rt visible to other CPUS. */ - if (rthi) - rcu_assign_pointer(rthi->u.dst.rt_next, rt); - else - rcu_assign_pointer(rt_hash_table[hash].chain, rt); + rcu_assign_pointer(rt_hash_table[hash].chain, rt); spin_unlock_bh(rt_hash_lock_addr(hash)); *rp = rt; @@ -3397,7 +3377,7 @@ int __init ip_rt_init(void) 0, &rt_hash_log, &rt_hash_mask, - 0); + rhash_entries ? 
0 : 512 * 1024); memset(rt_hash_table, 0, (rt_hash_mask + 1) * sizeof(struct rt_hash_bucket)); rt_hash_lock_init(); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 2451aeb5ac23..7a0f0b27bf1f 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1081,8 +1081,7 @@ out_err: * this, no blocking and very strange errors 8) */ -static int tcp_recv_urg(struct sock *sk, long timeo, - struct msghdr *msg, int len, int flags) +static int tcp_recv_urg(struct sock *sk, struct msghdr *msg, int len, int flags) { struct tcp_sock *tp = tcp_sk(sk); @@ -1322,6 +1321,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, struct task_struct *user_recv = NULL; int copied_early = 0; struct sk_buff *skb; + u32 urg_hole = 0; lock_sock(sk); @@ -1533,7 +1533,8 @@ do_prequeue: } } } - if ((flags & MSG_PEEK) && peek_seq != tp->copied_seq) { + if ((flags & MSG_PEEK) && + (peek_seq - copied - urg_hole != tp->copied_seq)) { if (net_ratelimit()) printk(KERN_DEBUG "TCP(%s:%d): Application bug, race in MSG_PEEK.\n", current->comm, task_pid_nr(current)); @@ -1554,6 +1555,7 @@ do_prequeue: if (!urg_offset) { if (!sock_flag(sk, SOCK_URGINLINE)) { ++*seq; + urg_hole++; offset++; used--; if (!used) @@ -1697,7 +1699,7 @@ out: return err; recv_urg: - err = tcp_recv_urg(sk, timeo, msg, len, flags); + err = tcp_recv_urg(sk, msg, len, flags); goto out; } @@ -2512,6 +2514,7 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb) struct sk_buff *p; struct tcphdr *th; struct tcphdr *th2; + unsigned int len; unsigned int thlen; unsigned int flags; unsigned int mss = 1; @@ -2532,6 +2535,7 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb) skb_gro_pull(skb, thlen); + len = skb_gro_len(skb); flags = tcp_flag_word(th); for (; (p = *head); head = &p->next) { @@ -2562,7 +2566,7 @@ found: mss = skb_shinfo(p)->gso_size; - flush |= (skb_gro_len(skb) > mss) | !skb_gro_len(skb); + flush |= (len > mss) | !len; flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq); if (flush || skb_gro_receive(head, skb)) { @@ -2575,7 +2579,7 @@ found: tcp_flag_word(th2) |= flags & (TCP_FLAG_FIN | TCP_FLAG_PSH); out_check_final: - flush = skb_gro_len(skb) < mss; + flush = len < mss; flush |= flags & (TCP_FLAG_URG | TCP_FLAG_PSH | TCP_FLAG_RST | TCP_FLAG_SYN | TCP_FLAG_FIN); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 2bc8e27a163d..eec3e6f9956c 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -597,16 +597,6 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb) tcp_grow_window(sk, skb); } -static u32 tcp_rto_min(struct sock *sk) -{ - struct dst_entry *dst = __sk_dst_get(sk); - u32 rto_min = TCP_RTO_MIN; - - if (dst && dst_metric_locked(dst, RTAX_RTO_MIN)) - rto_min = dst_metric_rtt(dst, RTAX_RTO_MIN); - return rto_min; -} - /* Called to compute a smoothed rtt estimate. 
The data fed to this * routine either comes from timestamps, or from segments that were * known _not_ to have been retransmitted [see Karn/Partridge @@ -928,6 +918,8 @@ static void tcp_init_metrics(struct sock *sk) tcp_set_rto(sk); if (inet_csk(sk)->icsk_rto < TCP_TIMEOUT_INIT && !tp->rx_opt.saw_tstamp) goto reset; + +cwnd: tp->snd_cwnd = tcp_init_cwnd(tp, dst); tp->snd_cwnd_stamp = tcp_time_stamp; return; @@ -942,6 +934,7 @@ reset: tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_INIT; inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT; } + goto cwnd; } static void tcp_update_reordering(struct sock *sk, const int metric, diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index c1f259d2d33b..59aec609cec6 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -754,6 +754,36 @@ static void tcp_adjust_fackets_out(struct sock *sk, struct sk_buff *skb, tp->fackets_out -= decr; } +/* Pcount in the middle of the write queue got changed, we need to do various + * tweaks to fix counters + */ +static void tcp_adjust_pcount(struct sock *sk, struct sk_buff *skb, int decr) +{ + struct tcp_sock *tp = tcp_sk(sk); + + tp->packets_out -= decr; + + if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) + tp->sacked_out -= decr; + if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) + tp->retrans_out -= decr; + if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) + tp->lost_out -= decr; + + /* Reno case is special. Sigh... */ + if (tcp_is_reno(tp) && decr > 0) + tp->sacked_out -= min_t(u32, tp->sacked_out, decr); + + tcp_adjust_fackets_out(sk, skb, decr); + + if (tp->lost_skb_hint && + before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(tp->lost_skb_hint)->seq) && + (tcp_is_fack(tp) || (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))) + tp->lost_cnt_hint -= decr; + + tcp_verify_left_out(tp); +} + /* Function to create two new TCP segments. Shrinks the given segment * to the specified size and appends a new segment with the rest of the * packet to the list. This won't be called frequently, I hope. @@ -836,28 +866,8 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, int diff = old_factor - tcp_skb_pcount(skb) - tcp_skb_pcount(buff); - tp->packets_out -= diff; - - if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) - tp->sacked_out -= diff; - if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) - tp->retrans_out -= diff; - - if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) - tp->lost_out -= diff; - - /* Adjust Reno SACK estimate. */ - if (tcp_is_reno(tp) && diff > 0) { - tcp_dec_pcount_approx_int(&tp->sacked_out, diff); - tcp_verify_left_out(tp); - } - tcp_adjust_fackets_out(sk, skb, diff); - - if (tp->lost_skb_hint && - before(TCP_SKB_CB(skb)->seq, - TCP_SKB_CB(tp->lost_skb_hint)->seq) && - (tcp_is_fack(tp) || TCP_SKB_CB(skb)->sacked)) - tp->lost_cnt_hint -= diff; + if (diff) + tcp_adjust_pcount(sk, skb, diff); } /* Link BUFF into the send queue. */ @@ -1768,22 +1778,14 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb) * packet counting does not break. */ TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked & TCPCB_EVER_RETRANS; - if (TCP_SKB_CB(next_skb)->sacked & TCPCB_SACKED_RETRANS) - tp->retrans_out -= tcp_skb_pcount(next_skb); - if (TCP_SKB_CB(next_skb)->sacked & TCPCB_LOST) - tp->lost_out -= tcp_skb_pcount(next_skb); - /* Reno case is special. Sigh... 
*/ - if (tcp_is_reno(tp) && tp->sacked_out) - tcp_dec_pcount_approx(&tp->sacked_out, next_skb); - - tcp_adjust_fackets_out(sk, next_skb, tcp_skb_pcount(next_skb)); - tp->packets_out -= tcp_skb_pcount(next_skb); /* changed transmit queue under us so clear hints */ tcp_clear_retrans_hints_partial(tp); if (next_skb == tp->retransmit_skb_hint) tp->retransmit_skb_hint = skb; + tcp_adjust_pcount(sk, next_skb, tcp_skb_pcount(next_skb)); + sk_wmem_free_skb(sk, next_skb); } @@ -1891,7 +1893,12 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) if (tcp_fragment(sk, skb, cur_mss, cur_mss)) return -ENOMEM; /* We'll try again later. */ } else { - tcp_init_tso_segs(sk, skb, cur_mss); + int oldpcount = tcp_skb_pcount(skb); + + if (unlikely(oldpcount > 1)) { + tcp_init_tso_segs(sk, skb, cur_mss); + tcp_adjust_pcount(sk, skb, oldpcount - tcp_skb_pcount(skb)); + } } tcp_retrans_try_collapse(sk, skb, cur_mss); diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index a453aac91bd3..c6743eec9b7d 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -158,6 +158,11 @@ void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event) } EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event); +static inline u32 tcp_vegas_ssthresh(struct tcp_sock *tp) +{ + return min(tp->snd_ssthresh, tp->snd_cwnd-1); +} + static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) { struct tcp_sock *tp = tcp_sk(sk); @@ -221,11 +226,10 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) */ diff = tp->snd_cwnd * (rtt-vegas->baseRTT) / vegas->baseRTT; - if (diff > gamma && tp->snd_ssthresh > 2 ) { + if (diff > gamma && tp->snd_cwnd <= tp->snd_ssthresh) { /* Going too fast. Time to slow down * and switch to congestion avoidance. */ - tp->snd_ssthresh = 2; /* Set cwnd to match the actual rate * exactly: @@ -235,6 +239,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) * utilization. */ tp->snd_cwnd = min(tp->snd_cwnd, (u32)target_cwnd+1); + tp->snd_ssthresh = tcp_vegas_ssthresh(tp); } else if (tp->snd_cwnd <= tp->snd_ssthresh) { /* Slow start. */ @@ -250,6 +255,8 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) * we slow down. */ tp->snd_cwnd--; + tp->snd_ssthresh + = tcp_vegas_ssthresh(tp); } else if (diff < alpha) { /* We don't have enough extra packets * in the network, so speed up. diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index bda08a09357d..7a1d1ce22e66 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -222,7 +222,7 @@ fail: return error; } -int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) +static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) { struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); @@ -1823,7 +1823,6 @@ EXPORT_SYMBOL(udp_lib_getsockopt); EXPORT_SYMBOL(udp_lib_setsockopt); EXPORT_SYMBOL(udp_poll); EXPORT_SYMBOL(udp_lib_get_port); -EXPORT_SYMBOL(ipv4_rcv_saddr_equal); #ifdef CONFIG_PROC_FS EXPORT_SYMBOL(udp_proc_register); diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index ec992159b5f8..ca8cb326d1d2 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -22,17 +22,17 @@ menuconfig IPV6 if IPV6 config IPV6_PRIVACY - bool "IPv6: Privacy Extensions support" + bool "IPv6: Privacy Extensions (RFC 3041) support" ---help--- Privacy Extensions for Stateless Address Autoconfiguration in IPv6 - support. With this option, additional periodically-alter - pseudo-random global-scope unicast address(es) will assigned to + support. 
With this option, additional periodically-altered + pseudo-random global-scope unicast address(es) will be assigned to your interface(s). - We use our standard pseudo random algorithm to generate randomized - interface identifier, instead of one described in RFC 3041. + We use our standard pseudo-random algorithm to generate the + randomized interface identifier, instead of one described in RFC 3041. - By default, kernel do not generate temporary addresses. + By default the kernel does not generate temporary addresses. To use temporary addresses, do echo 2 >/proc/sys/net/ipv6/conf/all/use_tempaddr @@ -43,9 +43,9 @@ config IPV6_ROUTER_PREF bool "IPv6: Router Preference (RFC 4191) support" ---help--- Router Preference is an optional extension to the Router - Advertisement message to improve the ability of hosts - to pick more appropriate router, especially when the hosts - is placed in a multi-homed network. + Advertisement message which improves the ability of hosts + to pick an appropriate router, especially when the hosts + are placed in a multi-homed network. If unsure, say N. diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index d31df0f4bc9a..a7fdf9a27f15 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -380,10 +380,6 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, default: goto sticky_done; } - - if ((rthdr->hdrlen & 1) || - (rthdr->hdrlen >> 1) != rthdr->segments_left) - goto sticky_done; } retv = 0; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index e89cfa3a8f25..219e165aea10 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -365,9 +365,9 @@ ip6t_do_table(struct sk_buff *skb, IP_NF_ASSERT(table->valid_hooks & (1 << hook)); - rcu_read_lock(); - private = rcu_dereference(table->private); - table_base = rcu_dereference(private->entries[smp_processor_id()]); + xt_info_rdlock_bh(); + private = table->private; + table_base = private->entries[smp_processor_id()]; e = get_entry(table_base, private->hook_entry[hook]); @@ -466,7 +466,7 @@ ip6t_do_table(struct sk_buff *skb, #ifdef CONFIG_NETFILTER_DEBUG ((struct ip6t_entry *)table_base)->comefrom = NETFILTER_LINK_POISON; #endif - rcu_read_unlock(); + xt_info_rdunlock_bh(); #ifdef DEBUG_ALLOW_ALL return NF_ACCEPT; @@ -926,9 +926,12 @@ get_counters(const struct xt_table_info *t, /* Instead of clearing (by a previous call to memset()) * the counters and using adds, we set the counters * with data used by 'current' CPU - * We dont care about preemption here. + * + * Bottom half has to be disabled to prevent deadlock + * if new softirq were to run and call ipt_do_table */ - curcpu = raw_smp_processor_id(); + local_bh_disable(); + curcpu = smp_processor_id(); i = 0; IP6T_ENTRY_ITERATE(t->entries[curcpu], @@ -941,72 +944,22 @@ get_counters(const struct xt_table_info *t, if (cpu == curcpu) continue; i = 0; + xt_info_wrlock(cpu); IP6T_ENTRY_ITERATE(t->entries[cpu], t->size, add_entry_to_counter, counters, &i); + xt_info_wrunlock(cpu); } -} - -/* We're lazy, and add to the first CPU; overflow works its fey magic - * and everything is OK. 
*/ -static int -add_counter_to_entry(struct ip6t_entry *e, - const struct xt_counters addme[], - unsigned int *i) -{ - ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); - - (*i)++; - return 0; -} - -/* Take values from counters and add them back onto the current cpu */ -static void put_counters(struct xt_table_info *t, - const struct xt_counters counters[]) -{ - unsigned int i, cpu; - - local_bh_disable(); - cpu = smp_processor_id(); - i = 0; - IP6T_ENTRY_ITERATE(t->entries[cpu], - t->size, - add_counter_to_entry, - counters, - &i); local_bh_enable(); } -static inline int -zero_entry_counter(struct ip6t_entry *e, void *arg) -{ - e->counters.bcnt = 0; - e->counters.pcnt = 0; - return 0; -} - -static void -clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info) -{ - unsigned int cpu; - const void *loc_cpu_entry = info->entries[raw_smp_processor_id()]; - - memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); - for_each_possible_cpu(cpu) { - memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size); - IP6T_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size, - zero_entry_counter, NULL); - } -} - static struct xt_counters *alloc_counters(struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; struct xt_table_info *private = table->private; - struct xt_table_info *info; /* We need atomic snapshot of counters: rest doesn't change (other than comefrom, which userspace doesn't care @@ -1015,28 +968,11 @@ static struct xt_counters *alloc_counters(struct xt_table *table) counters = vmalloc_node(countersize, numa_node_id()); if (counters == NULL) - goto nomem; - - info = xt_alloc_table_info(private->size); - if (!info) - goto free_counters; - - clone_counters(info, private); - - mutex_lock(&table->lock); - xt_table_entry_swap_rcu(private, info); - synchronize_net(); /* Wait until smoke has cleared */ + return ERR_PTR(-ENOMEM); - get_counters(info, counters); - put_counters(private, counters); - mutex_unlock(&table->lock); + get_counters(private, counters); - xt_free_table_info(info); - - free_counters: - vfree(counters); - nomem: - return ERR_PTR(-ENOMEM); + return counters; } static int @@ -1332,8 +1268,9 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, (newinfo->number <= oldinfo->initial_entries)) module_put(t->me); - /* Get the old counters. */ + /* Get the old counters, and synchronize with replace */ get_counters(oldinfo, counters); + /* Decrease module usage counts and free resource */ loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; IP6T_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry, @@ -1403,11 +1340,24 @@ do_replace(struct net *net, void __user *user, unsigned int len) return ret; } +/* We're lazy, and add to the first CPU; overflow works its fey magic + * and everything is OK. 
*/ +static int +add_counter_to_entry(struct ip6t_entry *e, + const struct xt_counters addme[], + unsigned int *i) +{ + ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); + + (*i)++; + return 0; +} + static int do_add_counters(struct net *net, void __user *user, unsigned int len, int compat) { - unsigned int i; + unsigned int i, curcpu; struct xt_counters_info tmp; struct xt_counters *paddc; unsigned int num_counters; @@ -1463,25 +1413,28 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, goto free; } - mutex_lock(&t->lock); + + local_bh_disable(); private = t->private; if (private->number != num_counters) { ret = -EINVAL; goto unlock_up_free; } - preempt_disable(); i = 0; /* Choose the copy that is on our node */ - loc_cpu_entry = private->entries[raw_smp_processor_id()]; + curcpu = smp_processor_id(); + xt_info_wrlock(curcpu); + loc_cpu_entry = private->entries[curcpu]; IP6T_ENTRY_ITERATE(loc_cpu_entry, private->size, add_counter_to_entry, paddc, &i); - preempt_enable(); + xt_info_wrunlock(curcpu); + unlock_up_free: - mutex_unlock(&t->lock); + local_bh_enable(); xt_table_unlock(t); module_put(t->me); free: diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c index 14e6724d5672..91490ad9302c 100644 --- a/net/ipv6/netfilter/ip6t_ipv6header.c +++ b/net/ipv6/netfilter/ip6t_ipv6header.c @@ -50,14 +50,14 @@ ipv6header_mt6(const struct sk_buff *skb, const struct xt_match_param *par) struct ipv6_opt_hdr _hdr; int hdrlen; - /* Is there enough space for the next ext header? */ - if (len < (int)sizeof(struct ipv6_opt_hdr)) - return false; /* No more exthdr -> evaluate */ if (nexthdr == NEXTHDR_NONE) { temp |= MASK_NONE; break; } + /* Is there enough space for the next ext header? */ + if (len < (int)sizeof(struct ipv6_opt_hdr)) + return false; /* ESP -> evaluate */ if (nexthdr == NEXTHDR_ESP) { temp |= MASK_ESP; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 1394ddb6e35c..032a5ec391c5 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -137,6 +137,7 @@ static struct rt6_info ip6_null_entry_template = { } }, .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), + .rt6i_protocol = RTPROT_KERNEL, .rt6i_metric = ~(u32) 0, .rt6i_ref = ATOMIC_INIT(1), }; @@ -159,6 +160,7 @@ static struct rt6_info ip6_prohibit_entry_template = { } }, .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), + .rt6i_protocol = RTPROT_KERNEL, .rt6i_metric = ~(u32) 0, .rt6i_ref = ATOMIC_INIT(1), }; @@ -176,6 +178,7 @@ static struct rt6_info ip6_blk_hole_entry_template = { } }, .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), + .rt6i_protocol = RTPROT_KERNEL, .rt6i_metric = ~(u32) 0, .rt6i_ref = ATOMIC_INIT(1), }; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 6842dd2edd5b..8905712cfbb8 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -53,6 +53,8 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) { const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr; const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2); + __be32 sk_rcv_saddr = inet_sk(sk)->rcv_saddr; + __be32 sk2_rcv_saddr = inet_rcv_saddr(sk2); int sk_ipv6only = ipv6_only_sock(sk); int sk2_ipv6only = inet_v6_ipv6only(sk2); int addr_type = ipv6_addr_type(sk_rcv_saddr6); @@ -60,7 +62,9 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) /* if both are mapped, treat as IPv4 */ if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) - return ipv4_rcv_saddr_equal(sk, sk2); + return (!sk2_ipv6only && + (!sk_rcv_saddr || !sk2_rcv_saddr || + 
sk_rcv_saddr == sk2_rcv_saddr)); if (addr_type2 == IPV6_ADDR_ANY && !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED)) diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 0af823cf7f1f..5ee5a031bc93 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -72,6 +72,7 @@ int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb) #endif skb->protocol = htons(ETH_P_IPV6); + skb->local_df = 1; return x->outer_mode->output2(x, skb); } diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c index 086d5ef098fd..811984d9324b 100644 --- a/net/irda/ircomm/ircomm_tty.c +++ b/net/irda/ircomm/ircomm_tty.c @@ -34,6 +34,7 @@ #include <linux/module.h> #include <linux/fs.h> #include <linux/sched.h> +#include <linux/seq_file.h> #include <linux/termios.h> #include <linux/tty.h> #include <linux/interrupt.h> @@ -72,8 +73,7 @@ static int ircomm_tty_control_indication(void *instance, void *sap, static void ircomm_tty_flow_indication(void *instance, void *sap, LOCAL_FLOW cmd); #ifdef CONFIG_PROC_FS -static int ircomm_tty_read_proc(char *buf, char **start, off_t offset, int len, - int *eof, void *unused); +static const struct file_operations ircomm_tty_proc_fops; #endif /* CONFIG_PROC_FS */ static struct tty_driver *driver; @@ -98,7 +98,7 @@ static const struct tty_operations ops = { .hangup = ircomm_tty_hangup, .wait_until_sent = ircomm_tty_wait_until_sent, #ifdef CONFIG_PROC_FS - .read_proc = ircomm_tty_read_proc, + .proc_fops = &ircomm_tty_proc_fops, #endif /* CONFIG_PROC_FS */ }; @@ -1245,150 +1245,170 @@ static void ircomm_tty_flow_indication(void *instance, void *sap, } #ifdef CONFIG_PROC_FS -static int ircomm_tty_line_info(struct ircomm_tty_cb *self, char *buf) +static void ircomm_tty_line_info(struct ircomm_tty_cb *self, struct seq_file *m) { - int ret=0; + char sep; - ret += sprintf(buf+ret, "State: %s\n", ircomm_tty_state[self->state]); + seq_printf(m, "State: %s\n", ircomm_tty_state[self->state]); - ret += sprintf(buf+ret, "Service type: "); + seq_puts(m, "Service type: "); if (self->service_type & IRCOMM_9_WIRE) - ret += sprintf(buf+ret, "9_WIRE"); + seq_puts(m, "9_WIRE"); else if (self->service_type & IRCOMM_3_WIRE) - ret += sprintf(buf+ret, "3_WIRE"); + seq_puts(m, "3_WIRE"); else if (self->service_type & IRCOMM_3_WIRE_RAW) - ret += sprintf(buf+ret, "3_WIRE_RAW"); + seq_puts(m, "3_WIRE_RAW"); else - ret += sprintf(buf+ret, "No common service type!\n"); - ret += sprintf(buf+ret, "\n"); - - ret += sprintf(buf+ret, "Port name: %s\n", self->settings.port_name); - - ret += sprintf(buf+ret, "DTE status: "); - if (self->settings.dte & IRCOMM_RTS) - ret += sprintf(buf+ret, "RTS|"); - if (self->settings.dte & IRCOMM_DTR) - ret += sprintf(buf+ret, "DTR|"); - if (self->settings.dte) - ret--; /* remove the last | */ - ret += sprintf(buf+ret, "\n"); - - ret += sprintf(buf+ret, "DCE status: "); - if (self->settings.dce & IRCOMM_CTS) - ret += sprintf(buf+ret, "CTS|"); - if (self->settings.dce & IRCOMM_DSR) - ret += sprintf(buf+ret, "DSR|"); - if (self->settings.dce & IRCOMM_CD) - ret += sprintf(buf+ret, "CD|"); - if (self->settings.dce & IRCOMM_RI) - ret += sprintf(buf+ret, "RI|"); - if (self->settings.dce) - ret--; /* remove the last | */ - ret += sprintf(buf+ret, "\n"); - - ret += sprintf(buf+ret, "Configuration: "); + seq_puts(m, "No common service type!\n"); + seq_putc(m, '\n'); + + seq_printf(m, "Port name: %s\n", self->settings.port_name); + + seq_printf(m, "DTE status:"); + sep = ' '; + if (self->settings.dte & IRCOMM_RTS) { + seq_printf(m, "%cRTS", sep); 
+ sep = '|'; + } + if (self->settings.dte & IRCOMM_DTR) { + seq_printf(m, "%cDTR", sep); + sep = '|'; + } + seq_putc(m, '\n'); + + seq_puts(m, "DCE status:"); + sep = ' '; + if (self->settings.dce & IRCOMM_CTS) { + seq_printf(m, "%cCTS", sep); + sep = '|'; + } + if (self->settings.dce & IRCOMM_DSR) { + seq_printf(m, "%cDSR", sep); + sep = '|'; + } + if (self->settings.dce & IRCOMM_CD) { + seq_printf(m, "%cCD", sep); + sep = '|'; + } + if (self->settings.dce & IRCOMM_RI) { + seq_printf(m, "%cRI", sep); + sep = '|'; + } + seq_putc(m, '\n'); + + seq_puts(m, "Configuration: "); if (!self->settings.null_modem) - ret += sprintf(buf+ret, "DTE <-> DCE\n"); + seq_puts(m, "DTE <-> DCE\n"); else - ret += sprintf(buf+ret, - "DTE <-> DTE (null modem emulation)\n"); - - ret += sprintf(buf+ret, "Data rate: %d\n", self->settings.data_rate); - - ret += sprintf(buf+ret, "Flow control: "); - if (self->settings.flow_control & IRCOMM_XON_XOFF_IN) - ret += sprintf(buf+ret, "XON_XOFF_IN|"); - if (self->settings.flow_control & IRCOMM_XON_XOFF_OUT) - ret += sprintf(buf+ret, "XON_XOFF_OUT|"); - if (self->settings.flow_control & IRCOMM_RTS_CTS_IN) - ret += sprintf(buf+ret, "RTS_CTS_IN|"); - if (self->settings.flow_control & IRCOMM_RTS_CTS_OUT) - ret += sprintf(buf+ret, "RTS_CTS_OUT|"); - if (self->settings.flow_control & IRCOMM_DSR_DTR_IN) - ret += sprintf(buf+ret, "DSR_DTR_IN|"); - if (self->settings.flow_control & IRCOMM_DSR_DTR_OUT) - ret += sprintf(buf+ret, "DSR_DTR_OUT|"); - if (self->settings.flow_control & IRCOMM_ENQ_ACK_IN) - ret += sprintf(buf+ret, "ENQ_ACK_IN|"); - if (self->settings.flow_control & IRCOMM_ENQ_ACK_OUT) - ret += sprintf(buf+ret, "ENQ_ACK_OUT|"); - if (self->settings.flow_control) - ret--; /* remove the last | */ - ret += sprintf(buf+ret, "\n"); - - ret += sprintf(buf+ret, "Flags: "); - if (self->flags & ASYNC_CTS_FLOW) - ret += sprintf(buf+ret, "ASYNC_CTS_FLOW|"); - if (self->flags & ASYNC_CHECK_CD) - ret += sprintf(buf+ret, "ASYNC_CHECK_CD|"); - if (self->flags & ASYNC_INITIALIZED) - ret += sprintf(buf+ret, "ASYNC_INITIALIZED|"); - if (self->flags & ASYNC_LOW_LATENCY) - ret += sprintf(buf+ret, "ASYNC_LOW_LATENCY|"); - if (self->flags & ASYNC_CLOSING) - ret += sprintf(buf+ret, "ASYNC_CLOSING|"); - if (self->flags & ASYNC_NORMAL_ACTIVE) - ret += sprintf(buf+ret, "ASYNC_NORMAL_ACTIVE|"); - if (self->flags) - ret--; /* remove the last | */ - ret += sprintf(buf+ret, "\n"); - - ret += sprintf(buf+ret, "Role: %s\n", self->client ? 
- "client" : "server"); - ret += sprintf(buf+ret, "Open count: %d\n", self->open_count); - ret += sprintf(buf+ret, "Max data size: %d\n", self->max_data_size); - ret += sprintf(buf+ret, "Max header size: %d\n", self->max_header_size); + seq_puts(m, "DTE <-> DTE (null modem emulation)\n"); + + seq_printf(m, "Data rate: %d\n", self->settings.data_rate); + + seq_puts(m, "Flow control:"); + sep = ' '; + if (self->settings.flow_control & IRCOMM_XON_XOFF_IN) { + seq_printf(m, "%cXON_XOFF_IN", sep); + sep = '|'; + } + if (self->settings.flow_control & IRCOMM_XON_XOFF_OUT) { + seq_printf(m, "%cXON_XOFF_OUT", sep); + sep = '|'; + } + if (self->settings.flow_control & IRCOMM_RTS_CTS_IN) { + seq_printf(m, "%cRTS_CTS_IN", sep); + sep = '|'; + } + if (self->settings.flow_control & IRCOMM_RTS_CTS_OUT) { + seq_printf(m, "%cRTS_CTS_OUT", sep); + sep = '|'; + } + if (self->settings.flow_control & IRCOMM_DSR_DTR_IN) { + seq_printf(m, "%cDSR_DTR_IN", sep); + sep = '|'; + } + if (self->settings.flow_control & IRCOMM_DSR_DTR_OUT) { + seq_printf(m, "%cDSR_DTR_OUT", sep); + sep = '|'; + } + if (self->settings.flow_control & IRCOMM_ENQ_ACK_IN) { + seq_printf(m, "%cENQ_ACK_IN", sep); + sep = '|'; + } + if (self->settings.flow_control & IRCOMM_ENQ_ACK_OUT) { + seq_printf(m, "%cENQ_ACK_OUT", sep); + sep = '|'; + } + seq_putc(m, '\n'); + + seq_puts(m, "Flags:"); + sep = ' '; + if (self->flags & ASYNC_CTS_FLOW) { + seq_printf(m, "%cASYNC_CTS_FLOW", sep); + sep = '|'; + } + if (self->flags & ASYNC_CHECK_CD) { + seq_printf(m, "%cASYNC_CHECK_CD", sep); + sep = '|'; + } + if (self->flags & ASYNC_INITIALIZED) { + seq_printf(m, "%cASYNC_INITIALIZED", sep); + sep = '|'; + } + if (self->flags & ASYNC_LOW_LATENCY) { + seq_printf(m, "%cASYNC_LOW_LATENCY", sep); + sep = '|'; + } + if (self->flags & ASYNC_CLOSING) { + seq_printf(m, "%cASYNC_CLOSING", sep); + sep = '|'; + } + if (self->flags & ASYNC_NORMAL_ACTIVE) { + seq_printf(m, "%cASYNC_NORMAL_ACTIVE", sep); + sep = '|'; + } + seq_putc(m, '\n'); + + seq_printf(m, "Role: %s\n", self->client ? "client" : "server"); + seq_printf(m, "Open count: %d\n", self->open_count); + seq_printf(m, "Max data size: %d\n", self->max_data_size); + seq_printf(m, "Max header size: %d\n", self->max_header_size); if (self->tty) - ret += sprintf(buf+ret, "Hardware: %s\n", + seq_printf(m, "Hardware: %s\n", self->tty->hw_stopped ? "Stopped" : "Running"); - - ret += sprintf(buf+ret, "\n"); - return ret; } - -/* - * Function ircomm_tty_read_proc (buf, start, offset, len, eof, unused) - * - * - * - */ -static int ircomm_tty_read_proc(char *buf, char **start, off_t offset, int len, - int *eof, void *unused) +static int ircomm_tty_proc_show(struct seq_file *m, void *v) { struct ircomm_tty_cb *self; - int count = 0, l; - off_t begin = 0; unsigned long flags; spin_lock_irqsave(&ircomm_tty->hb_spinlock, flags); self = (struct ircomm_tty_cb *) hashbin_get_first(ircomm_tty); - while ((self != NULL) && (count < 4000)) { + while (self != NULL) { if (self->magic != IRCOMM_TTY_MAGIC) break; - l = ircomm_tty_line_info(self, buf + count); - count += l; - if (count+begin > offset+len) - goto done; - if (count+begin < offset) { - begin += count; - count = 0; - } - + ircomm_tty_line_info(self, m); self = (struct ircomm_tty_cb *) hashbin_get_next(ircomm_tty); } - *eof = 1; -done: spin_unlock_irqrestore(&ircomm_tty->hb_spinlock, flags); + return 0; +} - if (offset >= count+begin) - return 0; - *start = buf + (offset-begin); - return ((len < begin+count-offset) ? 
len : begin+count-offset); +static int ircomm_tty_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, ircomm_tty_proc_show, NULL); } + +static const struct file_operations ircomm_tty_proc_fops = { + .owner = THIS_MODULE, + .open = ircomm_tty_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; #endif /* CONFIG_PROC_FS */ MODULE_AUTHOR("Dag Brattli <dagb@cs.uit.no>"); diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 49e786535dc8..b51c9187c347 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -172,6 +172,7 @@ static void iucv_sock_close(struct sock *sk) err = iucv_sock_wait_state(sk, IUCV_CLOSED, 0, timeo); } + case IUCV_CLOSING: /* fall through */ sk->sk_state = IUCV_CLOSED; sk->sk_state_change(sk); @@ -224,6 +225,8 @@ static struct sock *iucv_sock_alloc(struct socket *sock, int proto, gfp_t prio) spin_lock_init(&iucv_sk(sk)->message_q.lock); skb_queue_head_init(&iucv_sk(sk)->backlog_skb_q); iucv_sk(sk)->send_tag = 0; + iucv_sk(sk)->path = NULL; + memset(&iucv_sk(sk)->src_user_id , 0, 32); sk->sk_destruct = iucv_sock_destruct; sk->sk_sndtimeo = IUCV_CONN_TIMEOUT; @@ -811,6 +814,8 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); + /* receive/dequeue next skb: + * the function understands MSG_PEEK and, thus, does not dequeue skb */ skb = skb_recv_datagram(sk, flags, noblock, &err); if (!skb) { if (sk->sk_shutdown & RCV_SHUTDOWN) @@ -858,9 +863,7 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, iucv_process_message_q(sk); spin_unlock_bh(&iucv->message_q.lock); } - - } else - skb_queue_head(&sk->sk_receive_queue, skb); + } done: return err ? : copied; @@ -934,6 +937,9 @@ static int iucv_sock_shutdown(struct socket *sock, int how) lock_sock(sk); switch (sk->sk_state) { + case IUCV_DISCONN: + case IUCV_CLOSING: + case IUCV_SEVERED: case IUCV_CLOSED: err = -ENOTCONN; goto fail; @@ -1113,8 +1119,12 @@ static void iucv_callback_rx(struct iucv_path *path, struct iucv_message *msg) struct sock_msg_q *save_msg; int len; - if (sk->sk_shutdown & RCV_SHUTDOWN) + if (sk->sk_shutdown & RCV_SHUTDOWN) { + iucv_message_reject(path, msg); return; + } + + spin_lock(&iucv->message_q.lock); if (!list_empty(&iucv->message_q.list) || !skb_queue_empty(&iucv->backlog_skb_q)) @@ -1129,9 +1139,8 @@ static void iucv_callback_rx(struct iucv_path *path, struct iucv_message *msg) if (!skb) goto save_message; - spin_lock(&iucv->message_q.lock); iucv_process_message(sk, skb, path, msg); - spin_unlock(&iucv->message_q.lock); + goto out_unlock; return; @@ -1142,8 +1151,9 @@ save_message: save_msg->path = path; save_msg->msg = *msg; - spin_lock(&iucv->message_q.lock); list_add_tail(&save_msg->list, &iucv->message_q.list); + +out_unlock: spin_unlock(&iucv->message_q.lock); } diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig index 60c16162474c..ecc3faf9f11a 100644 --- a/net/mac80211/Kconfig +++ b/net/mac80211/Kconfig @@ -33,7 +33,7 @@ choice ---help--- This option selects the default rate control algorithm mac80211 will use. Note that this default can still be - overriden through the ieee80211_default_rc_algo module + overridden through the ieee80211_default_rc_algo module parameter if different algorithms are available. config MAC80211_RC_DEFAULT_PID @@ -202,10 +202,3 @@ config MAC80211_DEBUG_COUNTERS and show them in debugfs. If unsure, say N. 
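The ircomm_tty hunk above is a mechanical conversion from the legacy read_proc interface to seq_file. For orientation, the boilerplate it follows is the standard single_open() pattern; the sketch below is only an illustration of that pattern with hypothetical demo_* names, not code from this merge.

    #include <linux/fs.h>
    #include <linux/module.h>
    #include <linux/proc_fs.h>
    #include <linux/seq_file.h>

    /* show() just prints the whole report; seq_file handles buffering,
     * offsets and partial reads that read_proc had to manage by hand. */
    static int demo_proc_show(struct seq_file *m, void *v)
    {
            seq_puts(m, "example output\n");
            return 0;
    }

    static int demo_proc_open(struct inode *inode, struct file *file)
    {
            return single_open(file, demo_proc_show, NULL);
    }

    static const struct file_operations demo_proc_fops = {
            .owner   = THIS_MODULE,
            .open    = demo_proc_open,
            .read    = seq_read,
            .llseek  = seq_lseek,
            .release = single_release,
    };

The win is visible in the diff itself: ircomm_tty_proc_show() only prints, while the removed ircomm_tty_read_proc() spent most of its body on buf/start/offset/eof bookkeeping. A file like the sketch would be registered with e.g. proc_create("demo", 0, NULL, &demo_proc_fops).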
- -config MAC80211_VERBOSE_SPECT_MGMT_DEBUG - bool "Verbose Spectrum Management (IEEE 802.11h)debugging" - depends on MAC80211_DEBUG_MENU - ---help--- - Say Y here to print out verbose Spectrum Management (IEEE 802.11h) - debug messages. diff --git a/net/mac80211/main.c b/net/mac80211/main.c index a6f1d8a869bc..14134193cd17 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -258,7 +258,7 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed) (chan->max_power - local->power_constr_level) : chan->max_power; - if (local->user_power_level) + if (local->user_power_level >= 0) power = min(power, local->user_power_level); if (local->hw.conf.power_level != power) { @@ -757,6 +757,7 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, local->hw.conf.long_frame_max_tx_count = 4; local->hw.conf.short_frame_max_tx_count = 7; local->hw.conf.radio_enabled = true; + local->user_power_level = -1; INIT_LIST_HEAD(&local->interfaces); mutex_init(&local->iflist_mtx); @@ -909,6 +910,13 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) if (result < 0) goto fail_sta_info; + result = ieee80211_wep_init(local); + if (result < 0) { + printk(KERN_DEBUG "%s: Failed to initialize wep: %d\n", + wiphy_name(local->hw.wiphy), result); + goto fail_wep; + } + rtnl_lock(); result = dev_alloc_name(local->mdev, local->mdev->name); if (result < 0) @@ -930,14 +938,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) goto fail_rate; } - result = ieee80211_wep_init(local); - - if (result < 0) { - printk(KERN_DEBUG "%s: Failed to initialize wep: %d\n", - wiphy_name(local->hw.wiphy), result); - goto fail_wep; - } - /* add one default STA interface if supported */ if (local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_STATION)) { result = ieee80211_if_add(local, "wlan%d", NULL, @@ -967,13 +967,13 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) return 0; -fail_wep: - rate_control_deinitialize(local); fail_rate: unregister_netdevice(local->mdev); local->mdev = NULL; fail_dev: rtnl_unlock(); + ieee80211_wep_free(local); +fail_wep: sta_info_stop(local); fail_sta_info: debugfs_hw_del(local); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 7ecda9d59d8a..132938b073dc 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -441,6 +441,9 @@ static bool ieee80211_check_tim(struct ieee802_11_elems *elems, u16 aid) u8 index, indexn1, indexn2; struct ieee80211_tim_ie *tim = (struct ieee80211_tim_ie *) elems->tim; + if (unlikely(!tim || elems->tim_len < 4)) + return false; + aid &= 0x3fff; index = aid / 8; mask = 1 << (aid & 7); @@ -945,9 +948,13 @@ void ieee80211_beacon_loss_work(struct work_struct *work) u.mgd.beacon_loss_work); struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - printk(KERN_DEBUG "%s: driver reports beacon loss from AP %pM " - "- sending probe request\n", sdata->dev->name, - sdata->u.mgd.bssid); +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG + if (net_ratelimit()) { + printk(KERN_DEBUG "%s: driver reports beacon loss from AP %pM " + "- sending probe request\n", sdata->dev->name, + sdata->u.mgd.bssid); + } +#endif ifmgd->flags |= IEEE80211_STA_PROBEREQ_POLL; ieee80211_send_probe_req(sdata, ifmgd->bssid, ifmgd->ssid, @@ -1007,9 +1014,13 @@ static void ieee80211_associated(struct ieee80211_sub_if_data *sdata) (local->hw.conf.flags & IEEE80211_CONF_PS)) && time_after(jiffies, ifmgd->last_beacon + IEEE80211_MONITORING_INTERVAL)) { - printk(KERN_DEBUG "%s: beacon loss from AP %pM " - "- sending probe request\n", - sdata->dev->name, ifmgd->bssid); +#ifdef 
CONFIG_MAC80211_VERBOSE_DEBUG + if (net_ratelimit()) { + printk(KERN_DEBUG "%s: beacon loss from AP %pM " + "- sending probe request\n", + sdata->dev->name, ifmgd->bssid); + } +#endif ifmgd->flags |= IEEE80211_STA_PROBEREQ_POLL; ieee80211_send_probe_req(sdata, ifmgd->bssid, ifmgd->ssid, ifmgd->ssid_len, NULL, 0); @@ -1355,7 +1366,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, for (i = 0; i < elems.ext_supp_rates_len; i++) { int rate = (elems.ext_supp_rates[i] & 0x7f) * 5; - bool is_basic = !!(elems.supp_rates[i] & 0x80); + bool is_basic = !!(elems.ext_supp_rates[i] & 0x80); if (rate > 110) have_higher_than_11mbit = true; @@ -1902,9 +1913,17 @@ static void ieee80211_sta_work(struct work_struct *work) static void ieee80211_restart_sta_timer(struct ieee80211_sub_if_data *sdata) { - if (sdata->vif.type == NL80211_IFTYPE_STATION) + if (sdata->vif.type == NL80211_IFTYPE_STATION) { + /* + * Need to update last_beacon to avoid beacon loss + * test to trigger. + */ + sdata->u.mgd.last_beacon = jiffies; + + queue_work(sdata->local->hw.workqueue, &sdata->u.mgd.work); + } } /* interface setup */ @@ -2105,12 +2124,13 @@ void ieee80211_dynamic_ps_enable_work(struct work_struct *work) struct ieee80211_local *local = container_of(work, struct ieee80211_local, dynamic_ps_enable_work); + /* XXX: using scan_sdata is completely broken! */ struct ieee80211_sub_if_data *sdata = local->scan_sdata; if (local->hw.conf.flags & IEEE80211_CONF_PS) return; - if (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK) + if (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK && sdata) ieee80211_send_nullfunc(local, sdata, 1); local->hw.conf.flags |= IEEE80211_CONF_PS; diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index 027302326498..81985d27cbda 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -156,8 +156,19 @@ int __ieee80211_resume(struct ieee80211_hw *hw) case NL80211_IFTYPE_ADHOC: case NL80211_IFTYPE_AP: case NL80211_IFTYPE_MESH_POINT: - WARN_ON(ieee80211_if_config(sdata, changed)); - ieee80211_bss_info_change_notify(sdata, ~0); + /* + * Driver's config_interface can fail if rfkill is + * enabled. Accommodate this return code. 
+ * FIXME: When mac80211 has knowledge of rfkill + * state the code below can change back to: + * WARN(ieee80211_if_config(sdata, changed)); + * ieee80211_bss_info_change_notify(sdata, ~0); + */ + if (ieee80211_if_config(sdata, changed)) + printk(KERN_DEBUG "%s: failed to configure interface during resume\n", + sdata->dev->name); + else + ieee80211_bss_info_change_notify(sdata, ~0); break; case NL80211_IFTYPE_WDS: break; diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c index 3824990d340b..d9233ec50610 100644 --- a/net/mac80211/rc80211_minstrel.c +++ b/net/mac80211/rc80211_minstrel.c @@ -476,8 +476,8 @@ minstrel_alloc_sta(void *priv, struct ieee80211_sta *sta, gfp_t gfp) return NULL; for (i = 0; i < IEEE80211_NUM_BANDS; i++) { - sband = hw->wiphy->bands[hw->conf.channel->band]; - if (sband->n_bitrates > max_rates) + sband = hw->wiphy->bands[i]; + if (sband && sband->n_bitrates > max_rates) max_rates = sband->n_bitrates; } diff --git a/net/mac80211/rc80211_pid_algo.c b/net/mac80211/rc80211_pid_algo.c index b16801cde06f..8bef9a1262ff 100644 --- a/net/mac80211/rc80211_pid_algo.c +++ b/net/mac80211/rc80211_pid_algo.c @@ -317,13 +317,44 @@ rate_control_pid_rate_init(void *priv, struct ieee80211_supported_band *sband, struct ieee80211_sta *sta, void *priv_sta) { struct rc_pid_sta_info *spinfo = priv_sta; + struct rc_pid_info *pinfo = priv; + struct rc_pid_rateinfo *rinfo = pinfo->rinfo; struct sta_info *si; + int i, j, tmp; + bool s; /* TODO: This routine should consider using RSSI from previous packets * as we need to have IEEE 802.1X auth succeed immediately after assoc.. * Until that method is implemented, we will use the lowest supported * rate as a workaround. */ + /* Sort the rates. This is optimized for the most common case (i.e. + * almost-sorted CCK+OFDM rates). Kind of bubble-sort with reversed + * mapping too. */ + for (i = 0; i < sband->n_bitrates; i++) { + rinfo[i].index = i; + rinfo[i].rev_index = i; + if (RC_PID_FAST_START) + rinfo[i].diff = 0; + else + rinfo[i].diff = i * pinfo->norm_offset; + } + for (i = 1; i < sband->n_bitrates; i++) { + s = 0; + for (j = 0; j < sband->n_bitrates - i; j++) + if (unlikely(sband->bitrates[rinfo[j].index].bitrate > + sband->bitrates[rinfo[j + 1].index].bitrate)) { + tmp = rinfo[j].index; + rinfo[j].index = rinfo[j + 1].index; + rinfo[j + 1].index = tmp; + rinfo[rinfo[j].index].rev_index = j; + rinfo[rinfo[j + 1].index].rev_index = j + 1; + s = 1; + } + if (!s) + break; + } + spinfo->txrate_idx = rate_lowest_index(sband, sta); /* HACK */ si = container_of(sta, struct sta_info, sta); @@ -336,21 +367,22 @@ static void *rate_control_pid_alloc(struct ieee80211_hw *hw, struct rc_pid_info *pinfo; struct rc_pid_rateinfo *rinfo; struct ieee80211_supported_band *sband; - int i, j, tmp; - bool s; + int i, max_rates = 0; #ifdef CONFIG_MAC80211_DEBUGFS struct rc_pid_debugfs_entries *de; #endif - sband = hw->wiphy->bands[hw->conf.channel->band]; - pinfo = kmalloc(sizeof(*pinfo), GFP_ATOMIC); if (!pinfo) return NULL; - /* We can safely assume that sband won't change unless we get - * reinitialized. 
*/ - rinfo = kmalloc(sizeof(*rinfo) * sband->n_bitrates, GFP_ATOMIC); + for (i = 0; i < IEEE80211_NUM_BANDS; i++) { + sband = hw->wiphy->bands[i]; + if (sband && sband->n_bitrates > max_rates) + max_rates = sband->n_bitrates; + } + + rinfo = kmalloc(sizeof(*rinfo) * max_rates, GFP_ATOMIC); if (!rinfo) { kfree(pinfo); return NULL; @@ -368,33 +400,6 @@ static void *rate_control_pid_alloc(struct ieee80211_hw *hw, pinfo->rinfo = rinfo; pinfo->oldrate = 0; - /* Sort the rates. This is optimized for the most common case (i.e. - * almost-sorted CCK+OFDM rates). Kind of bubble-sort with reversed - * mapping too. */ - for (i = 0; i < sband->n_bitrates; i++) { - rinfo[i].index = i; - rinfo[i].rev_index = i; - if (RC_PID_FAST_START) - rinfo[i].diff = 0; - else - rinfo[i].diff = i * pinfo->norm_offset; - } - for (i = 1; i < sband->n_bitrates; i++) { - s = 0; - for (j = 0; j < sband->n_bitrates - i; j++) - if (unlikely(sband->bitrates[rinfo[j].index].bitrate > - sband->bitrates[rinfo[j + 1].index].bitrate)) { - tmp = rinfo[j].index; - rinfo[j].index = rinfo[j + 1].index; - rinfo[j + 1].index = tmp; - rinfo[rinfo[j].index].rev_index = j; - rinfo[rinfo[j + 1].index].rev_index = j + 1; - s = 1; - } - if (!s) - break; - } - #ifdef CONFIG_MAC80211_DEBUGFS de = &pinfo->dentries; de->target = debugfs_create_u32("target_pf", S_IRUSR | S_IWUSR, diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 64ebe664effc..9776f73c51ad 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -29,6 +29,7 @@ static u8 ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw, struct tid_ampdu_rx *tid_agg_rx, struct sk_buff *skb, + struct ieee80211_rx_status *status, u16 mpdu_seq_num, int bar_req); /* @@ -1396,7 +1397,7 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx) * mac80211. That also explains the __skb_push() * below. 
*/ - align = (unsigned long)skb->data & 4; + align = (unsigned long)skb->data & 3; if (align) { if (WARN_ON(skb_headroom(skb) < 3)) { dev_kfree_skb(skb); @@ -1688,7 +1689,7 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx) /* manage reordering buffer according to requested */ /* sequence number */ rcu_read_lock(); - ieee80211_sta_manage_reorder_buf(hw, tid_agg_rx, NULL, + ieee80211_sta_manage_reorder_buf(hw, tid_agg_rx, NULL, NULL, start_seq_num, 1); rcu_read_unlock(); return RX_DROP_UNUSABLE; @@ -2293,6 +2294,7 @@ static inline u16 seq_sub(u16 sq1, u16 sq2) static u8 ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw, struct tid_ampdu_rx *tid_agg_rx, struct sk_buff *skb, + struct ieee80211_rx_status *rxstatus, u16 mpdu_seq_num, int bar_req) { @@ -2374,6 +2376,8 @@ static u8 ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw, /* put the frame in the reordering buffer */ tid_agg_rx->reorder_buf[index] = skb; + memcpy(tid_agg_rx->reorder_buf[index]->cb, rxstatus, + sizeof(*rxstatus)); tid_agg_rx->stored_mpdu_num++; /* release the buffer until next missing frame */ index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn) @@ -2399,7 +2403,8 @@ static u8 ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw, } static u8 ieee80211_rx_reorder_ampdu(struct ieee80211_local *local, - struct sk_buff *skb) + struct sk_buff *skb, + struct ieee80211_rx_status *status) { struct ieee80211_hw *hw = &local->hw; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; @@ -2448,7 +2453,7 @@ static u8 ieee80211_rx_reorder_ampdu(struct ieee80211_local *local, /* according to mpdu sequence number deal with reordering buffer */ mpdu_seq_num = (sc & IEEE80211_SCTL_SEQ) >> 4; - ret = ieee80211_sta_manage_reorder_buf(hw, tid_agg_rx, skb, + ret = ieee80211_sta_manage_reorder_buf(hw, tid_agg_rx, skb, status, mpdu_seq_num, 0); end_reorder: return ret; @@ -2512,7 +2517,7 @@ void __ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb, return; } - if (!ieee80211_rx_reorder_ampdu(local, skb)) + if (!ieee80211_rx_reorder_ampdu(local, skb, status)) __ieee80211_rx_handle_packet(hw, skb, status, rate); rcu_read_unlock(); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 3fb04a86444d..63656266d567 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -772,7 +772,7 @@ ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx) hdrlen = ieee80211_hdrlen(hdr->frame_control); /* internal error, why is TX_FRAGMENTED set? 
*/ - if (WARN_ON(skb->len <= frag_threshold)) + if (WARN_ON(skb->len + FCS_LEN <= frag_threshold)) return TX_DROP; /* diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index deb4ecec122a..959aa8379ccf 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -417,6 +417,7 @@ static int ieee80211_ioctl_siwtxpower(struct net_device *dev, { struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_channel* chan = local->hw.conf.channel; + bool reconf = false; u32 reconf_flags = 0; int new_power_level; @@ -427,14 +428,38 @@ static int ieee80211_ioctl_siwtxpower(struct net_device *dev, if (!chan) return -EINVAL; - if (data->txpower.fixed) - new_power_level = min(data->txpower.value, chan->max_power); - else /* Automatic power level setting */ - new_power_level = chan->max_power; + /* only change when not disabling */ + if (!data->txpower.disabled) { + if (data->txpower.fixed) { + if (data->txpower.value < 0) + return -EINVAL; + new_power_level = data->txpower.value; + /* + * Debatable, but we cannot do a fixed power + * level above the regulatory constraint. + * Use "iwconfig wlan0 txpower 15dBm" instead. + */ + if (new_power_level > chan->max_power) + return -EINVAL; + } else { + /* + * Automatic power level setting, max being the value + * passed in from userland. + */ + if (data->txpower.value < 0) + new_power_level = -1; + else + new_power_level = data->txpower.value; + } + + reconf = true; - local->user_power_level = new_power_level; - if (local->hw.conf.power_level != new_power_level) - reconf_flags |= IEEE80211_CONF_CHANGE_POWER; + /* + * ieee80211_hw_config() will limit to the channel's + * max power and possibly power constraint from AP. + */ + local->user_power_level = new_power_level; + } if (local->hw.conf.radio_enabled != !(data->txpower.disabled)) { local->hw.conf.radio_enabled = !(data->txpower.disabled); @@ -442,7 +467,7 @@ static int ieee80211_ioctl_siwtxpower(struct net_device *dev, ieee80211_led_radio(local, local->hw.conf.radio_enabled); } - if (reconf_flags) + if (reconf || reconf_flags) ieee80211_hw_config(local, reconf_flags); return 0; @@ -530,7 +555,7 @@ static int ieee80211_ioctl_giwfrag(struct net_device *dev, struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); frag->value = local->fragmentation_threshold; - frag->disabled = (frag->value >= IEEE80211_MAX_RTS_THRESHOLD); + frag->disabled = (frag->value >= IEEE80211_MAX_FRAG_THRESHOLD); frag->fixed = 1; return 0; diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 2c967e4f706c..cb3ad741ebf8 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -52,7 +52,7 @@ config NF_CT_ACCT Please note that currently this option only sets a default state. You may change it at boot time with nf_conntrack.acct=0/1 kernel - paramater or by loading the nf_conntrack module with acct=0/1. + parameter or by loading the nf_conntrack module with acct=0/1. You may also disable/enable it on a running system with: sysctl net.netfilter.nf_conntrack_acct=0/1 @@ -275,6 +275,8 @@ config NF_CT_NETLINK help This option enables support for a netlink-based userspace interface +endif # NF_CONNTRACK + # transparent proxy support config NETFILTER_TPROXY tristate "Transparent proxying support (EXPERIMENTAL)" @@ -290,8 +292,6 @@ config NETFILTER_TPROXY To compile it as a module, choose M here. If unsure, say N. 
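The Kconfig cleanups above accompany the larger netfilter theme of this merge: the ip_tables/ip6_tables counter paths drop the mutex-plus-RCU snapshot scheme (xt_table_entry_swap_rcu, clone_counters and friends all disappear) in favour of per-CPU locks, with xt_info_rdlock_bh() on the packet-processing path and xt_info_wrlock(cpu) wherever counters are summed or tables replaced, as the x_tables.c hunk further down shows. The sketch below reconstructs how such a recursive per-CPU read lock can be structured; it is inferred from the call pattern visible in these hunks and period-appropriate 2.6.30 per-CPU APIs, so treat it as an approximation with demo_* names, not the exact upstream implementation.

    #include <linux/kernel.h>
    #include <linux/percpu.h>
    #include <linux/spinlock.h>

    struct demo_xt_info_lock {
            spinlock_t lock;
            unsigned char readers;
    };
    static DEFINE_PER_CPU(struct demo_xt_info_lock, demo_xt_info_locks);

    /*
     * The read side runs only on the local CPU with bottom halves off,
     * so nested readers just bump a counter; only the outermost reader
     * takes the spinlock that writers contend on.
     */
    static inline void demo_xt_info_rdlock_bh(void)
    {
            struct demo_xt_info_lock *lock;

            local_bh_disable();
            lock = &__get_cpu_var(demo_xt_info_locks);
            if (likely(!lock->readers++))
                    spin_lock(&lock->lock);
    }

    static inline void demo_xt_info_rdunlock_bh(void)
    {
            struct demo_xt_info_lock *lock = &__get_cpu_var(demo_xt_info_locks);

            if (likely(!--lock->readers))
                    spin_unlock(&lock->lock);
            local_bh_enable();
    }

    /* Writers lock one CPU's table copy at a time, BH already disabled. */
    static inline void demo_xt_info_wrlock(unsigned int cpu)
    {
            spin_lock(&per_cpu(demo_xt_info_locks, cpu).lock);
    }

    static inline void demo_xt_info_wrunlock(unsigned int cpu)
    {
            spin_unlock(&per_cpu(demo_xt_info_locks, cpu).lock);
    }

With a lock of this shape, do_add_counters() takes exactly the form seen in the ip6_tables hunk: local_bh_disable(), xt_info_wrlock(smp_processor_id()), iterate with ADD_COUNTER, then unlock and re-enable bottom halves.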
-endif # NF_CONNTRACK - config NETFILTER_XTABLES tristate "Netfilter Xtables support (required for ip_tables)" default m if NETFILTER_ADVANCED=n @@ -374,7 +374,7 @@ config NETFILTER_XT_TARGET_HL config NETFILTER_XT_TARGET_LED tristate '"LED" target support' - depends on LEDS_CLASS && LED_TRIGGERS + depends on LEDS_CLASS && LEDS_TRIGGERS depends on NETFILTER_ADVANCED help This option adds a `LED' target, which allows you to blink LEDs in @@ -837,6 +837,7 @@ config NETFILTER_XT_MATCH_SOCKET depends on NETFILTER_TPROXY depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED + depends on !NF_CONNTRACK || NF_CONNTRACK select NF_DEFRAG_IPV4 help This option adds a `socket' match, which can be used to match diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 60aba45023ff..77bfdfeb966e 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -260,7 +260,10 @@ struct ip_vs_conn *ip_vs_ct_in_get list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { if (cp->af == af && ip_vs_addr_equal(af, s_addr, &cp->caddr) && - ip_vs_addr_equal(af, d_addr, &cp->vaddr) && + /* protocol should only be IPPROTO_IP if + * d_addr is a fwmark */ + ip_vs_addr_equal(protocol == IPPROTO_IP ? AF_UNSPEC : af, + d_addr, &cp->vaddr) && s_port == cp->cport && d_port == cp->vport && cp->flags & IP_VS_CONN_F_TEMPLATE && protocol == cp->protocol) { @@ -698,7 +701,9 @@ ip_vs_conn_new(int af, int proto, const union nf_inet_addr *caddr, __be16 cport, cp->cport = cport; ip_vs_addr_copy(af, &cp->vaddr, vaddr); cp->vport = vport; - ip_vs_addr_copy(af, &cp->daddr, daddr); + /* proto should only be IPPROTO_IP if d_addr is a fwmark */ + ip_vs_addr_copy(proto == IPPROTO_IP ? AF_UNSPEC : af, + &cp->daddr, daddr); cp->dport = dport; cp->flags = flags; spin_lock_init(&cp->lock); diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index cb3e031335eb..8dddb17a947a 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -278,7 +278,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc, */ if (svc->fwmark) { union nf_inet_addr fwmark = { - .all = { 0, 0, 0, htonl(svc->fwmark) } + .ip = htonl(svc->fwmark) }; ct = ip_vs_ct_in_get(svc->af, IPPROTO_IP, &snet, 0, @@ -306,7 +306,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc, */ if (svc->fwmark) { union nf_inet_addr fwmark = { - .all = { 0, 0, 0, htonl(svc->fwmark) } + .ip = htonl(svc->fwmark) }; ct = ip_vs_conn_new(svc->af, IPPROTO_IP, diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 3940f996a2e4..afde8f991646 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -372,7 +372,7 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect) struct net *net = nf_ct_exp_net(expect); struct hlist_node *n; unsigned int h; - int ret = 0; + int ret = 1; if (!master_help->helper) { ret = -ESHUTDOWN; @@ -412,41 +412,23 @@ out: return ret; } -int nf_ct_expect_related(struct nf_conntrack_expect *expect) +int nf_ct_expect_related_report(struct nf_conntrack_expect *expect, + u32 pid, int report) { int ret; spin_lock_bh(&nf_conntrack_lock); ret = __nf_ct_expect_check(expect); - if (ret < 0) + if (ret <= 0) goto out; + ret = 0; nf_ct_expect_insert(expect); - atomic_inc(&expect->use); - spin_unlock_bh(&nf_conntrack_lock); - nf_ct_expect_event(IPEXP_NEW, expect); - nf_ct_expect_put(expect); - return ret; -out: spin_unlock_bh(&nf_conntrack_lock); + nf_ct_expect_event_report(IPEXP_NEW, expect, pid, 
report); return ret; -} -EXPORT_SYMBOL_GPL(nf_ct_expect_related); - -int nf_ct_expect_related_report(struct nf_conntrack_expect *expect, - u32 pid, int report) -{ - int ret; - - spin_lock_bh(&nf_conntrack_lock); - ret = __nf_ct_expect_check(expect); - if (ret < 0) - goto out; - nf_ct_expect_insert(expect); out: spin_unlock_bh(&nf_conntrack_lock); - if (ret == 0) - nf_ct_expect_event_report(IPEXP_NEW, expect, pid, report); return ret; } EXPORT_SYMBOL_GPL(nf_ct_expect_related_report); diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 30b8e9009f99..0fa5a422959f 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -176,7 +176,7 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me, } /* Get rid of expecteds, set helpers to NULL. */ - hlist_for_each_entry(h, nn, &net->ct.unconfirmed, hnnode) + hlist_nulls_for_each_entry(h, nn, &net->ct.unconfirmed, hnnode) unhelp(h, me); for (i = 0; i < nf_conntrack_htable_size; i++) { hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index c6439c77953c..c523f0b8cee5 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -512,7 +512,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, skb = ctnetlink_alloc_skb(tuple(ct, IP_CT_DIR_ORIGINAL), GFP_ATOMIC); if (!skb) - return NOTIFY_DONE; + goto errout; b = skb->tail; @@ -591,8 +591,9 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, nla_put_failure: rcu_read_unlock(); nlmsg_failure: - nfnetlink_set_err(0, group, -ENOBUFS); kfree_skb(skb); +errout: + nfnetlink_set_err(0, group, -ENOBUFS); return NOTIFY_DONE; } #endif /* CONFIG_NF_CONNTRACK_EVENTS */ @@ -987,7 +988,7 @@ ctnetlink_change_helper(struct nf_conn *ct, struct nlattr *cda[]) { struct nf_conntrack_helper *helper; struct nf_conn_help *help = nfct_help(ct); - char *helpname; + char *helpname = NULL; int err; /* don't change helper of sibling connections */ @@ -1185,28 +1186,6 @@ ctnetlink_change_conntrack(struct nf_conn *ct, struct nlattr *cda[]) return 0; } -static inline void -ctnetlink_event_report(struct nf_conn *ct, u32 pid, int report) -{ - unsigned int events = 0; - - if (test_bit(IPS_EXPECTED_BIT, &ct->status)) - events |= IPCT_RELATED; - else - events |= IPCT_NEW; - - nf_conntrack_event_report(IPCT_STATUS | - IPCT_HELPER | - IPCT_REFRESH | - IPCT_PROTOINFO | - IPCT_NATSEQADJ | - IPCT_MARK | - events, - ct, - pid, - report); -} - static struct nf_conn * ctnetlink_create_conntrack(struct nlattr *cda[], struct nf_conntrack_tuple *otuple, @@ -1230,7 +1209,7 @@ ctnetlink_create_conntrack(struct nlattr *cda[], rcu_read_lock(); if (cda[CTA_HELP]) { - char *helpname; + char *helpname = NULL; err = ctnetlink_parse_help(cda[CTA_HELP], &helpname); if (err < 0) @@ -1372,6 +1351,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, err = -ENOENT; if (nlh->nlmsg_flags & NLM_F_CREATE) { struct nf_conn *ct; + enum ip_conntrack_events events; ct = ctnetlink_create_conntrack(cda, &otuple, &rtuple, u3); @@ -1382,9 +1362,18 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, err = 0; nf_conntrack_get(&ct->ct_general); spin_unlock_bh(&nf_conntrack_lock); - ctnetlink_event_report(ct, - NETLINK_CB(skb).pid, - nlmsg_report(nlh)); + if (test_bit(IPS_EXPECTED_BIT, &ct->status)) + events = IPCT_RELATED; + else + events = IPCT_NEW; + + 
nf_conntrack_event_report(IPCT_STATUS | + IPCT_HELPER | + IPCT_PROTOINFO | + IPCT_NATSEQADJ | + IPCT_MARK | events, + ct, NETLINK_CB(skb).pid, + nlmsg_report(nlh)); nf_ct_put(ct); } else spin_unlock_bh(&nf_conntrack_lock); @@ -1403,9 +1392,13 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, if (err == 0) { nf_conntrack_get(&ct->ct_general); spin_unlock_bh(&nf_conntrack_lock); - ctnetlink_event_report(ct, - NETLINK_CB(skb).pid, - nlmsg_report(nlh)); + nf_conntrack_event_report(IPCT_STATUS | + IPCT_HELPER | + IPCT_PROTOINFO | + IPCT_NATSEQADJ | + IPCT_MARK, + ct, NETLINK_CB(skb).pid, + nlmsg_report(nlh)); nf_ct_put(ct); } else spin_unlock_bh(&nf_conntrack_lock); @@ -1564,7 +1557,7 @@ static int ctnetlink_expect_event(struct notifier_block *this, skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); if (!skb) - return NOTIFY_DONE; + goto errout; b = skb->tail; @@ -1589,8 +1582,9 @@ static int ctnetlink_expect_event(struct notifier_block *this, nla_put_failure: rcu_read_unlock(); nlmsg_failure: - nfnetlink_set_err(0, 0, -ENOBUFS); kfree_skb(skb); +errout: + nfnetlink_set_err(0, 0, -ENOBUFS); return NOTIFY_DONE; } #endif diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index 50dac8dbe7d8..aee0d6bea309 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -22,6 +22,7 @@ #include <linux/netfilter/nfnetlink_conntrack.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_l4proto.h> +#include <net/netfilter/nf_conntrack_ecache.h> #include <net/netfilter/nf_log.h> static DEFINE_RWLOCK(dccp_lock); @@ -553,6 +554,9 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, ct->proto.dccp.state = new_state; write_unlock_bh(&dccp_lock); + if (new_state != old_state) + nf_conntrack_event_cache(IPCT_PROTOINFO, ct); + dn = dccp_pernet(net); nf_ct_refresh_acct(ct, ctinfo, skb, dn->dccp_timeout[new_state]); @@ -633,6 +637,8 @@ static int dccp_to_nlattr(struct sk_buff *skb, struct nlattr *nla, if (!nest_parms) goto nla_put_failure; NLA_PUT_U8(skb, CTA_PROTOINFO_DCCP_STATE, ct->proto.dccp.state); + NLA_PUT_U8(skb, CTA_PROTOINFO_DCCP_ROLE, + ct->proto.dccp.role[IP_CT_DIR_ORIGINAL]); nla_nest_end(skb, nest_parms); read_unlock_bh(&dccp_lock); return 0; @@ -644,6 +650,7 @@ nla_put_failure: static const struct nla_policy dccp_nla_policy[CTA_PROTOINFO_DCCP_MAX + 1] = { [CTA_PROTOINFO_DCCP_STATE] = { .type = NLA_U8 }, + [CTA_PROTOINFO_DCCP_ROLE] = { .type = NLA_U8 }, }; static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct) @@ -661,11 +668,21 @@ static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct) return err; if (!tb[CTA_PROTOINFO_DCCP_STATE] || - nla_get_u8(tb[CTA_PROTOINFO_DCCP_STATE]) >= CT_DCCP_IGNORE) + !tb[CTA_PROTOINFO_DCCP_ROLE] || + nla_get_u8(tb[CTA_PROTOINFO_DCCP_ROLE]) > CT_DCCP_ROLE_MAX || + nla_get_u8(tb[CTA_PROTOINFO_DCCP_STATE]) >= CT_DCCP_IGNORE) { return -EINVAL; + } write_lock_bh(&dccp_lock); ct->proto.dccp.state = nla_get_u8(tb[CTA_PROTOINFO_DCCP_STATE]); + if (nla_get_u8(tb[CTA_PROTOINFO_DCCP_ROLE]) == CT_DCCP_ROLE_CLIENT) { + ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_CLIENT; + ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_SERVER; + } else { + ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_SERVER; + ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_CLIENT; + } write_unlock_bh(&dccp_lock); return 0; } @@ -777,6 +794,7 @@ static struct nf_conntrack_l4proto dccp_proto6 __read_mostly = { .print_conntrack 
= dccp_print_conntrack, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) .to_nlattr = dccp_to_nlattr, + .nlattr_size = dccp_nlattr_size, .from_nlattr = nlattr_to_dccp, .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index b5ccf2b4b2e7..97a6e93d742e 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -634,6 +634,14 @@ static bool tcp_in_window(const struct nf_conn *ct, sender->td_end = end; sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED; } + if (tcph->ack) { + if (!(sender->flags & IP_CT_TCP_FLAG_MAXACK_SET)) { + sender->td_maxack = ack; + sender->flags |= IP_CT_TCP_FLAG_MAXACK_SET; + } else if (after(ack, sender->td_maxack)) + sender->td_maxack = ack; + } + /* * Update receiver data. */ @@ -919,6 +927,16 @@ static int tcp_packet(struct nf_conn *ct, return -NF_ACCEPT; case TCP_CONNTRACK_CLOSE: if (index == TCP_RST_SET + && (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET) + && before(ntohl(th->seq), ct->proto.tcp.seen[!dir].td_maxack)) { + /* Invalid RST */ + write_unlock_bh(&tcp_lock); + if (LOG_INVALID(net, IPPROTO_TCP)) + nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + "nf_ct_tcp: invalid RST "); + return -NF_ACCEPT; + } + if (index == TCP_RST_SET && ((test_bit(IPS_SEEN_REPLY_BIT, &ct->status) && ct->proto.tcp.last_index == TCP_SYN_SET) || (!test_bit(IPS_ASSURED_BIT, &ct->status) diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index 4614696c1b88..0badedc542d3 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -204,6 +204,7 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly = .error = udplite_error, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, + .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, .nla_policy = nf_ct_port_nla_policy, #endif diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 8bb998fe098b..beb37311e1a5 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -36,10 +36,14 @@ static struct nf_logger *__find_logger(int pf, const char *str_logger) int nf_log_register(u_int8_t pf, struct nf_logger *logger) { const struct nf_logger *llog; + int i; if (pf >= ARRAY_SIZE(nf_loggers)) return -EINVAL; + for (i = 0; i < ARRAY_SIZE(logger->list); i++) + INIT_LIST_HEAD(&logger->list[i]); + mutex_lock(&nf_log_mutex); if (pf == NFPROTO_UNSPEC) { diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 2785d66a7e38..b8ab37ad7ed5 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -203,7 +203,7 @@ static int __init nfnetlink_init(void) nfnetlink_rcv, NULL, THIS_MODULE); if (!nfnl) { printk(KERN_ERR "cannot initialize nfnetlink!\n"); - return -1; + return -ENOMEM; } return 0; diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index fd326ac27ec8..66a6dd5c519a 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -581,6 +581,12 @@ nfulnl_log_packet(u_int8_t pf, + nla_total_size(sizeof(struct nfulnl_msg_packet_hw)) + nla_total_size(sizeof(struct nfulnl_msg_packet_timestamp)); + if (in && skb_mac_header_was_set(skb)) { + size += nla_total_size(skb->dev->hard_header_len) + + 
nla_total_size(sizeof(u_int16_t)) /* hwtype */ + + nla_total_size(sizeof(u_int16_t)); /* hwlen */ + } + spin_lock_bh(&inst->lock); if (inst->flags & NFULNL_CFG_F_SEQ) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 509a95621f9f..150e5cf62f85 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -625,20 +625,6 @@ void xt_free_table_info(struct xt_table_info *info) } EXPORT_SYMBOL(xt_free_table_info); -void xt_table_entry_swap_rcu(struct xt_table_info *oldinfo, - struct xt_table_info *newinfo) -{ - unsigned int cpu; - - for_each_possible_cpu(cpu) { - void *p = oldinfo->entries[cpu]; - rcu_assign_pointer(oldinfo->entries[cpu], newinfo->entries[cpu]); - newinfo->entries[cpu] = p; - } - -} -EXPORT_SYMBOL_GPL(xt_table_entry_swap_rcu); - /* Find table by name, grabs mutex & ref. Returns ERR_PTR() on error. */ struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af, const char *name) @@ -676,32 +662,43 @@ void xt_compat_unlock(u_int8_t af) EXPORT_SYMBOL_GPL(xt_compat_unlock); #endif +DEFINE_PER_CPU(struct xt_info_lock, xt_info_locks); +EXPORT_PER_CPU_SYMBOL_GPL(xt_info_locks); + + struct xt_table_info * xt_replace_table(struct xt_table *table, unsigned int num_counters, struct xt_table_info *newinfo, int *error) { - struct xt_table_info *oldinfo, *private; + struct xt_table_info *private; /* Do the substitution. */ - mutex_lock(&table->lock); + local_bh_disable(); private = table->private; + /* Check inside lock: is the old number correct? */ if (num_counters != private->number) { duprintf("num_counters != table->private->number (%u/%u)\n", num_counters, private->number); - mutex_unlock(&table->lock); + local_bh_enable(); *error = -EAGAIN; return NULL; } - oldinfo = private; - rcu_assign_pointer(table->private, newinfo); - newinfo->initial_entries = oldinfo->initial_entries; - mutex_unlock(&table->lock); - synchronize_net(); - return oldinfo; + table->private = newinfo; + newinfo->initial_entries = private->initial_entries; + + /* + * Even though table entries have now been swapped, other CPU's + * may still be using the old entries. This is okay, because + * resynchronization happens because of the locking done + * during the get_counters() routine. + */ + local_bh_enable(); + + return private; } EXPORT_SYMBOL_GPL(xt_replace_table); @@ -734,7 +731,6 @@ struct xt_table *xt_register_table(struct net *net, struct xt_table *table, /* Simplifies replace_table code. 
*/ table->private = bootstrap; - mutex_init(&table->lock); if (!xt_replace_table(table, 0, newinfo, &ret)) goto unlock; @@ -1147,7 +1143,14 @@ static struct pernet_operations xt_net_ops = { static int __init xt_init(void) { - int i, rv; + unsigned int i; + int rv; + + for_each_possible_cpu(i) { + struct xt_info_lock *lock = &per_cpu(xt_info_locks, i); + spin_lock_init(&lock->lock); + lock->readers = 0; + } xt = kmalloc(sizeof(struct xt_af) * NFPROTO_NUMPROTO, GFP_KERNEL); if (!xt) diff --git a/net/netfilter/xt_cluster.c b/net/netfilter/xt_cluster.c index 6c4847662b85..69a639f35403 100644 --- a/net/netfilter/xt_cluster.c +++ b/net/netfilter/xt_cluster.c @@ -135,7 +135,13 @@ static bool xt_cluster_mt_checkentry(const struct xt_mtchk_param *par) { struct xt_cluster_match_info *info = par->matchinfo; - if (info->node_mask >= (1 << info->total_nodes)) { + if (info->total_nodes > XT_CLUSTER_NODES_MAX) { + printk(KERN_ERR "xt_cluster: you have exceeded the maximum " + "number of cluster nodes (%u > %u)\n", + info->total_nodes, XT_CLUSTER_NODES_MAX); + return false; + } + if (info->node_mask >= (1ULL << info->total_nodes)) { printk(KERN_ERR "xt_cluster: this node mask cannot be " "higher than the total number of nodes\n"); return false; diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index a5b5369c30f9..219dcdbe388c 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -926,7 +926,7 @@ static int dl_seq_show(struct seq_file *s, void *v) if (!hlist_empty(&htable->hash[*bucket])) { hlist_for_each_entry(ent, pos, &htable->hash[*bucket], node) if (dl_seq_real_show(ent, htable->family, s)) - return 1; + return -1; } return 0; } diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index 791e030ea903..eb0ceb846527 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -474,7 +474,7 @@ static ssize_t recent_old_proc_write(struct file *file, struct recent_table *t = pde->data; struct recent_entry *e; char buf[sizeof("+255.255.255.255")], *c = buf; - __be32 addr; + union nf_inet_addr addr = {}; int add; if (size > sizeof(buf)) @@ -506,14 +506,13 @@ static ssize_t recent_old_proc_write(struct file *file, add = 1; break; } - addr = in_aton(c); + addr.ip = in_aton(c); spin_lock_bh(&recent_lock); - e = recent_entry_lookup(t, (const void *)&addr, NFPROTO_IPV4, 0); + e = recent_entry_lookup(t, &addr, NFPROTO_IPV4, 0); if (e == NULL) { if (add) - recent_entry_init(t, (const void *)&addr, - NFPROTO_IPV4, 0); + recent_entry_init(t, &addr, NFPROTO_IPV4, 0); } else { if (add) recent_entry_update(t, e); diff --git a/net/netlabel/netlabel_addrlist.c b/net/netlabel/netlabel_addrlist.c index 834c6eb7f484..c0519139679e 100644 --- a/net/netlabel/netlabel_addrlist.c +++ b/net/netlabel/netlabel_addrlist.c @@ -256,13 +256,11 @@ struct netlbl_af4list *netlbl_af4list_remove(__be32 addr, __be32 mask, { struct netlbl_af4list *entry; - entry = netlbl_af4list_search(addr, head); - if (entry != NULL && entry->addr == addr && entry->mask == mask) { - netlbl_af4list_remove_entry(entry); - return entry; - } - - return NULL; + entry = netlbl_af4list_search_exact(addr, mask, head); + if (entry == NULL) + return NULL; + netlbl_af4list_remove_entry(entry); + return entry; } #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) @@ -299,15 +297,11 @@ struct netlbl_af6list *netlbl_af6list_remove(const struct in6_addr *addr, { struct netlbl_af6list *entry; - entry = netlbl_af6list_search(addr, head); - if (entry != NULL && - ipv6_addr_equal(&entry->addr, addr) 
&& - ipv6_addr_equal(&entry->mask, mask)) { - netlbl_af6list_remove_entry(entry); - return entry; - } - - return NULL; + entry = netlbl_af6list_search_exact(addr, mask, head); + if (entry == NULL) + return NULL; + netlbl_af6list_remove_entry(entry); + return entry; } #endif /* IPv6 */ diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 4e705f87969f..3be0e016ab7d 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -1084,8 +1084,10 @@ static int nr_sendmsg(struct kiocb *iocb, struct socket *sock, /* Build a packet - the conventional user limit is 236 bytes. We can do ludicrously large NetROM frames but must not overflow */ - if (len > 65536) - return -EMSGSIZE; + if (len > 65536) { + err = -EMSGSIZE; + goto out; + } SOCK_DEBUG(sk, "NET/ROM: sendto: building packet.\n"); size = len + NR_NETWORK_LEN + NR_TRANSPORT_LEN; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 74776de523ec..f546e81acc45 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1758,8 +1758,9 @@ static void free_pg_vec(char **pg_vec, unsigned int order, unsigned int len) static inline char *alloc_one_pg_vec_page(unsigned long order) { - return (char *) __get_free_pages(GFP_KERNEL | __GFP_COMP | __GFP_ZERO, - order); + gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | __GFP_ZERO | __GFP_NOWARN; + + return (char *) __get_free_pages(gfp_flags, order); } static char **alloc_pg_vec(struct tpacket_req *req, int order) diff --git a/net/phonet/Kconfig b/net/phonet/Kconfig index 51a5669573f2..6ec7d55b1769 100644 --- a/net/phonet/Kconfig +++ b/net/phonet/Kconfig @@ -6,7 +6,7 @@ config PHONET tristate "Phonet protocols family" help The Phone Network protocol (PhoNet) is a packet-oriented - communication protocol developped by Nokia for use with its modems. + communication protocol developed by Nokia for use with its modems. This is required for Maemo to use cellular data connectivity (if supported). 
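/*
 * A minimal sketch of the af_packet change above: the ring-buffer
 * backing store is a high-order allocation that is allowed to fail,
 * so __GFP_NOWARN suppresses the page-allocation-failure warning and
 * the caller simply sees NULL and falls back to a smaller request.
 * alloc_ring_block() is a hypothetical name, not the mainline symbol.
 */
#include <linux/gfp.h>

static char *alloc_ring_block(unsigned long order)
{
	gfp_t gfp = GFP_KERNEL | __GFP_COMP | __GFP_ZERO | __GFP_NOWARN;

	/* May return NULL under memory pressure; callers must check. */
	return (char *)__get_free_pages(gfp, order);
}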
It can also be used to control Nokia phones diff --git a/net/rds/ib.c b/net/rds/ib.c index 06a7b798d9a7..4933b380985e 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -51,6 +51,7 @@ MODULE_PARM_DESC(fmr_message_size, " Max size of a RDMA transfer"); struct list_head rds_ib_devices; +/* NOTE: if also grabbing ibdev lock, grab this first */ DEFINE_SPINLOCK(ib_nodev_conns_lock); LIST_HEAD(ib_nodev_conns); @@ -137,7 +138,7 @@ void rds_ib_remove_one(struct ib_device *device) kfree(i_ipaddr); } - rds_ib_remove_conns(rds_ibdev); + rds_ib_destroy_conns(rds_ibdev); if (rds_ibdev->mr_pool) rds_ib_destroy_mr_pool(rds_ibdev->mr_pool); @@ -249,7 +250,7 @@ static int rds_ib_laddr_check(__be32 addr) void rds_ib_exit(void) { rds_info_deregister_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info); - rds_ib_remove_nodev_conns(); + rds_ib_destroy_nodev_conns(); ib_unregister_client(&rds_ib_client); rds_ib_sysctl_exit(); rds_ib_recv_exit(); diff --git a/net/rds/ib.h b/net/rds/ib.h index 8be563a1363a..069206cae733 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -108,7 +108,12 @@ struct rds_ib_connection { /* sending acks */ unsigned long i_ack_flags; +#ifdef KERNEL_HAS_ATOMIC64 + atomic64_t i_ack_next; /* next ACK to send */ +#else + spinlock_t i_ack_lock; /* protect i_ack_next */ u64 i_ack_next; /* next ACK to send */ +#endif struct rds_header *i_ack; struct ib_send_wr i_ack_wr; struct ib_sge i_ack_sge; @@ -267,9 +272,17 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, /* ib_rdma.c */ int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr); -int rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn); -void rds_ib_remove_nodev_conns(void); -void rds_ib_remove_conns(struct rds_ib_device *rds_ibdev); +void rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn); +void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn); +void __rds_ib_destroy_conns(struct list_head *list, spinlock_t *list_lock); +static inline void rds_ib_destroy_nodev_conns(void) +{ + __rds_ib_destroy_conns(&ib_nodev_conns, &ib_nodev_conns_lock); +} +static inline void rds_ib_destroy_conns(struct rds_ib_device *rds_ibdev) +{ + __rds_ib_destroy_conns(&rds_ibdev->conn_list, &rds_ibdev->spinlock); +} struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *); void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_connection *iinfo); void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *); @@ -355,13 +368,4 @@ rds_ib_data_sge(struct rds_ib_connection *ic, struct ib_sge *sge) return &sge[1]; } -static inline void rds_ib_set_64bit(u64 *ptr, u64 val) -{ -#if BITS_PER_LONG == 64 - *ptr = val; -#else - set_64bit(ptr, val); -#endif -} - #endif diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index 0532237bd128..f8e40e1a6038 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -126,9 +126,7 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even err = rds_ib_update_ipaddr(rds_ibdev, conn->c_laddr); if (err) printk(KERN_ERR "rds_ib_update_ipaddr failed (%d)\n", err); - err = rds_ib_add_conn(rds_ibdev, conn); - if (err) - printk(KERN_ERR "rds_ib_add_conn failed (%d)\n", err); + rds_ib_add_conn(rds_ibdev, conn); /* If the peer gave us the last packet it saw, process this as if * we had received a regular ACK. */ @@ -616,18 +614,8 @@ void rds_ib_conn_shutdown(struct rds_connection *conn) /* * Move connection back to the nodev list. 
*/ - if (ic->rds_ibdev) { - - spin_lock_irq(&ic->rds_ibdev->spinlock); - BUG_ON(list_empty(&ic->ib_node)); - list_del(&ic->ib_node); - spin_unlock_irq(&ic->rds_ibdev->spinlock); - - spin_lock_irq(&ib_nodev_conns_lock); - list_add_tail(&ic->ib_node, &ib_nodev_conns); - spin_unlock_irq(&ib_nodev_conns_lock); - ic->rds_ibdev = NULL; - } + if (ic->rds_ibdev) + rds_ib_remove_conn(ic->rds_ibdev, conn); ic->i_cm_id = NULL; ic->i_pd = NULL; @@ -648,7 +636,11 @@ void rds_ib_conn_shutdown(struct rds_connection *conn) /* Clear the ACK state */ clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags); - rds_ib_set_64bit(&ic->i_ack_next, 0); +#ifdef KERNEL_HAS_ATOMIC64 + atomic64_set(&ic->i_ack_next, 0); +#else + ic->i_ack_next = 0; +#endif ic->i_ack_recv = 0; /* Clear flow control state */ @@ -681,6 +673,9 @@ int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp) INIT_LIST_HEAD(&ic->ib_node); mutex_init(&ic->i_recv_mutex); +#ifndef KERNEL_HAS_ATOMIC64 + spin_lock_init(&ic->i_ack_lock); +#endif /* * rds_ib_conn_shutdown() waits for these to be emptied so they @@ -701,11 +696,27 @@ int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp) return 0; } +/* + * Free a connection. Connection must be shut down and not set for reconnect. + */ void rds_ib_conn_free(void *arg) { struct rds_ib_connection *ic = arg; + spinlock_t *lock_ptr; + rdsdebug("ic %p\n", ic); + + /* + * Conn is either on a dev's list or on the nodev list. + * A race with shutdown() or connect() would cause problems + * (since rds_ibdev would change) but that should never happen. + */ + lock_ptr = ic->rds_ibdev ? &ic->rds_ibdev->spinlock : &ib_nodev_conns_lock; + + spin_lock_irq(lock_ptr); list_del(&ic->ib_node); + spin_unlock_irq(lock_ptr); + kfree(ic); } diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c index 69a6289ed672..81033af93020 100644 --- a/net/rds/ib_rdma.c +++ b/net/rds/ib_rdma.c @@ -139,7 +139,7 @@ int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr) return rds_ib_add_ipaddr(rds_ibdev, ipaddr); } -int rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn) +void rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn) { struct rds_ib_connection *ic = conn->c_transport_data; @@ -148,45 +148,44 @@ int rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn BUG_ON(list_empty(&ib_nodev_conns)); BUG_ON(list_empty(&ic->ib_node)); list_del(&ic->ib_node); - spin_unlock_irq(&ib_nodev_conns_lock); spin_lock_irq(&rds_ibdev->spinlock); list_add_tail(&ic->ib_node, &rds_ibdev->conn_list); spin_unlock_irq(&rds_ibdev->spinlock); + spin_unlock_irq(&ib_nodev_conns_lock); ic->rds_ibdev = rds_ibdev; - - return 0; } -void rds_ib_remove_nodev_conns(void) +void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn) { - struct rds_ib_connection *ic, *_ic; - LIST_HEAD(tmp_list); + struct rds_ib_connection *ic = conn->c_transport_data; - /* avoid calling conn_destroy with irqs off */ - spin_lock_irq(&ib_nodev_conns_lock); - list_splice(&ib_nodev_conns, &tmp_list); - INIT_LIST_HEAD(&ib_nodev_conns); - spin_unlock_irq(&ib_nodev_conns_lock); + /* place conn on nodev_conns_list */ + spin_lock(&ib_nodev_conns_lock); - list_for_each_entry_safe(ic, _ic, &tmp_list, ib_node) { - if (ic->conn->c_passive) - rds_conn_destroy(ic->conn->c_passive); - rds_conn_destroy(ic->conn); - } + spin_lock_irq(&rds_ibdev->spinlock); + BUG_ON(list_empty(&ic->ib_node)); + list_del(&ic->ib_node); + spin_unlock_irq(&rds_ibdev->spinlock); + + list_add_tail(&ic->ib_node, 
&ib_nodev_conns); + + spin_unlock(&ib_nodev_conns_lock); + + ic->rds_ibdev = NULL; } -void rds_ib_remove_conns(struct rds_ib_device *rds_ibdev) +void __rds_ib_destroy_conns(struct list_head *list, spinlock_t *list_lock) { struct rds_ib_connection *ic, *_ic; LIST_HEAD(tmp_list); /* avoid calling conn_destroy with irqs off */ - spin_lock_irq(&rds_ibdev->spinlock); - list_splice(&rds_ibdev->conn_list, &tmp_list); - INIT_LIST_HEAD(&rds_ibdev->conn_list); - spin_unlock_irq(&rds_ibdev->spinlock); + spin_lock_irq(list_lock); + list_splice(list, &tmp_list); + INIT_LIST_HEAD(list); + spin_unlock_irq(list_lock); list_for_each_entry_safe(ic, _ic, &tmp_list, ib_node) { if (ic->conn->c_passive) diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c index 5061b5502162..36d931573ff4 100644 --- a/net/rds/ib_recv.c +++ b/net/rds/ib_recv.c @@ -395,10 +395,37 @@ void rds_ib_recv_init_ack(struct rds_ib_connection *ic) * room for it beyond the ring size. Send completion notices its special * wr_id and avoids working with the ring in that case. */ +#ifndef KERNEL_HAS_ATOMIC64 static void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq, int ack_required) { - rds_ib_set_64bit(&ic->i_ack_next, seq); + unsigned long flags; + + spin_lock_irqsave(&ic->i_ack_lock, flags); + ic->i_ack_next = seq; + if (ack_required) + set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); + spin_unlock_irqrestore(&ic->i_ack_lock, flags); +} + +static u64 rds_ib_get_ack(struct rds_ib_connection *ic) +{ + unsigned long flags; + u64 seq; + + clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); + + spin_lock_irqsave(&ic->i_ack_lock, flags); + seq = ic->i_ack_next; + spin_unlock_irqrestore(&ic->i_ack_lock, flags); + + return seq; +} +#else +static void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq, + int ack_required) +{ + atomic64_set(&ic->i_ack_next, seq); if (ack_required) { smp_mb__before_clear_bit(); set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); @@ -410,8 +437,10 @@ static u64 rds_ib_get_ack(struct rds_ib_connection *ic) clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); smp_mb__after_clear_bit(); - return ic->i_ack_next; + return atomic64_read(&ic->i_ack_next); } +#endif + static void rds_ib_send_ack(struct rds_ib_connection *ic, unsigned int adv_credits) { @@ -464,6 +493,10 @@ static void rds_ib_send_ack(struct rds_ib_connection *ic, unsigned int adv_credi * - i_ack_next, which is the last sequence number we received * * Potentially, send queue and receive queue handlers can run concurrently. + * It would be nice to not have to use a spinlock to synchronize things, + * but the one problem that rules this out is that 64bit updates are + * not atomic on all platforms. Things would be a lot simpler if + * we had atomic64 or maybe cmpxchg64 everywhere. * * Reconnecting complicates this picture just slightly. When we * reconnect, we may be seeing duplicate packets. 
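/*
 * A sketch of the RDS ack bookkeeping above: a 64-bit sequence number
 * cannot be read or written atomically on all 32-bit platforms, so the
 * code uses atomic64_t where the architecture provides it (the same
 * ATOMIC64_INIT test rds.h uses to define KERNEL_HAS_ATOMIC64) and
 * falls back to a spinlock-protected u64 otherwise.  next_ack is a
 * hypothetical stand-in for the i_ack_next connection field.
 */
#include <linux/spinlock.h>
#include <linux/types.h>
#include <asm/atomic.h>

#ifdef ATOMIC64_INIT
static atomic64_t next_ack = ATOMIC64_INIT(0);

static void set_next_ack(u64 seq) { atomic64_set(&next_ack, seq); }
static u64 get_next_ack(void) { return atomic64_read(&next_ack); }
#else
static DEFINE_SPINLOCK(next_ack_lock);
static u64 next_ack;

static void set_next_ack(u64 seq)
{
	unsigned long flags;

	spin_lock_irqsave(&next_ack_lock, flags);
	next_ack = seq;
	spin_unlock_irqrestore(&next_ack_lock, flags);
}

static u64 get_next_ack(void)
{
	unsigned long flags;
	u64 seq;

	spin_lock_irqsave(&next_ack_lock, flags);
	seq = next_ack;
	spin_unlock_irqrestore(&next_ack_lock, flags);
	return seq;
}
#endif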
The peer diff --git a/net/rds/iw.c b/net/rds/iw.c index 1b56905c4c08..b732efb5b634 100644 --- a/net/rds/iw.c +++ b/net/rds/iw.c @@ -51,6 +51,7 @@ MODULE_PARM_DESC(fastreg_message_size, " Max size of a RDMA transfer (fastreg MR struct list_head rds_iw_devices; +/* NOTE: if also grabbing iwdev lock, grab this first */ DEFINE_SPINLOCK(iw_nodev_conns_lock); LIST_HEAD(iw_nodev_conns); @@ -145,7 +146,7 @@ void rds_iw_remove_one(struct ib_device *device) } spin_unlock_irq(&rds_iwdev->spinlock); - rds_iw_remove_conns(rds_iwdev); + rds_iw_destroy_conns(rds_iwdev); if (rds_iwdev->mr_pool) rds_iw_destroy_mr_pool(rds_iwdev->mr_pool); @@ -258,7 +259,7 @@ static int rds_iw_laddr_check(__be32 addr) void rds_iw_exit(void) { rds_info_deregister_func(RDS_INFO_IWARP_CONNECTIONS, rds_iw_ic_info); - rds_iw_remove_nodev_conns(); + rds_iw_destroy_nodev_conns(); ib_unregister_client(&rds_iw_client); rds_iw_sysctl_exit(); rds_iw_recv_exit(); diff --git a/net/rds/iw.h b/net/rds/iw.h index 0ddda34f2a1c..b4fb27252895 100644 --- a/net/rds/iw.h +++ b/net/rds/iw.h @@ -131,7 +131,12 @@ struct rds_iw_connection { /* sending acks */ unsigned long i_ack_flags; +#ifdef KERNEL_HAS_ATOMIC64 + atomic64_t i_ack_next; /* next ACK to send */ +#else + spinlock_t i_ack_lock; /* protect i_ack_next */ u64 i_ack_next; /* next ACK to send */ +#endif struct rds_header *i_ack; struct ib_send_wr i_ack_wr; struct ib_sge i_ack_sge; @@ -294,9 +299,17 @@ void rds_iw_cm_connect_complete(struct rds_connection *conn, /* ib_rdma.c */ int rds_iw_update_cm_id(struct rds_iw_device *rds_iwdev, struct rdma_cm_id *cm_id); -int rds_iw_add_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn); -void rds_iw_remove_nodev_conns(void); -void rds_iw_remove_conns(struct rds_iw_device *rds_iwdev); +void rds_iw_add_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn); +void rds_iw_remove_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn); +void __rds_iw_destroy_conns(struct list_head *list, spinlock_t *list_lock); +static inline void rds_iw_destroy_nodev_conns(void) +{ + __rds_iw_destroy_conns(&iw_nodev_conns, &iw_nodev_conns_lock); +} +static inline void rds_iw_destroy_conns(struct rds_iw_device *rds_iwdev) +{ + __rds_iw_destroy_conns(&rds_iwdev->conn_list, &rds_iwdev->spinlock); +} struct rds_iw_mr_pool *rds_iw_create_mr_pool(struct rds_iw_device *); void rds_iw_get_mr_info(struct rds_iw_device *rds_iwdev, struct rds_info_rdma_connection *iinfo); void rds_iw_destroy_mr_pool(struct rds_iw_mr_pool *); @@ -383,13 +396,4 @@ rds_iw_data_sge(struct rds_iw_connection *ic, struct ib_sge *sge) return &sge[1]; } -static inline void rds_iw_set_64bit(u64 *ptr, u64 val) -{ -#if BITS_PER_LONG == 64 - *ptr = val; -#else - set_64bit(ptr, val); -#endif -} - #endif diff --git a/net/rds/iw_cm.c b/net/rds/iw_cm.c index 57ecb3d4b8a5..a416b0d492b1 100644 --- a/net/rds/iw_cm.c +++ b/net/rds/iw_cm.c @@ -86,9 +86,7 @@ void rds_iw_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even err = rds_iw_update_cm_id(rds_iwdev, ic->i_cm_id); if (err) printk(KERN_ERR "rds_iw_update_ipaddr failed (%d)\n", err); - err = rds_iw_add_conn(rds_iwdev, conn); - if (err) - printk(KERN_ERR "rds_iw_add_conn failed (%d)\n", err); + rds_iw_add_conn(rds_iwdev, conn); /* If the peer gave us the last packet it saw, process this as if * we had received a regular ACK. */ @@ -637,19 +635,8 @@ void rds_iw_conn_shutdown(struct rds_connection *conn) * Move connection back to the nodev list. * Remove cm_id from the device cm_id list. 
*/ - if (ic->rds_iwdev) { - - spin_lock_irq(&ic->rds_iwdev->spinlock); - BUG_ON(list_empty(&ic->iw_node)); - list_del(&ic->iw_node); - spin_unlock_irq(&ic->rds_iwdev->spinlock); - - spin_lock_irq(&iw_nodev_conns_lock); - list_add_tail(&ic->iw_node, &iw_nodev_conns); - spin_unlock_irq(&iw_nodev_conns_lock); - rds_iw_remove_cm_id(ic->rds_iwdev, ic->i_cm_id); - ic->rds_iwdev = NULL; - } + if (ic->rds_iwdev) + rds_iw_remove_conn(ic->rds_iwdev, conn); rdma_destroy_id(ic->i_cm_id); @@ -672,7 +659,11 @@ void rds_iw_conn_shutdown(struct rds_connection *conn) /* Clear the ACK state */ clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags); - rds_iw_set_64bit(&ic->i_ack_next, 0); +#ifdef KERNEL_HAS_ATOMIC64 + atomic64_set(&ic->i_ack_next, 0); +#else + ic->i_ack_next = 0; +#endif ic->i_ack_recv = 0; /* Clear flow control state */ @@ -706,6 +697,9 @@ int rds_iw_conn_alloc(struct rds_connection *conn, gfp_t gfp) INIT_LIST_HEAD(&ic->iw_node); mutex_init(&ic->i_recv_mutex); +#ifndef KERNEL_HAS_ATOMIC64 + spin_lock_init(&ic->i_ack_lock); +#endif /* * rds_iw_conn_shutdown() waits for these to be emptied so they @@ -726,11 +720,27 @@ int rds_iw_conn_alloc(struct rds_connection *conn, gfp_t gfp) return 0; } +/* + * Free a connection. Connection must be shut down and not set for reconnect. + */ void rds_iw_conn_free(void *arg) { struct rds_iw_connection *ic = arg; + spinlock_t *lock_ptr; + rdsdebug("ic %p\n", ic); + + /* + * Conn is either on a dev's list or on the nodev list. + * A race with shutdown() or connect() would cause problems + * (since rds_iwdev would change) but that should never happen. + */ + lock_ptr = ic->rds_iwdev ? &ic->rds_iwdev->spinlock : &iw_nodev_conns_lock; + + spin_lock_irq(lock_ptr); list_del(&ic->iw_node); + spin_unlock_irq(lock_ptr); + kfree(ic); } diff --git a/net/rds/iw_rdma.c b/net/rds/iw_rdma.c index 1c02a8f952d0..dcdb37da80f2 100644 --- a/net/rds/iw_rdma.c +++ b/net/rds/iw_rdma.c @@ -196,7 +196,7 @@ int rds_iw_update_cm_id(struct rds_iw_device *rds_iwdev, struct rdma_cm_id *cm_i return rds_iw_add_cm_id(rds_iwdev, cm_id); } -int rds_iw_add_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn) +void rds_iw_add_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn) { struct rds_iw_connection *ic = conn->c_transport_data; @@ -205,45 +205,45 @@ int rds_iw_add_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn BUG_ON(list_empty(&iw_nodev_conns)); BUG_ON(list_empty(&ic->iw_node)); list_del(&ic->iw_node); - spin_unlock_irq(&iw_nodev_conns_lock); spin_lock_irq(&rds_iwdev->spinlock); list_add_tail(&ic->iw_node, &rds_iwdev->conn_list); spin_unlock_irq(&rds_iwdev->spinlock); + spin_unlock_irq(&iw_nodev_conns_lock); ic->rds_iwdev = rds_iwdev; - - return 0; } -void rds_iw_remove_nodev_conns(void) +void rds_iw_remove_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn) { - struct rds_iw_connection *ic, *_ic; - LIST_HEAD(tmp_list); + struct rds_iw_connection *ic = conn->c_transport_data; - /* avoid calling conn_destroy with irqs off */ - spin_lock_irq(&iw_nodev_conns_lock); - list_splice(&iw_nodev_conns, &tmp_list); - INIT_LIST_HEAD(&iw_nodev_conns); - spin_unlock_irq(&iw_nodev_conns_lock); + /* place conn on nodev_conns_list */ + spin_lock(&iw_nodev_conns_lock); - list_for_each_entry_safe(ic, _ic, &tmp_list, iw_node) { - if (ic->conn->c_passive) - rds_conn_destroy(ic->conn->c_passive); - rds_conn_destroy(ic->conn); - } + spin_lock_irq(&rds_iwdev->spinlock); + BUG_ON(list_empty(&ic->iw_node)); + list_del(&ic->iw_node); + 
spin_unlock_irq(&rds_iwdev->spinlock); + + list_add_tail(&ic->iw_node, &iw_nodev_conns); + + spin_unlock(&iw_nodev_conns_lock); + + rds_iw_remove_cm_id(ic->rds_iwdev, ic->i_cm_id); + ic->rds_iwdev = NULL; } -void rds_iw_remove_conns(struct rds_iw_device *rds_iwdev) +void __rds_iw_destroy_conns(struct list_head *list, spinlock_t *list_lock) { struct rds_iw_connection *ic, *_ic; LIST_HEAD(tmp_list); /* avoid calling conn_destroy with irqs off */ - spin_lock_irq(&rds_iwdev->spinlock); - list_splice(&rds_iwdev->conn_list, &tmp_list); - INIT_LIST_HEAD(&rds_iwdev->conn_list); - spin_unlock_irq(&rds_iwdev->spinlock); + spin_lock_irq(list_lock); + list_splice(list, &tmp_list); + INIT_LIST_HEAD(list); + spin_unlock_irq(list_lock); list_for_each_entry_safe(ic, _ic, &tmp_list, iw_node) { if (ic->conn->c_passive) diff --git a/net/rds/iw_recv.c b/net/rds/iw_recv.c index a1931f0027a2..fde470fa50d5 100644 --- a/net/rds/iw_recv.c +++ b/net/rds/iw_recv.c @@ -395,10 +395,37 @@ void rds_iw_recv_init_ack(struct rds_iw_connection *ic) * room for it beyond the ring size. Send completion notices its special * wr_id and avoids working with the ring in that case. */ +#ifndef KERNEL_HAS_ATOMIC64 static void rds_iw_set_ack(struct rds_iw_connection *ic, u64 seq, int ack_required) { - rds_iw_set_64bit(&ic->i_ack_next, seq); + unsigned long flags; + + spin_lock_irqsave(&ic->i_ack_lock, flags); + ic->i_ack_next = seq; + if (ack_required) + set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); + spin_unlock_irqrestore(&ic->i_ack_lock, flags); +} + +static u64 rds_iw_get_ack(struct rds_iw_connection *ic) +{ + unsigned long flags; + u64 seq; + + clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); + + spin_lock_irqsave(&ic->i_ack_lock, flags); + seq = ic->i_ack_next; + spin_unlock_irqrestore(&ic->i_ack_lock, flags); + + return seq; +} +#else +static void rds_iw_set_ack(struct rds_iw_connection *ic, u64 seq, + int ack_required) +{ + atomic64_set(&ic->i_ack_next, seq); if (ack_required) { smp_mb__before_clear_bit(); set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); @@ -410,8 +437,10 @@ static u64 rds_iw_get_ack(struct rds_iw_connection *ic) clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); smp_mb__after_clear_bit(); - return ic->i_ack_next; + return atomic64_read(&ic->i_ack_next); } +#endif + static void rds_iw_send_ack(struct rds_iw_connection *ic, unsigned int adv_credits) { @@ -464,6 +493,10 @@ static void rds_iw_send_ack(struct rds_iw_connection *ic, unsigned int adv_credi * - i_ack_next, which is the last sequence number we received * * Potentially, send queue and receive queue handlers can run concurrently. + * It would be nice to not have to use a spinlock to synchronize things, + * but the one problem that rules this out is that 64bit updates are + * not atomic on all platforms. Things would be a lot simpler if + * we had atomic64 or maybe cmpxchg64 everywhere. * * Reconnecting complicates this picture just slightly. When we * reconnect, we may be seeing duplicate packets. The peer diff --git a/net/rds/rds.h b/net/rds/rds.h index 060400704979..71794449ca4e 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -28,6 +28,10 @@ */ #define RDS_PORT 18634 +#ifdef ATOMIC64_INIT +#define KERNEL_HAS_ATOMIC64 +#endif + #ifdef DEBUG #define rdsdebug(fmt, args...) 
pr_debug("%s(): " fmt, __func__ , ##args) #else @@ -634,7 +638,7 @@ struct rds_message *rds_send_get_message(struct rds_connection *, void rds_rdma_unuse(struct rds_sock *rs, u32 r_key, int force); /* stats.c */ -DECLARE_PER_CPU(struct rds_statistics, rds_stats); +DECLARE_PER_CPU_SHARED_ALIGNED(struct rds_statistics, rds_stats); #define rds_stats_inc_which(which, member) do { \ per_cpu(which, get_cpu()).member++; \ put_cpu(); \ diff --git a/net/rds/send.c b/net/rds/send.c index 1b37364656f0..104fe033203d 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -615,7 +615,7 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest) { struct rds_message *rm, *tmp; struct rds_connection *conn; - unsigned long flags; + unsigned long flags, flags2; LIST_HEAD(list); int wake = 0; @@ -651,9 +651,9 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest) list_for_each_entry(rm, &list, m_sock_item) { /* We do this here rather than in the loop above, so that * we don't have to nest m_rs_lock under rs->rs_lock */ - spin_lock(&rm->m_rs_lock); + spin_lock_irqsave(&rm->m_rs_lock, flags2); rm->m_rs = NULL; - spin_unlock(&rm->m_rs_lock); + spin_unlock_irqrestore(&rm->m_rs_lock, flags2); /* * If we see this flag cleared then we're *sure* that someone diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 0f36e8d59b29..877a7f65f707 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -1072,10 +1072,6 @@ static int rose_sendmsg(struct kiocb *iocb, struct socket *sock, unsigned char *asmptr; int n, size, qbit = 0; - /* ROSE empty frame has no meaning : don't send */ - if (len == 0) - return 0; - if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_EOR|MSG_CMSG_COMPAT)) return -EINVAL; @@ -1273,12 +1269,6 @@ static int rose_recvmsg(struct kiocb *iocb, struct socket *sock, skb_reset_transport_header(skb); copied = skb->len; - /* ROSE empty frame has no meaning : ignore it */ - if (copied == 0) { - skb_free_datagram(sk, skb); - return copied; - } - if (copied > size) { copied = size; msg->msg_flags |= MSG_TRUNC; diff --git a/net/rxrpc/ar-connection.c b/net/rxrpc/ar-connection.c index 0f1218b8d289..67e38a056240 100644 --- a/net/rxrpc/ar-connection.c +++ b/net/rxrpc/ar-connection.c @@ -343,9 +343,9 @@ static int rxrpc_connect_exclusive(struct rxrpc_sock *rx, /* not yet present - create a candidate for a new connection * and then redo the check */ conn = rxrpc_alloc_connection(gfp); - if (IS_ERR(conn)) { - _leave(" = %ld", PTR_ERR(conn)); - return PTR_ERR(conn); + if (!conn) { + _leave(" = -ENOMEM"); + return -ENOMEM; } conn->trans = trans; @@ -508,9 +508,9 @@ int rxrpc_connect_call(struct rxrpc_sock *rx, /* not yet present - create a candidate for a new connection and then * redo the check */ candidate = rxrpc_alloc_connection(gfp); - if (IS_ERR(candidate)) { - _leave(" = %ld", PTR_ERR(candidate)); - return PTR_ERR(candidate); + if (!candidate) { + _leave(" = -ENOMEM"); + return -ENOMEM; } candidate->trans = trans; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 173fcc4b050d..09cdcdfe7e91 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -135,6 +135,7 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg) unsigned long cl; unsigned long fh; int err; + int tp_created = 0; if (net != &init_net) return -EINVAL; @@ -254,7 +255,7 @@ replay: } tp->ops = tp_ops; tp->protocol = protocol; - tp->prio = nprio ? : tcf_auto_prio(*back); + tp->prio = nprio ? 
: TC_H_MAJ(tcf_auto_prio(*back)); tp->q = q; tp->classify = tp_ops->classify; tp->classid = parent; @@ -266,10 +267,7 @@ replay: goto errout; } - spin_lock_bh(root_lock); - tp->next = *back; - *back = tp; - spin_unlock_bh(root_lock); + tp_created = 1; } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) goto errout; @@ -296,8 +294,11 @@ replay: switch (n->nlmsg_type) { case RTM_NEWTFILTER: err = -EEXIST; - if (n->nlmsg_flags & NLM_F_EXCL) + if (n->nlmsg_flags & NLM_F_EXCL) { + if (tp_created) + tcf_destroy(tp); goto errout; + } break; case RTM_DELTFILTER: err = tp->ops->delete(tp, fh); @@ -314,8 +315,18 @@ replay: } err = tp->ops->change(tp, cl, t->tcm_handle, tca, &fh); - if (err == 0) + if (err == 0) { + if (tp_created) { + spin_lock_bh(root_lock); + tp->next = *back; + *back = tp; + spin_unlock_bh(root_lock); + } tfilter_notify(skb, n, tp, fh, RTM_NEWTFILTER); + } else { + if (tp_created) + tcf_destroy(tp); + } errout: if (cl) diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index 91a3db4a76f8..e5becb92b3e7 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -104,8 +104,7 @@ static int cls_cgroup_classify(struct sk_buff *skb, struct tcf_proto *tp, struct tcf_result *res) { struct cls_cgroup_head *head = tp->root; - struct cgroup_cls_state *cs; - int ret = 0; + u32 classid; /* * Due to the nature of the classifier it is required to ignore all @@ -121,17 +120,18 @@ static int cls_cgroup_classify(struct sk_buff *skb, struct tcf_proto *tp, return -1; rcu_read_lock(); - cs = task_cls_state(current); - if (cs->classid && tcf_em_tree_match(skb, &head->ematches, NULL)) { - res->classid = cs->classid; - res->class = 0; - ret = tcf_exts_exec(skb, &head->exts, res); - } else - ret = -1; - + classid = task_cls_state(current)->classid; rcu_read_unlock(); - return ret; + if (!classid) + return -1; + + if (!tcf_em_tree_match(skb, &head->ematches, NULL)) + return -1; + + res->classid = classid; + res->class = 0; + return tcf_exts_exec(skb, &head->exts, res); } static unsigned long cls_cgroup_get(struct tcf_proto *tp, u32 handle) @@ -167,6 +167,9 @@ static int cls_cgroup_change(struct tcf_proto *tp, unsigned long base, struct tcf_exts e; int err; + if (!tca[TCA_OPTIONS]) + return -EINVAL; + if (head == NULL) { if (!handle) return -EINVAL; diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index 72cf86e3c090..fad596bf32d7 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -176,8 +176,10 @@ META_COLLECTOR(var_dev) META_COLLECTOR(int_vlan_tag) { - unsigned short uninitialized_var(tag); - if (vlan_get_tag(skb, &tag) < 0) + unsigned short tag; + + tag = vlan_tx_tag_get(skb); + if (!tag && __vlan_get_tag(skb, &tag)) *err = -1; else dst->value = tag; diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index 92cfc9d7e3b9..69188e8358b4 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -51,7 +51,7 @@ static int fifo_init(struct Qdisc *sch, struct nlattr *opt) u32 limit = qdisc_dev(sch)->tx_queue_len ? : 1; if (sch->ops == &bfifo_qdisc_ops) - limit *= qdisc_dev(sch)->mtu; + limit *= psched_mtu(qdisc_dev(sch)); q->limit = limit; } else { diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index d876b8734848..2b88295cb7b7 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -280,6 +280,14 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) if (unlikely(!skb)) return NULL; +#ifdef CONFIG_NET_CLS_ACT + /* + * If it's at ingress let's pretend the delay is + * from the network (tstamp will be updated). 
+ */ + if (G_TC_FROM(skb->tc_verd) & AT_INGRESS) + skb->tstamp.tv64 = 0; +#endif pr_debug("netem_dequeue: return skb=%p\n", skb); sch->q.qlen--; return skb; diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index ec697cebb63b..3b6418297231 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -303,6 +303,8 @@ restart: switch (teql_resolve(skb, skb_res, slave)) { case 0: if (__netif_tx_trylock(slave_txq)) { + unsigned int length = qdisc_pkt_len(skb); + if (!netif_tx_queue_stopped(slave_txq) && !netif_tx_queue_frozen(slave_txq) && slave_ops->ndo_start_xmit(skb, slave) == 0) { @@ -310,8 +312,7 @@ restart: master->slaves = NEXT_SLAVE(q); netif_wake_queue(dev); master->stats.tx_packets++; - master->stats.tx_bytes += - qdisc_pkt_len(skb); + master->stats.tx_bytes += length; return 0; } __netif_tx_unlock(slave_txq); diff --git a/net/socket.c b/net/socket.c index 91d0c0254ffe..791d71a36a93 100644 --- a/net/socket.c +++ b/net/socket.c @@ -493,8 +493,7 @@ static struct socket *sock_alloc(void) inode->i_uid = current_fsuid(); inode->i_gid = current_fsgid(); - get_cpu_var(sockets_in_use)++; - put_cpu_var(sockets_in_use); + percpu_add(sockets_in_use, 1); return sock; } @@ -536,8 +535,7 @@ void sock_release(struct socket *sock) if (sock->fasync_list) printk(KERN_ERR "sock_release: fasync list not empty!\n"); - get_cpu_var(sockets_in_use)--; - put_cpu_var(sockets_in_use); + percpu_sub(sockets_in_use, 1); if (!sock->file) { iput(SOCK_INODE(sock)); return; diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig index 5592883e1e4a..443c161eb8bd 100644 --- a/net/sunrpc/Kconfig +++ b/net/sunrpc/Kconfig @@ -17,28 +17,6 @@ config SUNRPC_XPRT_RDMA If unsure, say N. -config SUNRPC_REGISTER_V4 - bool "Register local RPC services via rpcbind v4 (EXPERIMENTAL)" - depends on SUNRPC && EXPERIMENTAL - default n - help - Sun added support for registering RPC services at an IPv6 - address by creating two new versions of the rpcbind protocol - (RFC 1833). - - This option enables support in the kernel RPC server for - registering kernel RPC services via version 4 of the rpcbind - protocol. If you enable this option, you must run a portmapper - daemon that supports rpcbind protocol version 4. - - Serving NFS over IPv6 from knfsd (the kernel's NFS server) - requires that you enable this option and use a portmapper that - supports rpcbind version 4. - - If unsure, say N to get traditional behavior (register kernel - RPC services using only rpcbind version 2). Distributions - using the legacy Linux portmapper daemon must say N here. - config RPCSEC_GSS_KRB5 tristate "Secure RPC: Kerberos V mechanism (EXPERIMENTAL)" depends on SUNRPC && EXPERIMENTAL @@ -69,7 +47,7 @@ config RPCSEC_GSS_SPKM3 select CRYPTO_CBC help Choose Y here to enable Secure RPC using the SPKM3 public key - GSS-API mechansim (RFC 2025). + GSS-API mechanism (RFC 2025). 
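/*
 * A sketch of the sch_teql fix above: once ndo_start_xmit() accepts a
 * packet the driver owns (and may already have freed) the skb, so the
 * byte count must be read out while the skb is still ours.
 * teql_xmit_one() is a hypothetical wrapper around the inline logic.
 */
#include <linux/netdevice.h>
#include <net/sch_generic.h>

static int teql_xmit_one(struct sk_buff *skb, struct net_device *slave,
			 struct net_device_stats *stats)
{
	unsigned int length = qdisc_pkt_len(skb);	/* before xmit */

	if (slave->netdev_ops->ndo_start_xmit(skb, slave) == 0) {
		/* Success: the skb is gone, account with the saved length. */
		stats->tx_packets++;
		stats->tx_bytes += length;
		return 0;
	}
	return 1;	/* caller requeues or drops */
}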
Secure RPC calls with SPKM3 require an auxiliary userspace daemon which may be found in the Linux nfs-utils package diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 836f15c0c4a3..5abab094441f 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1032,27 +1032,20 @@ call_connect_status(struct rpc_task *task) dprint_status(task); task->tk_status = 0; - if (status >= 0) { + if (status >= 0 || status == -EAGAIN) { clnt->cl_stats->netreconn++; task->tk_action = call_transmit; return; } - /* Something failed: remote service port may have changed */ - rpc_force_rebind(clnt); - switch (status) { - case -ENOTCONN: - case -EAGAIN: - task->tk_action = call_bind; - if (!RPC_IS_SOFT(task)) - return; /* if soft mounted, test if we've timed out */ case -ETIMEDOUT: task->tk_action = call_timeout; - return; + break; + default: + rpc_exit(task, -EIO); } - rpc_exit(task, -EIO); } /* @@ -1105,14 +1098,26 @@ static void call_transmit_status(struct rpc_task *task) { task->tk_action = call_status; - /* - * Special case: if we've been waiting on the socket's write_space() - * callback, then don't call xprt_end_transmit(). - */ - if (task->tk_status == -EAGAIN) - return; - xprt_end_transmit(task); - rpc_task_force_reencode(task); + switch (task->tk_status) { + case -EAGAIN: + break; + default: + xprt_end_transmit(task); + /* + * Special cases: if we've been waiting on the + * socket's write_space() callback, or if the + * socket just returned a connection error, + * then hold onto the transport lock. + */ + case -ECONNREFUSED: + case -ECONNRESET: + case -ENOTCONN: + case -EHOSTDOWN: + case -EHOSTUNREACH: + case -ENETUNREACH: + case -EPIPE: + rpc_task_force_reencode(task); + } } /* @@ -1152,9 +1157,12 @@ call_status(struct rpc_task *task) xprt_conditional_disconnect(task->tk_xprt, req->rq_connect_cookie); break; + case -ECONNRESET: case -ECONNREFUSED: - case -ENOTCONN: rpc_force_rebind(clnt); + rpc_delay(task, 3*HZ); + case -EPIPE: + case -ENOTCONN: task->tk_action = call_bind; break; case -EAGAIN: diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 03ae007641e4..beee6da33035 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -63,9 +63,16 @@ enum { * r_owner * * The "owner" is allowed to unset a service in the rpcbind database. - * We always use the following (arbitrary) fixed string. + * + * For AF_LOCAL SET/UNSET requests, rpcbind treats this string as a + * UID which it maps to a local user name via a password lookup. + * In all other cases it is ignored. + * + * For SET/UNSET requests, user space provides a value, even for + * network requests, and GETADDR uses an empty string. We follow + * those precedents here. 
*/ -#define RPCB_OWNER_STRING "rpcb" +#define RPCB_OWNER_STRING "0" #define RPCB_MAXOWNERLEN sizeof(RPCB_OWNER_STRING) static void rpcb_getport_done(struct rpc_task *, void *); @@ -124,12 +131,6 @@ static const struct sockaddr_in rpcb_inaddr_loopback = { .sin_port = htons(RPCBIND_PORT), }; -static const struct sockaddr_in6 rpcb_in6addr_loopback = { - .sin6_family = AF_INET6, - .sin6_addr = IN6ADDR_LOOPBACK_INIT, - .sin6_port = htons(RPCBIND_PORT), -}; - static struct rpc_clnt *rpcb_create_local(struct sockaddr *addr, size_t addrlen, u32 version) { @@ -176,9 +177,10 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr, return rpc_create(&args); } -static int rpcb_register_call(struct sockaddr *addr, size_t addrlen, - u32 version, struct rpc_message *msg) +static int rpcb_register_call(const u32 version, struct rpc_message *msg) { + struct sockaddr *addr = (struct sockaddr *)&rpcb_inaddr_loopback; + size_t addrlen = sizeof(rpcb_inaddr_loopback); struct rpc_clnt *rpcb_clnt; int result, error = 0; @@ -192,7 +194,7 @@ static int rpcb_register_call(struct sockaddr *addr, size_t addrlen, error = PTR_ERR(rpcb_clnt); if (error < 0) { - printk(KERN_WARNING "RPC: failed to contact local rpcbind " + dprintk("RPC: failed to contact local rpcbind " "server (errno %d).\n", -error); return error; } @@ -254,25 +256,23 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port) if (port) msg.rpc_proc = &rpcb_procedures2[RPCBPROC_SET]; - return rpcb_register_call((struct sockaddr *)&rpcb_inaddr_loopback, - sizeof(rpcb_inaddr_loopback), - RPCBVERS_2, &msg); + return rpcb_register_call(RPCBVERS_2, &msg); } /* * Fill in AF_INET family-specific arguments to register */ -static int rpcb_register_netid4(struct sockaddr_in *address_to_register, - struct rpc_message *msg) +static int rpcb_register_inet4(const struct sockaddr *sap, + struct rpc_message *msg) { + const struct sockaddr_in *sin = (const struct sockaddr_in *)sap; struct rpcbind_args *map = msg->rpc_argp; - unsigned short port = ntohs(address_to_register->sin_port); + unsigned short port = ntohs(sin->sin_port); char buf[32]; /* Construct AF_INET universal address */ snprintf(buf, sizeof(buf), "%pI4.%u.%u", - &address_to_register->sin_addr.s_addr, - port >> 8, port & 0xff); + &sin->sin_addr.s_addr, port >> 8, port & 0xff); map->r_addr = buf; dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with " @@ -284,29 +284,27 @@ static int rpcb_register_netid4(struct sockaddr_in *address_to_register, if (port) msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; - return rpcb_register_call((struct sockaddr *)&rpcb_inaddr_loopback, - sizeof(rpcb_inaddr_loopback), - RPCBVERS_4, msg); + return rpcb_register_call(RPCBVERS_4, msg); } /* * Fill in AF_INET6 family-specific arguments to register */ -static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register, - struct rpc_message *msg) +static int rpcb_register_inet6(const struct sockaddr *sap, + struct rpc_message *msg) { + const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)sap; struct rpcbind_args *map = msg->rpc_argp; - unsigned short port = ntohs(address_to_register->sin6_port); + unsigned short port = ntohs(sin6->sin6_port); char buf[64]; /* Construct AF_INET6 universal address */ - if (ipv6_addr_any(&address_to_register->sin6_addr)) + if (ipv6_addr_any(&sin6->sin6_addr)) snprintf(buf, sizeof(buf), "::.%u.%u", port >> 8, port & 0xff); else snprintf(buf, sizeof(buf), "%pI6.%u.%u", - &address_to_register->sin6_addr, - port >> 8, port & 0xff); + &sin6->sin6_addr, 
port >> 8, port & 0xff); map->r_addr = buf; dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with " @@ -318,9 +316,21 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register, if (port) msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; - return rpcb_register_call((struct sockaddr *)&rpcb_in6addr_loopback, - sizeof(rpcb_in6addr_loopback), - RPCBVERS_4, msg); + return rpcb_register_call(RPCBVERS_4, msg); +} + +static int rpcb_unregister_all_protofamilies(struct rpc_message *msg) +{ + struct rpcbind_args *map = msg->rpc_argp; + + dprintk("RPC: unregistering [%u, %u, '%s'] with " + "local rpcbind\n", + map->r_prog, map->r_vers, map->r_netid); + + map->r_addr = ""; + msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET]; + + return rpcb_register_call(RPCBVERS_4, msg); } /** @@ -340,10 +350,11 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register, * invoke this function once for each [program, version, address, * netid] tuple they wish to advertise. * - * Callers may also unregister RPC services that are no longer - * available by setting the port number in the passed-in address - * to zero. Callers pass a netid of "" to unregister all - * transport netids associated with [program, version, address]. + * Callers may also unregister RPC services that are registered at a + * specific address by setting the port number in @address to zero. + * They may unregister all registered protocol families at once for + * a service by passing a NULL @address argument. If @netid is "" + * then all netids for [program, version, address] are unregistered. * * This function uses rpcbind protocol version 4 to contact the * local rpcbind daemon. The local rpcbind daemon must support @@ -378,13 +389,14 @@ int rpcb_v4_register(const u32 program, const u32 version, .rpc_argp = &map, }; + if (address == NULL) + return rpcb_unregister_all_protofamilies(&msg); + switch (address->sa_family) { case AF_INET: - return rpcb_register_netid4((struct sockaddr_in *)address, - &msg); + return rpcb_register_inet4(address, &msg); case AF_INET6: - return rpcb_register_netid6((struct sockaddr_in6 *)address, - &msg); + return rpcb_register_inet6(address, &msg); } return -EAFNOSUPPORT; @@ -579,7 +591,7 @@ void rpcb_getport_async(struct rpc_task *task) map->r_xprt = xprt_get(xprt); map->r_netid = rpc_peeraddr2str(clnt, RPC_DISPLAY_NETID); map->r_addr = rpc_peeraddr2str(rpcb_clnt, RPC_DISPLAY_UNIVERSAL_ADDR); - map->r_owner = RPCB_OWNER_STRING; /* ignored for GETADDR */ + map->r_owner = ""; map->r_status = -EIO; child = rpcb_call_async(rpcb_clnt, map, proc); @@ -703,11 +715,16 @@ static int rpcb_decode_getaddr(struct rpc_rqst *req, __be32 *p, *portp = 0; addr_len = ntohl(*p++); + if (addr_len == 0) { + dprintk("RPC: rpcb_decode_getaddr: " + "service is not registered\n"); + return 0; + } + /* - * Simple sanity check. The smallest possible universal - * address is an IPv4 address string containing 11 bytes. + * Simple sanity check. 
*/ - if (addr_len < 11 || addr_len > RPCBIND_MAXUADDRLEN) + if (addr_len > RPCBIND_MAXUADDRLEN) goto out_err; /* diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index bb507e2bb94d..8847add6ca16 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -317,8 +317,7 @@ svc_pool_map_set_cpumask(struct task_struct *task, unsigned int pidx) } case SVC_POOL_PERNODE: { - node_to_cpumask_ptr(nodecpumask, node); - set_cpus_allowed_ptr(task, nodecpumask); + set_cpus_allowed_ptr(task, cpumask_of_node(node)); break; } } @@ -359,7 +358,7 @@ svc_pool_for_cpu(struct svc_serv *serv, int cpu) */ static struct svc_serv * __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, - sa_family_t family, void (*shutdown)(struct svc_serv *serv)) + void (*shutdown)(struct svc_serv *serv)) { struct svc_serv *serv; unsigned int vers; @@ -368,7 +367,6 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, if (!(serv = kzalloc(sizeof(*serv), GFP_KERNEL))) return NULL; - serv->sv_family = family; serv->sv_name = prog->pg_name; serv->sv_program = prog; serv->sv_nrthreads = 1; @@ -427,21 +425,21 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, struct svc_serv * svc_create(struct svc_program *prog, unsigned int bufsize, - sa_family_t family, void (*shutdown)(struct svc_serv *serv)) + void (*shutdown)(struct svc_serv *serv)) { - return __svc_create(prog, bufsize, /*npools*/1, family, shutdown); + return __svc_create(prog, bufsize, /*npools*/1, shutdown); } EXPORT_SYMBOL_GPL(svc_create); struct svc_serv * svc_create_pooled(struct svc_program *prog, unsigned int bufsize, - sa_family_t family, void (*shutdown)(struct svc_serv *serv), + void (*shutdown)(struct svc_serv *serv), svc_thread_fn func, struct module *mod) { struct svc_serv *serv; unsigned int npools = svc_pool_map_get(); - serv = __svc_create(prog, bufsize, npools, family, shutdown); + serv = __svc_create(prog, bufsize, npools, shutdown); if (serv != NULL) { serv->sv_function = func; @@ -719,8 +717,6 @@ svc_exit_thread(struct svc_rqst *rqstp) } EXPORT_SYMBOL_GPL(svc_exit_thread); -#ifdef CONFIG_SUNRPC_REGISTER_V4 - /* * Register an "inet" protocol family netid with the local * rpcbind daemon via an rpcbind v4 SET request. @@ -735,12 +731,13 @@ static int __svc_rpcb_register4(const u32 program, const u32 version, const unsigned short protocol, const unsigned short port) { - struct sockaddr_in sin = { + const struct sockaddr_in sin = { .sin_family = AF_INET, .sin_addr.s_addr = htonl(INADDR_ANY), .sin_port = htons(port), }; - char *netid; + const char *netid; + int error; switch (protocol) { case IPPROTO_UDP: @@ -750,13 +747,23 @@ static int __svc_rpcb_register4(const u32 program, const u32 version, netid = RPCBIND_NETID_TCP; break; default: - return -EPROTONOSUPPORT; + return -ENOPROTOOPT; } - return rpcb_v4_register(program, version, - (struct sockaddr *)&sin, netid); + error = rpcb_v4_register(program, version, + (const struct sockaddr *)&sin, netid); + + /* + * User space didn't support rpcbind v4, so retry this + * registration request with the legacy rpcbind v2 protocol. + */ + if (error == -EPROTONOSUPPORT) + error = rpcb_register(program, version, protocol, port); + + return error; } +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) /* * Register an "inet6" protocol family netid with the local * rpcbind daemon via an rpcbind v4 SET request. 
@@ -771,12 +778,13 @@ static int __svc_rpcb_register6(const u32 program, const u32 version, const unsigned short protocol, const unsigned short port) { - struct sockaddr_in6 sin6 = { + const struct sockaddr_in6 sin6 = { .sin6_family = AF_INET6, .sin6_addr = IN6ADDR_ANY_INIT, .sin6_port = htons(port), }; - char *netid; + const char *netid; + int error; switch (protocol) { case IPPROTO_UDP: @@ -786,12 +794,22 @@ static int __svc_rpcb_register6(const u32 program, const u32 version, netid = RPCBIND_NETID_TCP6; break; default: - return -EPROTONOSUPPORT; + return -ENOPROTOOPT; } - return rpcb_v4_register(program, version, - (struct sockaddr *)&sin6, netid); + error = rpcb_v4_register(program, version, + (const struct sockaddr *)&sin6, netid); + + /* + * User space didn't support rpcbind version 4, so we won't + * use a PF_INET6 listener. + */ + if (error == -EPROTONOSUPPORT) + error = -EAFNOSUPPORT; + + return error; } +#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */ /* * Register a kernel RPC service via rpcbind version 4. @@ -799,69 +817,43 @@ static int __svc_rpcb_register6(const u32 program, const u32 version, * Returns zero on success; a negative errno value is returned * if any error occurs. */ -static int __svc_register(const u32 program, const u32 version, - const sa_family_t family, +static int __svc_register(const char *progname, + const u32 program, const u32 version, + const int family, const unsigned short protocol, const unsigned short port) { - int error; + int error = -EAFNOSUPPORT; switch (family) { - case AF_INET: - return __svc_rpcb_register4(program, version, + case PF_INET: + error = __svc_rpcb_register4(program, version, protocol, port); - case AF_INET6: + break; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case PF_INET6: error = __svc_rpcb_register6(program, version, protocol, port); - if (error < 0) - return error; - - /* - * Work around bug in some versions of Linux rpcbind - * which don't allow registration of both inet and - * inet6 netids. - * - * Error return ignored for now. - */ - __svc_rpcb_register4(program, version, - protocol, port); - return 0; +#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */ } - return -EAFNOSUPPORT; -} - -#else /* CONFIG_SUNRPC_REGISTER_V4 */ - -/* - * Register a kernel RPC service via rpcbind version 2. - * - * Returns zero on success; a negative errno value is returned - * if any error occurs. 
- */ -static int __svc_register(const u32 program, const u32 version, - sa_family_t family, - const unsigned short protocol, - const unsigned short port) -{ - if (family != AF_INET) - return -EAFNOSUPPORT; - - return rpcb_register(program, version, protocol, port); + if (error < 0) + printk(KERN_WARNING "svc: failed to register %sv%u RPC " + "service (errno %d).\n", progname, version, -error); + return error; } -#endif /* CONFIG_SUNRPC_REGISTER_V4 */ - /** * svc_register - register an RPC service with the local portmapper * @serv: svc_serv struct for the service to register + * @family: protocol family of service's listener socket * @proto: transport protocol number to advertise * @port: port to advertise * - * Service is registered for any address in serv's address family + * Service is registered for any address in the passed-in protocol family */ -int svc_register(const struct svc_serv *serv, const unsigned short proto, - const unsigned short port) +int svc_register(const struct svc_serv *serv, const int family, + const unsigned short proto, const unsigned short port) { struct svc_program *progp; unsigned int i; @@ -879,15 +871,15 @@ int svc_register(const struct svc_serv *serv, const unsigned short proto, i, proto == IPPROTO_UDP? "udp" : "tcp", port, - serv->sv_family, + family, progp->pg_vers[i]->vs_hidden? " (but not telling portmap)" : ""); if (progp->pg_vers[i]->vs_hidden) continue; - error = __svc_register(progp->pg_prog, i, - serv->sv_family, proto, port); + error = __svc_register(progp->pg_name, progp->pg_prog, + i, family, proto, port); if (error < 0) break; } @@ -896,38 +888,31 @@ int svc_register(const struct svc_serv *serv, const unsigned short proto, return error; } -#ifdef CONFIG_SUNRPC_REGISTER_V4 - +/* + * If user space is running rpcbind, it should take the v4 UNSET + * and clear everything for this [program, version]. If user space + * is running portmap, it will reject the v4 UNSET, but won't have + * any "inet6" entries anyway. So a PMAP_UNSET should be sufficient + * in this case to clear all existing entries for [program, version]. + */ static void __svc_unregister(const u32 program, const u32 version, const char *progname) { - struct sockaddr_in6 sin6 = { - .sin6_family = AF_INET6, - .sin6_addr = IN6ADDR_ANY_INIT, - .sin6_port = 0, - }; int error; - error = rpcb_v4_register(program, version, - (struct sockaddr *)&sin6, ""); - dprintk("svc: %s(%sv%u), error %d\n", - __func__, progname, version, error); -} - -#else /* CONFIG_SUNRPC_REGISTER_V4 */ + error = rpcb_v4_register(program, version, NULL, ""); -static void __svc_unregister(const u32 program, const u32 version, - const char *progname) -{ - int error; + /* + * User space didn't support rpcbind v4, so retry this + * request with the legacy rpcbind v2 protocol. 
+ */ + if (error == -EPROTONOSUPPORT) + error = rpcb_register(program, version, 0, 0); - error = rpcb_register(program, version, 0, 0); dprintk("svc: %s(%sv%u), error %d\n", __func__, progname, version, error); } -#endif /* CONFIG_SUNRPC_REGISTER_V4 */ - /* * All netids, bind addresses and ports registered for [program, version] * are removed from the local rpcbind database (if the service is not @@ -1023,6 +1008,8 @@ svc_process(struct svc_rqst *rqstp) rqstp->rq_res.tail[0].iov_len = 0; /* Will be turned off only in gss privacy case: */ rqstp->rq_splice_ok = 1; + /* Will be turned off only when NFSv4 Sessions are used */ + rqstp->rq_usedeferral = 1; /* Setup reply header */ rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr(rqstp); @@ -1093,7 +1080,6 @@ svc_process(struct svc_rqst *rqstp) procp = versp->vs_proc + proc; if (proc >= versp->vs_nproc || !procp->pc_func) goto err_bad_proc; - rqstp->rq_server = serv; rqstp->rq_procinfo = procp; /* Syntactic check complete */ diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index e588df5d6b34..c200d92e57e4 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -14,6 +14,8 @@ #define RPCDBG_FACILITY RPCDBG_SVCXPRT +#define SVC_MAX_WAKING 5 + static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt); static int svc_deferred_recv(struct svc_rqst *rqstp); static struct cache_deferred_req *svc_defer(struct cache_req *req); @@ -161,7 +163,9 @@ EXPORT_SYMBOL_GPL(svc_xprt_init); static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl, struct svc_serv *serv, - unsigned short port, int flags) + const int family, + const unsigned short port, + int flags) { struct sockaddr_in sin = { .sin_family = AF_INET, @@ -176,12 +180,12 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl, struct sockaddr *sap; size_t len; - switch (serv->sv_family) { - case AF_INET: + switch (family) { + case PF_INET: sap = (struct sockaddr *)&sin; len = sizeof(sin); break; - case AF_INET6: + case PF_INET6: sap = (struct sockaddr *)&sin6; len = sizeof(sin6); break; @@ -192,7 +196,8 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl, return xcl->xcl_ops->xpo_create(serv, sap, len, flags); } -int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port, +int svc_create_xprt(struct svc_serv *serv, const char *xprt_name, + const int family, const unsigned short port, int flags) { struct svc_xprt_class *xcl; @@ -209,7 +214,7 @@ int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port, goto err; spin_unlock(&svc_xprt_class_lock); - newxprt = __svc_xpo_create(xcl, serv, port, flags); + newxprt = __svc_xpo_create(xcl, serv, family, port, flags); if (IS_ERR(newxprt)) { module_put(xcl->xcl_owner); return PTR_ERR(newxprt); @@ -298,6 +303,7 @@ void svc_xprt_enqueue(struct svc_xprt *xprt) struct svc_pool *pool; struct svc_rqst *rqstp; int cpu; + int thread_avail; if (!(xprt->xpt_flags & ((1<<XPT_CONN)|(1<<XPT_DATA)|(1<<XPT_CLOSE)|(1<<XPT_DEFERRED)))) @@ -309,18 +315,14 @@ void svc_xprt_enqueue(struct svc_xprt *xprt) spin_lock_bh(&pool->sp_lock); - if (!list_empty(&pool->sp_threads) && - !list_empty(&pool->sp_sockets)) - printk(KERN_ERR - "svc_xprt_enqueue: " - "threads and transports both waiting??\n"); - if (test_bit(XPT_DEAD, &xprt->xpt_flags)) { /* Don't enqueue dead transports */ dprintk("svc: transport %p is dead, not enqueued\n", xprt); goto out_unlock; } + pool->sp_stats.packets++; + /* Mark transport as busy. 
It will remain in this state until * the provider calls svc_xprt_received. We update XPT_BUSY * atomically because it also guards against trying to enqueue @@ -353,7 +355,15 @@ void svc_xprt_enqueue(struct svc_xprt *xprt) } process: - if (!list_empty(&pool->sp_threads)) { + /* Work out whether threads are available */ + thread_avail = !list_empty(&pool->sp_threads); /* threads are asleep */ + if (pool->sp_nwaking >= SVC_MAX_WAKING) { + /* too many threads are runnable and trying to wake up */ + thread_avail = 0; + pool->sp_stats.overloads_avoided++; + } + + if (thread_avail) { rqstp = list_entry(pool->sp_threads.next, struct svc_rqst, rq_list); @@ -368,11 +378,15 @@ void svc_xprt_enqueue(struct svc_xprt *xprt) svc_xprt_get(xprt); rqstp->rq_reserved = serv->sv_max_mesg; atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); + rqstp->rq_waking = 1; + pool->sp_nwaking++; + pool->sp_stats.threads_woken++; BUG_ON(xprt->xpt_pool != pool); wake_up(&rqstp->rq_wait); } else { dprintk("svc: transport %p put into queue\n", xprt); list_add_tail(&xprt->xpt_ready, &pool->sp_sockets); + pool->sp_stats.sockets_queued++; BUG_ON(xprt->xpt_pool != pool); } @@ -585,6 +599,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) int pages; struct xdr_buf *arg; DECLARE_WAITQUEUE(wait, current); + long time_left; dprintk("svc: server %p waiting for data (to = %ld)\n", rqstp, timeout); @@ -633,6 +648,11 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) return -EINTR; spin_lock_bh(&pool->sp_lock); + if (rqstp->rq_waking) { + rqstp->rq_waking = 0; + pool->sp_nwaking--; + BUG_ON(pool->sp_nwaking < 0); + } xprt = svc_xprt_dequeue(pool); if (xprt) { rqstp->rq_xprt = xprt; @@ -665,12 +685,14 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) add_wait_queue(&rqstp->rq_wait, &wait); spin_unlock_bh(&pool->sp_lock); - schedule_timeout(timeout); + time_left = schedule_timeout(timeout); try_to_freeze(); spin_lock_bh(&pool->sp_lock); remove_wait_queue(&rqstp->rq_wait, &wait); + if (!time_left) + pool->sp_stats.threads_timedout++; xprt = rqstp->rq_xprt; if (!xprt) { @@ -955,7 +977,7 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req) struct svc_rqst *rqstp = container_of(req, struct svc_rqst, rq_chandle); struct svc_deferred_req *dr; - if (rqstp->rq_arg.page_len) + if (rqstp->rq_arg.page_len || !rqstp->rq_usedeferral) return NULL; /* if more than a page, give up FIXME */ if (rqstp->rq_deferred) { dr = rqstp->rq_deferred; @@ -1033,7 +1055,13 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt) return dr; } -/* +/** + * svc_find_xprt - find an RPC transport instance + * @serv: pointer to svc_serv to search + * @xcl_name: C string containing transport's class name + * @af: Address family of transport's local address + * @port: transport's IP port number + * * Return the transport instance pointer for the endpoint accepting * connections/peer traffic from the specified transport class, * address family and port. @@ -1042,14 +1070,14 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt) * wild-card, and will result in matching the first transport in the * service's list that has a matching class name. 
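Earlier in this hunk, svc_xprt_enqueue() now refuses to wake yet another thread once SVC_MAX_WAKING threads have been woken but have not run, queueing the transport instead and counting the avoided wakeup in the new per-pool statistics. A toy single-threaded model of that throttle, using illustrative names rather than the kernel's structures:

#include <stdbool.h>
#include <stdio.h>

#define SVC_MAX_WAKING 5

struct pool {
        int  nwaking;                   /* woken but not yet running */
        bool have_idle_thread;          /* sp_threads list non-empty */
        unsigned long overloads_avoided;
};

/* decide, as svc_xprt_enqueue() now does, whether waking one more
 * thread is useful or would just add to the runnable pile-up */
static bool should_wake_thread(struct pool *p)
{
        if (!p->have_idle_thread)
                return false;           /* transport goes on sp_sockets */
        if (p->nwaking >= SVC_MAX_WAKING) {
                p->overloads_avoided++; /* queue it instead of waking */
                return false;
        }
        p->nwaking++;                   /* decremented again in svc_recv() */
        return true;
}

int main(void)
{
        struct pool p = { .nwaking = SVC_MAX_WAKING, .have_idle_thread = true };

        printf("wake? %d, avoided %lu\n",
               should_wake_thread(&p), p.overloads_avoided);
        return 0;
}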
*/ -struct svc_xprt *svc_find_xprt(struct svc_serv *serv, char *xcl_name, - int af, int port) +struct svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name, + const sa_family_t af, const unsigned short port) { struct svc_xprt *xprt; struct svc_xprt *found = NULL; /* Sanity check the args */ - if (!serv || !xcl_name) + if (serv == NULL || xcl_name == NULL) return found; spin_lock_bh(&serv->sv_lock); @@ -1058,7 +1086,7 @@ struct svc_xprt *svc_find_xprt(struct svc_serv *serv, char *xcl_name, continue; if (af != AF_UNSPEC && af != xprt->xpt_local.ss_family) continue; - if (port && port != svc_xprt_local_port(xprt)) + if (port != 0 && port != svc_xprt_local_port(xprt)) continue; found = xprt; svc_xprt_get(xprt); @@ -1103,3 +1131,93 @@ int svc_xprt_names(struct svc_serv *serv, char *buf, int buflen) return totlen; } EXPORT_SYMBOL_GPL(svc_xprt_names); + + +/*----------------------------------------------------------------------------*/ + +static void *svc_pool_stats_start(struct seq_file *m, loff_t *pos) +{ + unsigned int pidx = (unsigned int)*pos; + struct svc_serv *serv = m->private; + + dprintk("svc_pool_stats_start, *pidx=%u\n", pidx); + + lock_kernel(); + /* bump up the pseudo refcount while traversing */ + svc_get(serv); + unlock_kernel(); + + if (!pidx) + return SEQ_START_TOKEN; + return (pidx > serv->sv_nrpools ? NULL : &serv->sv_pools[pidx-1]); +} + +static void *svc_pool_stats_next(struct seq_file *m, void *p, loff_t *pos) +{ + struct svc_pool *pool = p; + struct svc_serv *serv = m->private; + + dprintk("svc_pool_stats_next, *pos=%llu\n", *pos); + + if (p == SEQ_START_TOKEN) { + pool = &serv->sv_pools[0]; + } else { + unsigned int pidx = (pool - &serv->sv_pools[0]); + if (pidx < serv->sv_nrpools-1) + pool = &serv->sv_pools[pidx+1]; + else + pool = NULL; + } + ++*pos; + return pool; +} + +static void svc_pool_stats_stop(struct seq_file *m, void *p) +{ + struct svc_serv *serv = m->private; + + lock_kernel(); + /* this function really, really should have been called svc_put() */ + svc_destroy(serv); + unlock_kernel(); +} + +static int svc_pool_stats_show(struct seq_file *m, void *p) +{ + struct svc_pool *pool = p; + + if (p == SEQ_START_TOKEN) { + seq_puts(m, "# pool packets-arrived sockets-enqueued threads-woken overloads-avoided threads-timedout\n"); + return 0; + } + + seq_printf(m, "%u %lu %lu %lu %lu %lu\n", + pool->sp_id, + pool->sp_stats.packets, + pool->sp_stats.sockets_queued, + pool->sp_stats.threads_woken, + pool->sp_stats.overloads_avoided, + pool->sp_stats.threads_timedout); + + return 0; +} + +static const struct seq_operations svc_pool_stats_seq_ops = { + .start = svc_pool_stats_start, + .next = svc_pool_stats_next, + .stop = svc_pool_stats_stop, + .show = svc_pool_stats_show, +}; + +int svc_pool_stats_open(struct svc_serv *serv, struct file *file) +{ + int err; + + err = seq_open(file, &svc_pool_stats_seq_ops); + if (!err) + ((struct seq_file *) file->private_data)->private = serv; + return err; +} +EXPORT_SYMBOL(svc_pool_stats_open); + +/*----------------------------------------------------------------------------*/ diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 5763e6460fea..9d504234af4a 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1110,7 +1110,6 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, struct svc_sock *svsk; struct sock *inet; int pmap_register = !(flags & SVC_SOCK_ANONYMOUS); - int val; dprintk("svc: svc_setup_socket %p\n", sock); if (!(svsk = kzalloc(sizeof(*svsk), GFP_KERNEL))) { @@ -1122,7 
+1121,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, /* Register socket with portmapper */ if (*errp >= 0 && pmap_register) - *errp = svc_register(serv, inet->sk_protocol, + *errp = svc_register(serv, inet->sk_family, inet->sk_protocol, ntohs(inet_sk(inet)->sport)); if (*errp < 0) { @@ -1143,18 +1142,6 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, else svc_tcp_init(svsk, serv); - /* - * We start one listener per sv_serv. We want AF_INET - * requests to be automatically shunted to our AF_INET6 - * listener using a mapped IPv4 address. Make sure - * no-one starts an equivalent IPv4 listener, which - * would steal our incoming connections. - */ - val = 0; - if (serv->sv_family == AF_INET6) - kernel_setsockopt(sock, SOL_IPV6, IPV6_V6ONLY, - (char *)&val, sizeof(val)); - dprintk("svc: svc_setup_socket created %p (inet %p)\n", svsk, svsk->sk_sk); @@ -1222,6 +1209,8 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv, struct sockaddr_storage addr; struct sockaddr *newsin = (struct sockaddr *)&addr; int newlen; + int family; + int val; RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]); dprintk("svc: svc_create_socket(%s, %d, %s)\n", @@ -1233,14 +1222,35 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv, "sockets supported\n"); return ERR_PTR(-EINVAL); } + type = (protocol == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM; + switch (sin->sa_family) { + case AF_INET6: + family = PF_INET6; + break; + case AF_INET: + family = PF_INET; + break; + default: + return ERR_PTR(-EINVAL); + } - error = sock_create_kern(sin->sa_family, type, protocol, &sock); + error = sock_create_kern(family, type, protocol, &sock); if (error < 0) return ERR_PTR(error); svc_reclassify_socket(sock); + /* + * If this is an PF_INET6 listener, we want to avoid + * getting requests from IPv4 remotes. Those should + * be shunted to a PF_INET listener via rpcbind. 
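The comment above describes the listener policy this series moves to: instead of one AF_INET6 socket with IPV6_V6ONLY cleared, each family gets its own listener, and PF_INET6 sockets are marked v6-only so v4-mapped peers cannot reach them. The same setup from user space, as a sketch with error handling trimmed for brevity:

#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/socket.h>

static int make_v6only_listener(unsigned short port)
{
        struct sockaddr_in6 sin6 = {
                .sin6_family = AF_INET6,
                .sin6_addr   = IN6ADDR_ANY_INIT,
                .sin6_port   = htons(port),
        };
        int val = 1;
        int fd = socket(PF_INET6, SOCK_STREAM, 0);

        /* refuse v4-mapped peers; IPv4 needs its own PF_INET listener */
        setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &val, sizeof(val));
        bind(fd, (struct sockaddr *)&sin6, sizeof(sin6));
        listen(fd, 16);
        return fd;
}

The kernel spells the option level SOL_IPV6; the user-space constant IPPROTO_IPV6 has the same value on Linux.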
+ */ + val = 1; + if (family == PF_INET6) + kernel_setsockopt(sock, SOL_IPV6, IPV6_V6ONLY, + (char *)&val, sizeof(val)); + if (type == SOCK_STREAM) sock->sk->sk_reuse = 1; /* allow address reuse */ error = kernel_bind(sock, sin, len); diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 62098d101a1f..06ca058572f2 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -152,6 +152,37 @@ out: EXPORT_SYMBOL_GPL(xprt_unregister_transport); /** + * xprt_load_transport - load a transport implementation + * @transport_name: transport to load + * + * Returns: + * 0: transport successfully loaded + * -ENOENT: transport module not available + */ +int xprt_load_transport(const char *transport_name) +{ + struct xprt_class *t; + char module_name[sizeof t->name + 5]; + int result; + + result = 0; + spin_lock(&xprt_list_lock); + list_for_each_entry(t, &xprt_list, list) { + if (strcmp(t->name, transport_name) == 0) { + spin_unlock(&xprt_list_lock); + goto out; + } + } + spin_unlock(&xprt_list_lock); + strcpy(module_name, "xprt"); + strncat(module_name, transport_name, sizeof t->name); + result = request_module(module_name); +out: + return result; +} +EXPORT_SYMBOL_GPL(xprt_load_transport); + +/** * xprt_reserve_xprt - serialize write access to transports * @task: task that is requesting access to the transport * @@ -580,7 +611,7 @@ void xprt_disconnect_done(struct rpc_xprt *xprt) dprintk("RPC: disconnected transport %p\n", xprt); spin_lock_bh(&xprt->transport_lock); xprt_clear_connected(xprt); - xprt_wake_pending_tasks(xprt, -ENOTCONN); + xprt_wake_pending_tasks(xprt, -EAGAIN); spin_unlock_bh(&xprt->transport_lock); } EXPORT_SYMBOL_GPL(xprt_disconnect_done); @@ -598,7 +629,7 @@ void xprt_force_disconnect(struct rpc_xprt *xprt) /* Try to schedule an autoclose RPC call */ if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0) queue_work(rpciod_workqueue, &xprt->task_cleanup); - xprt_wake_pending_tasks(xprt, -ENOTCONN); + xprt_wake_pending_tasks(xprt, -EAGAIN); spin_unlock_bh(&xprt->transport_lock); } @@ -625,7 +656,7 @@ void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie) /* Try to schedule an autoclose RPC call */ if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0) queue_work(rpciod_workqueue, &xprt->task_cleanup); - xprt_wake_pending_tasks(xprt, -ENOTCONN); + xprt_wake_pending_tasks(xprt, -EAGAIN); out: spin_unlock_bh(&xprt->transport_lock); } @@ -641,10 +672,8 @@ xprt_init_autodisconnect(unsigned long data) if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) goto out_abort; spin_unlock(&xprt->transport_lock); - if (xprt_connecting(xprt)) - xprt_release_write(xprt, NULL); - else - queue_work(rpciod_workqueue, &xprt->task_cleanup); + set_bit(XPRT_CONNECTION_CLOSE, &xprt->state); + queue_work(rpciod_workqueue, &xprt->task_cleanup); return; out_abort: spin_unlock(&xprt->transport_lock); @@ -695,9 +724,8 @@ static void xprt_connect_status(struct rpc_task *task) } switch (task->tk_status) { - case -ENOTCONN: - dprintk("RPC: %5u xprt_connect_status: connection broken\n", - task->tk_pid); + case -EAGAIN: + dprintk("RPC: %5u xprt_connect_status: retrying\n", task->tk_pid); break; case -ETIMEDOUT: dprintk("RPC: %5u xprt_connect_status: connect attempt timed " @@ -818,15 +846,8 @@ int xprt_prepare_transmit(struct rpc_task *task) err = req->rq_received; goto out_unlock; } - if (!xprt->ops->reserve_xprt(task)) { + if (!xprt->ops->reserve_xprt(task)) err = -EAGAIN; - goto out_unlock; - } - - if (!xprt_connected(xprt)) { - err = -ENOTCONN; - goto out_unlock; - } out_unlock: 
spin_unlock_bh(&xprt->transport_lock); return err; @@ -870,32 +891,26 @@ void xprt_transmit(struct rpc_task *task) req->rq_connect_cookie = xprt->connect_cookie; req->rq_xtime = jiffies; status = xprt->ops->send_request(task); - if (status == 0) { - dprintk("RPC: %5u xmit complete\n", task->tk_pid); - spin_lock_bh(&xprt->transport_lock); + if (status != 0) { + task->tk_status = status; + return; + } - xprt->ops->set_retrans_timeout(task); + dprintk("RPC: %5u xmit complete\n", task->tk_pid); + spin_lock_bh(&xprt->transport_lock); - xprt->stat.sends++; - xprt->stat.req_u += xprt->stat.sends - xprt->stat.recvs; - xprt->stat.bklog_u += xprt->backlog.qlen; + xprt->ops->set_retrans_timeout(task); - /* Don't race with disconnect */ - if (!xprt_connected(xprt)) - task->tk_status = -ENOTCONN; - else if (!req->rq_received) - rpc_sleep_on(&xprt->pending, task, xprt_timer); - spin_unlock_bh(&xprt->transport_lock); - return; - } + xprt->stat.sends++; + xprt->stat.req_u += xprt->stat.sends - xprt->stat.recvs; + xprt->stat.bklog_u += xprt->backlog.qlen; - /* Note: at this point, task->tk_sleeping has not yet been set, - * hence there is no danger of the waking up task being put on - * schedq, and being picked up by a parallel run of rpciod(). - */ - task->tk_status = status; - if (status == -ECONNREFUSED) - rpc_sleep_on(&xprt->sending, task, NULL); + /* Don't race with disconnect */ + if (!xprt_connected(xprt)) + task->tk_status = -ENOTCONN; + else if (!req->rq_received) + rpc_sleep_on(&xprt->pending, task, xprt_timer); + spin_unlock_bh(&xprt->transport_lock); } static inline void do_xprt_reserve(struct rpc_task *task) diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 14106d26bb95..e5e28d1946a4 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -310,6 +310,19 @@ rpcrdma_inline_pullup(struct rpc_rqst *rqst, int pad) __func__, pad, destp, rqst->rq_slen, curlen); copy_len = rqst->rq_snd_buf.page_len; + + if (rqst->rq_snd_buf.tail[0].iov_len) { + curlen = rqst->rq_snd_buf.tail[0].iov_len; + if (destp + copy_len != rqst->rq_snd_buf.tail[0].iov_base) { + memmove(destp + copy_len, + rqst->rq_snd_buf.tail[0].iov_base, curlen); + r_xprt->rx_stats.pullup_copy_count += curlen; + } + dprintk("RPC: %s: tail destp 0x%p len %d\n", + __func__, destp + copy_len, curlen); + rqst->rq_svec[0].iov_len += curlen; + } + r_xprt->rx_stats.pullup_copy_count += copy_len; npages = PAGE_ALIGN(rqst->rq_snd_buf.page_base+copy_len) >> PAGE_SHIFT; for (i = 0; copy_len && i < npages; i++) { @@ -332,17 +345,6 @@ rpcrdma_inline_pullup(struct rpc_rqst *rqst, int pad) destp += curlen; copy_len -= curlen; } - if (rqst->rq_snd_buf.tail[0].iov_len) { - curlen = rqst->rq_snd_buf.tail[0].iov_len; - if (destp != rqst->rq_snd_buf.tail[0].iov_base) { - memcpy(destp, - rqst->rq_snd_buf.tail[0].iov_base, curlen); - r_xprt->rx_stats.pullup_copy_count += curlen; - } - dprintk("RPC: %s: tail destp 0x%p len %d curlen %d\n", - __func__, destp, copy_len, curlen); - rqst->rq_svec[0].iov_len += curlen; - } /* header now contains entire send message */ return pad; } @@ -656,7 +658,7 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad) if (curlen > rqst->rq_rcv_buf.tail[0].iov_len) curlen = rqst->rq_rcv_buf.tail[0].iov_len; if (rqst->rq_rcv_buf.tail[0].iov_base != srcp) - memcpy(rqst->rq_rcv_buf.tail[0].iov_base, srcp, curlen); + memmove(rqst->rq_rcv_buf.tail[0].iov_base, srcp, curlen); dprintk("RPC: %s: tail srcp 0x%p len %d curlen %d\n", __func__, srcp, copy_len, 
curlen); rqst->rq_rcv_buf.tail[0].iov_len = curlen; diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 629a28764da9..42a6f9f20285 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -265,7 +265,7 @@ static int fast_reg_read_chunks(struct svcxprt_rdma *xprt, frmr->page_list->page_list[page_no] = ib_dma_map_single(xprt->sc_cm_id->device, page_address(rqstp->rq_arg.pages[page_no]), - PAGE_SIZE, DMA_TO_DEVICE); + PAGE_SIZE, DMA_FROM_DEVICE); if (ib_dma_mapping_error(xprt->sc_cm_id->device, frmr->page_list->page_list[page_no])) goto fatal_err; diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index a3334e3b73cc..f11be72a1a80 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -128,7 +128,8 @@ static int fast_reg_xdr(struct svcxprt_rdma *xprt, page_bytes -= sge_bytes; frmr->page_list->page_list[page_no] = - ib_dma_map_page(xprt->sc_cm_id->device, page, 0, + ib_dma_map_single(xprt->sc_cm_id->device, + page_address(page), PAGE_SIZE, DMA_TO_DEVICE); if (ib_dma_mapping_error(xprt->sc_cm_id->device, frmr->page_list->page_list[page_no])) @@ -183,6 +184,7 @@ static int fast_reg_xdr(struct svcxprt_rdma *xprt, fatal_err: printk("svcrdma: Error fast registering memory for xprt %p\n", xprt); + vec->frmr = NULL; svc_rdma_put_frmr(xprt, frmr); return -EIO; } @@ -191,7 +193,6 @@ static int map_xdr(struct svcxprt_rdma *xprt, struct xdr_buf *xdr, struct svc_rdma_req_map *vec) { - int sge_max = (xdr->len+PAGE_SIZE-1) / PAGE_SIZE + 3; int sge_no; u32 sge_bytes; u32 page_bytes; @@ -235,7 +236,11 @@ static int map_xdr(struct svcxprt_rdma *xprt, sge_no++; } - BUG_ON(sge_no > sge_max); + dprintk("svcrdma: map_xdr: sge_no %d page_no %d " + "page_base %u page_len %u head_len %zu tail_len %zu\n", + sge_no, page_no, xdr->page_base, xdr->page_len, + xdr->head[0].iov_len, xdr->tail[0].iov_len); + vec->count = sge_no; return 0; } @@ -513,6 +518,7 @@ static int send_reply(struct svcxprt_rdma *rdma, "svcrdma: could not post a receive buffer, err=%d." 
"Closing transport %p.\n", ret, rdma); set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags); + svc_rdma_put_frmr(rdma, vec->frmr); svc_rdma_put_context(ctxt, 0); return -ENOTCONN; } @@ -527,18 +533,17 @@ static int send_reply(struct svcxprt_rdma *rdma, clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags); /* Prepare the SGE for the RPCRDMA Header */ + ctxt->sge[0].lkey = rdma->sc_dma_lkey; + ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp); ctxt->sge[0].addr = - ib_dma_map_page(rdma->sc_cm_id->device, - page, 0, PAGE_SIZE, DMA_TO_DEVICE); + ib_dma_map_single(rdma->sc_cm_id->device, page_address(page), + ctxt->sge[0].length, DMA_TO_DEVICE); if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr)) goto err; atomic_inc(&rdma->sc_dma_used); ctxt->direction = DMA_TO_DEVICE; - ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp); - ctxt->sge[0].lkey = rdma->sc_dma_lkey; - /* Determine how many of our SGE are to be transmitted */ for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) { sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count); @@ -579,7 +584,6 @@ static int send_reply(struct svcxprt_rdma *rdma, ctxt->sge[page_no+1].length = 0; } BUG_ON(sge_no > rdma->sc_max_sge); - BUG_ON(sge_no > ctxt->count); memset(&send_wr, 0, sizeof send_wr); ctxt->wr_op = IB_WR_SEND; send_wr.wr_id = (unsigned long)ctxt; @@ -604,6 +608,7 @@ static int send_reply(struct svcxprt_rdma *rdma, return 0; err: + svc_rdma_unmap_dma(ctxt); svc_rdma_put_frmr(rdma, vec->frmr); svc_rdma_put_context(ctxt, 1); return -EIO; diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 3d810e7df3fb..5151f9f6c573 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -500,8 +500,8 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt) BUG_ON(sge_no >= xprt->sc_max_sge); page = svc_rdma_get_page(); ctxt->pages[sge_no] = page; - pa = ib_dma_map_page(xprt->sc_cm_id->device, - page, 0, PAGE_SIZE, + pa = ib_dma_map_single(xprt->sc_cm_id->device, + page_address(page), PAGE_SIZE, DMA_FROM_DEVICE); if (ib_dma_mapping_error(xprt->sc_cm_id->device, pa)) goto err_put_ctxt; @@ -520,8 +520,9 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt) svc_xprt_get(&xprt->sc_xprt); ret = ib_post_recv(xprt->sc_qp, &recv_wr, &bad_recv_wr); if (ret) { - svc_xprt_put(&xprt->sc_xprt); + svc_rdma_unmap_dma(ctxt); svc_rdma_put_context(ctxt, 1); + svc_xprt_put(&xprt->sc_xprt); } return ret; @@ -1314,8 +1315,8 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp, length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va); /* Prepare SGE for local address */ - sge.addr = ib_dma_map_page(xprt->sc_cm_id->device, - p, 0, PAGE_SIZE, DMA_FROM_DEVICE); + sge.addr = ib_dma_map_single(xprt->sc_cm_id->device, + page_address(p), PAGE_SIZE, DMA_FROM_DEVICE); if (ib_dma_mapping_error(xprt->sc_cm_id->device, sge.addr)) { put_page(p); return; @@ -1342,7 +1343,7 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp, if (ret) { dprintk("svcrdma: Error %d posting send for protocol error\n", ret); - ib_dma_unmap_page(xprt->sc_cm_id->device, + ib_dma_unmap_single(xprt->sc_cm_id->device, sge.addr, PAGE_SIZE, DMA_FROM_DEVICE); svc_rdma_put_context(ctxt, 1); diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 3b21e0cc5e69..465aafc2007f 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1495,7 +1495,8 @@ rpcrdma_register_frmr_external(struct 
rpcrdma_mr_seg *seg, frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT; frmr_wr.wr.fast_reg.length = i << PAGE_SHIFT; frmr_wr.wr.fast_reg.access_flags = (writing ? - IB_ACCESS_REMOTE_WRITE : IB_ACCESS_REMOTE_READ); + IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : + IB_ACCESS_REMOTE_READ); frmr_wr.wr.fast_reg.rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey; DECR_CQCOUNT(&r_xprt->rx_ep); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 568330eebbfe..e18596146013 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -49,6 +49,9 @@ unsigned int xprt_tcp_slot_table_entries = RPC_DEF_SLOT_TABLE; unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT; unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT; +#define XS_TCP_LINGER_TO (15U * HZ) +static unsigned int xs_tcp_fin_timeout __read_mostly = XS_TCP_LINGER_TO; + /* * We can register our own files under /proc/sys/sunrpc by * calling register_sysctl_table() again. The files in that @@ -117,6 +120,14 @@ static ctl_table xs_tunables_table[] = { .extra2 = &xprt_max_resvport_limit }, { + .procname = "tcp_fin_timeout", + .data = &xs_tcp_fin_timeout, + .maxlen = sizeof(xs_tcp_fin_timeout), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + .strategy = sysctl_jiffies + }, + { .ctl_name = 0, }, }; @@ -521,11 +532,12 @@ static void xs_nospace_callback(struct rpc_task *task) * @task: task to put to sleep * */ -static void xs_nospace(struct rpc_task *task) +static int xs_nospace(struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); + int ret = 0; dprintk("RPC: %5u xmit incomplete (%u left of %u)\n", task->tk_pid, req->rq_slen - req->rq_bytes_sent, @@ -537,6 +549,7 @@ static void xs_nospace(struct rpc_task *task) /* Don't race with disconnect */ if (xprt_connected(xprt)) { if (test_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags)) { + ret = -EAGAIN; /* * Notify TCP that we're limited by the application * window size @@ -548,10 +561,11 @@ static void xs_nospace(struct rpc_task *task) } } else { clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); - task->tk_status = -ENOTCONN; + ret = -ENOTCONN; } spin_unlock_bh(&xprt->transport_lock); + return ret; } /** @@ -594,6 +608,8 @@ static int xs_udp_send_request(struct rpc_task *task) /* Still some bytes left; set up for a retry later. */ status = -EAGAIN; } + if (!transport->sock) + goto out; switch (status) { case -ENOTSOCK: @@ -601,21 +617,19 @@ static int xs_udp_send_request(struct rpc_task *task) /* Should we call xs_close() here? */ break; case -EAGAIN: - xs_nospace(task); + status = xs_nospace(task); break; + default: + dprintk("RPC: sendmsg returned unrecognized error %d\n", + -status); case -ENETUNREACH: case -EPIPE: case -ECONNREFUSED: /* When the server has died, an ICMP port unreachable message * prompts ECONNREFUSED. */ clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); - break; - default: - clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); - dprintk("RPC: sendmsg returned unrecognized error %d\n", - -status); } - +out: return status; } @@ -697,6 +711,8 @@ static int xs_tcp_send_request(struct rpc_task *task) status = -EAGAIN; break; } + if (!transport->sock) + goto out; switch (status) { case -ENOTSOCK: @@ -704,23 +720,19 @@ static int xs_tcp_send_request(struct rpc_task *task) /* Should we call xs_close() here? 
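The reordered switches in xs_udp_send_request() and xs_tcp_send_request() above place default: ahead of the specific cases, so an unrecognized errno is logged once and then falls through into the shared recovery path. A compact user-space sketch of that layout, with printf standing in for dprintk and the real recovery calls:

#include <errno.h>
#include <stdio.h>

static void handle_send_status(int status)
{
        switch (status) {
        case -EAGAIN:
                printf("buffer full, wait for write space\n");
                break;
        default:
                printf("unrecognized error %d\n", -status);
                /* fall through into the common connection-error path */
        case -ECONNRESET:
        case -EPIPE:
        case -ECONNREFUSED:
        case -ENOTCONN:
                printf("clear NOSPACE; caller will reconnect\n");
        }
}

int main(void)
{
        handle_send_status(-ENOBUFS);   /* takes default plus fall-through */
        handle_send_status(-EAGAIN);
        return 0;
}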
*/ break; case -EAGAIN: - xs_nospace(task); + status = xs_nospace(task); break; + default: + dprintk("RPC: sendmsg returned unrecognized error %d\n", + -status); case -ECONNRESET: + case -EPIPE: xs_tcp_shutdown(xprt); case -ECONNREFUSED: case -ENOTCONN: - case -EPIPE: - status = -ENOTCONN; - clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); - break; - default: - dprintk("RPC: sendmsg returned unrecognized error %d\n", - -status); clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); - xs_tcp_shutdown(xprt); } - +out: return status; } @@ -767,23 +779,13 @@ static void xs_restore_old_callbacks(struct sock_xprt *transport, struct sock *s sk->sk_error_report = transport->old_error_report; } -/** - * xs_close - close a socket - * @xprt: transport - * - * This is used when all requests are complete; ie, no DRC state remains - * on the server we want to save. - */ -static void xs_close(struct rpc_xprt *xprt) +static void xs_reset_transport(struct sock_xprt *transport) { - struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); struct socket *sock = transport->sock; struct sock *sk = transport->inet; - if (!sk) - goto clear_close_wait; - - dprintk("RPC: xs_close xprt %p\n", xprt); + if (sk == NULL) + return; write_lock_bh(&sk->sk_callback_lock); transport->inet = NULL; @@ -797,14 +799,42 @@ static void xs_close(struct rpc_xprt *xprt) sk->sk_no_check = 0; sock_release(sock); -clear_close_wait: +} + +/** + * xs_close - close a socket + * @xprt: transport + * + * This is used when all requests are complete; ie, no DRC state remains + * on the server we want to save. + * + * The caller _must_ be holding XPRT_LOCKED in order to avoid issues with + * xs_reset_transport() zeroing the socket from underneath a writer. + */ +static void xs_close(struct rpc_xprt *xprt) +{ + struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); + + dprintk("RPC: xs_close xprt %p\n", xprt); + + xs_reset_transport(transport); + smp_mb__before_clear_bit(); + clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); clear_bit(XPRT_CLOSE_WAIT, &xprt->state); clear_bit(XPRT_CLOSING, &xprt->state); smp_mb__after_clear_bit(); xprt_disconnect_done(xprt); } +static void xs_tcp_close(struct rpc_xprt *xprt) +{ + if (test_and_clear_bit(XPRT_CONNECTION_CLOSE, &xprt->state)) + xs_close(xprt); + else + xs_tcp_shutdown(xprt); +} + /** * xs_destroy - prepare to shutdown a transport * @xprt: doomed transport @@ -1126,6 +1156,47 @@ out: read_unlock(&sk->sk_callback_lock); } +/* + * Do the equivalent of linger/linger2 handling for dealing with + * broken servers that don't close the socket in a timely + * fashion + */ +static void xs_tcp_schedule_linger_timeout(struct rpc_xprt *xprt, + unsigned long timeout) +{ + struct sock_xprt *transport; + + if (xprt_test_and_set_connecting(xprt)) + return; + set_bit(XPRT_CONNECTION_ABORT, &xprt->state); + transport = container_of(xprt, struct sock_xprt, xprt); + queue_delayed_work(rpciod_workqueue, &transport->connect_worker, + timeout); +} + +static void xs_tcp_cancel_linger_timeout(struct rpc_xprt *xprt) +{ + struct sock_xprt *transport; + + transport = container_of(xprt, struct sock_xprt, xprt); + + if (!test_bit(XPRT_CONNECTION_ABORT, &xprt->state) || + !cancel_delayed_work(&transport->connect_worker)) + return; + clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); + xprt_clear_connecting(xprt); +} + +static void xs_sock_mark_closed(struct rpc_xprt *xprt) +{ + smp_mb__before_clear_bit(); + clear_bit(XPRT_CLOSE_WAIT, &xprt->state); + clear_bit(XPRT_CLOSING, &xprt->state); + 
smp_mb__after_clear_bit(); + /* Mark transport as closed and wake up all pending tasks */ + xprt_disconnect_done(xprt); +} + /** * xs_tcp_state_change - callback to handle TCP socket state changes * @sk: socket whose state has changed @@ -1158,7 +1229,7 @@ static void xs_tcp_state_change(struct sock *sk) transport->tcp_flags = TCP_RCV_COPY_FRAGHDR | TCP_RCV_COPY_XID; - xprt_wake_pending_tasks(xprt, 0); + xprt_wake_pending_tasks(xprt, -EAGAIN); } spin_unlock_bh(&xprt->transport_lock); break; @@ -1171,10 +1242,10 @@ static void xs_tcp_state_change(struct sock *sk) clear_bit(XPRT_CONNECTED, &xprt->state); clear_bit(XPRT_CLOSE_WAIT, &xprt->state); smp_mb__after_clear_bit(); + xs_tcp_schedule_linger_timeout(xprt, xs_tcp_fin_timeout); break; case TCP_CLOSE_WAIT: /* The server initiated a shutdown of the socket */ - set_bit(XPRT_CLOSING, &xprt->state); xprt_force_disconnect(xprt); case TCP_SYN_SENT: xprt->connect_cookie++; @@ -1187,40 +1258,35 @@ static void xs_tcp_state_change(struct sock *sk) xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; break; case TCP_LAST_ACK: + set_bit(XPRT_CLOSING, &xprt->state); + xs_tcp_schedule_linger_timeout(xprt, xs_tcp_fin_timeout); smp_mb__before_clear_bit(); clear_bit(XPRT_CONNECTED, &xprt->state); smp_mb__after_clear_bit(); break; case TCP_CLOSE: - smp_mb__before_clear_bit(); - clear_bit(XPRT_CLOSE_WAIT, &xprt->state); - clear_bit(XPRT_CLOSING, &xprt->state); - smp_mb__after_clear_bit(); - /* Mark transport as closed and wake up all pending tasks */ - xprt_disconnect_done(xprt); + xs_tcp_cancel_linger_timeout(xprt); + xs_sock_mark_closed(xprt); } out: read_unlock(&sk->sk_callback_lock); } /** - * xs_tcp_error_report - callback mainly for catching RST events + * xs_error_report - callback mainly for catching socket errors * @sk: socket */ -static void xs_tcp_error_report(struct sock *sk) +static void xs_error_report(struct sock *sk) { struct rpc_xprt *xprt; read_lock(&sk->sk_callback_lock); - if (sk->sk_err != ECONNRESET || sk->sk_state != TCP_ESTABLISHED) - goto out; if (!(xprt = xprt_from_sock(sk))) goto out; dprintk("RPC: %s client %p...\n" "RPC: error %d\n", __func__, xprt, sk->sk_err); - - xprt_force_disconnect(xprt); + xprt_wake_pending_tasks(xprt, -EAGAIN); out: read_unlock(&sk->sk_callback_lock); } @@ -1494,6 +1560,7 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) sk->sk_user_data = xprt; sk->sk_data_ready = xs_udp_data_ready; sk->sk_write_space = xs_udp_write_space; + sk->sk_error_report = xs_error_report; sk->sk_no_check = UDP_CSUM_NORCV; sk->sk_allocation = GFP_ATOMIC; @@ -1526,9 +1593,10 @@ static void xs_udp_connect_worker4(struct work_struct *work) goto out; /* Start by resetting any existing state */ - xs_close(xprt); + xs_reset_transport(transport); - if ((err = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock)) < 0) { + err = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); + if (err < 0) { dprintk("RPC: can't create UDP transport socket (%d).\n", -err); goto out; } @@ -1545,8 +1613,8 @@ static void xs_udp_connect_worker4(struct work_struct *work) xs_udp_finish_connecting(xprt, sock); status = 0; out: - xprt_wake_pending_tasks(xprt, status); xprt_clear_connecting(xprt); + xprt_wake_pending_tasks(xprt, status); } /** @@ -1567,9 +1635,10 @@ static void xs_udp_connect_worker6(struct work_struct *work) goto out; /* Start by resetting any existing state */ - xs_close(xprt); + xs_reset_transport(transport); - if ((err = sock_create_kern(PF_INET6, SOCK_DGRAM, IPPROTO_UDP, &sock)) < 0) { + err = 
sock_create_kern(PF_INET6, SOCK_DGRAM, IPPROTO_UDP, &sock); + if (err < 0) { dprintk("RPC: can't create UDP transport socket (%d).\n", -err); goto out; } @@ -1586,18 +1655,17 @@ static void xs_udp_connect_worker6(struct work_struct *work) xs_udp_finish_connecting(xprt, sock); status = 0; out: - xprt_wake_pending_tasks(xprt, status); xprt_clear_connecting(xprt); + xprt_wake_pending_tasks(xprt, status); } /* * We need to preserve the port number so the reply cache on the server can * find our cached RPC replies when we get around to reconnecting. */ -static void xs_tcp_reuse_connection(struct rpc_xprt *xprt) +static void xs_abort_connection(struct rpc_xprt *xprt, struct sock_xprt *transport) { int result; - struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); struct sockaddr any; dprintk("RPC: disconnecting xprt %p to reuse port\n", xprt); @@ -1609,11 +1677,24 @@ static void xs_tcp_reuse_connection(struct rpc_xprt *xprt) memset(&any, 0, sizeof(any)); any.sa_family = AF_UNSPEC; result = kernel_connect(transport->sock, &any, sizeof(any), 0); - if (result) + if (!result) + xs_sock_mark_closed(xprt); + else dprintk("RPC: AF_UNSPEC connect return code %d\n", result); } +static void xs_tcp_reuse_connection(struct rpc_xprt *xprt, struct sock_xprt *transport) +{ + unsigned int state = transport->inet->sk_state; + + if (state == TCP_CLOSE && transport->sock->state == SS_UNCONNECTED) + return; + if ((1 << state) & (TCPF_ESTABLISHED|TCPF_SYN_SENT)) + return; + xs_abort_connection(xprt, transport); +} + static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) { struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); @@ -1629,7 +1710,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) sk->sk_data_ready = xs_tcp_data_ready; sk->sk_state_change = xs_tcp_state_change; sk->sk_write_space = xs_tcp_write_space; - sk->sk_error_report = xs_tcp_error_report; + sk->sk_error_report = xs_error_report; sk->sk_allocation = GFP_ATOMIC; /* socket options */ @@ -1657,37 +1738,42 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) } /** - * xs_tcp_connect_worker4 - connect a TCP socket to a remote endpoint - * @work: RPC transport to connect + * xs_tcp_setup_socket - create a TCP socket and connect to a remote endpoint + * @xprt: RPC transport to connect + * @transport: socket transport to connect + * @create_sock: function to create a socket of the correct type * * Invoked by a work queue tasklet. 
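xs_abort_connection() above relies on connect() with an AF_UNSPEC address, which Linux interprets as a request to dissolve the association while keeping the bound local port, so the server's reply cache still recognizes the client after reconnecting. The same trick from user space, a sketch with return values left unchecked:

#include <string.h>
#include <sys/socket.h>

/* dissolve the association on a connected socket but keep the bound
 * local port; Linux accepts this for TCP as well as UDP sockets */
static int dissolve_association(int fd)
{
        struct sockaddr any;

        memset(&any, 0, sizeof(any));
        any.sa_family = AF_UNSPEC;
        return connect(fd, &any, sizeof(any));
}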
*/ -static void xs_tcp_connect_worker4(struct work_struct *work) +static void xs_tcp_setup_socket(struct rpc_xprt *xprt, + struct sock_xprt *transport, + struct socket *(*create_sock)(struct rpc_xprt *, + struct sock_xprt *)) { - struct sock_xprt *transport = - container_of(work, struct sock_xprt, connect_worker.work); - struct rpc_xprt *xprt = &transport->xprt; struct socket *sock = transport->sock; - int err, status = -EIO; + int status = -EIO; if (xprt->shutdown) goto out; if (!sock) { - /* start from scratch */ - if ((err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) { - dprintk("RPC: can't create TCP transport socket (%d).\n", -err); + clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); + sock = create_sock(xprt, transport); + if (IS_ERR(sock)) { + status = PTR_ERR(sock); goto out; } - xs_reclassify_socket4(sock); + } else { + int abort_and_exit; - if (xs_bind4(transport, sock) < 0) { - sock_release(sock); - goto out; - } - } else + abort_and_exit = test_and_clear_bit(XPRT_CONNECTION_ABORT, + &xprt->state); /* "close" the socket, preserving the local port */ - xs_tcp_reuse_connection(xprt); + xs_tcp_reuse_connection(xprt, transport); + + if (abort_and_exit) + goto out_eagain; + } dprintk("RPC: worker connecting xprt %p to address: %s\n", xprt, xprt->address_strings[RPC_DISPLAY_ALL]); @@ -1696,83 +1782,109 @@ static void xs_tcp_connect_worker4(struct work_struct *work) dprintk("RPC: %p connect status %d connected %d sock state %d\n", xprt, -status, xprt_connected(xprt), sock->sk->sk_state); - if (status < 0) { - switch (status) { - case -EINPROGRESS: - case -EALREADY: - goto out_clear; - case -ECONNREFUSED: - case -ECONNRESET: - /* retry with existing socket, after a delay */ - break; - default: - /* get rid of existing socket, and retry */ - xs_tcp_shutdown(xprt); - } + switch (status) { + default: + printk("%s: connect returned unhandled error %d\n", + __func__, status); + case -EADDRNOTAVAIL: + /* We're probably in TIME_WAIT. Get rid of existing socket, + * and retry + */ + set_bit(XPRT_CONNECTION_CLOSE, &xprt->state); + xprt_force_disconnect(xprt); + case -ECONNREFUSED: + case -ECONNRESET: + case -ENETUNREACH: + /* retry with existing socket, after a delay */ + case 0: + case -EINPROGRESS: + case -EALREADY: + xprt_clear_connecting(xprt); + return; } +out_eagain: + status = -EAGAIN; out: - xprt_wake_pending_tasks(xprt, status); -out_clear: xprt_clear_connecting(xprt); + xprt_wake_pending_tasks(xprt, status); +} + +static struct socket *xs_create_tcp_sock4(struct rpc_xprt *xprt, + struct sock_xprt *transport) +{ + struct socket *sock; + int err; + + /* start from scratch */ + err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock); + if (err < 0) { + dprintk("RPC: can't create TCP transport socket (%d).\n", + -err); + goto out_err; + } + xs_reclassify_socket4(sock); + + if (xs_bind4(transport, sock) < 0) { + sock_release(sock); + goto out_err; + } + return sock; +out_err: + return ERR_PTR(-EIO); } /** - * xs_tcp_connect_worker6 - connect a TCP socket to a remote endpoint + * xs_tcp_connect_worker4 - connect a TCP socket to a remote endpoint * @work: RPC transport to connect * * Invoked by a work queue tasklet. 
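The hunks above and below fold the duplicated IPv4/IPv6 connect workers into one xs_tcp_setup_socket() that takes a socket-factory callback, so only creation and binding stay family-specific. A skeletal user-space rendering of that refactoring pattern, all names illustrative:

#include <stddef.h>

struct transport;                       /* opaque in this sketch */
struct sock_handle { int family; };

typedef struct sock_handle *(*create_sock_fn)(struct transport *);

static struct sock_handle *create_sock4(struct transport *t)
{
        static struct sock_handle h = { 4 };
        (void)t;
        return &h;
}

static struct sock_handle *create_sock6(struct transport *t)
{
        static struct sock_handle h = { 6 };
        (void)t;
        return &h;
}

/* everything the two workers used to duplicate lives here once */
static void setup_socket(struct transport *t, create_sock_fn create)
{
        struct sock_handle *sock = create(t);   /* family-specific step */

        /* ... shared connect, retry and error handling ... */
        (void)sock;
}

static void connect_worker4(struct transport *t) { setup_socket(t, create_sock4); }
static void connect_worker6(struct transport *t) { setup_socket(t, create_sock6); }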
*/ -static void xs_tcp_connect_worker6(struct work_struct *work) +static void xs_tcp_connect_worker4(struct work_struct *work) { struct sock_xprt *transport = container_of(work, struct sock_xprt, connect_worker.work); struct rpc_xprt *xprt = &transport->xprt; - struct socket *sock = transport->sock; - int err, status = -EIO; - if (xprt->shutdown) - goto out; + xs_tcp_setup_socket(xprt, transport, xs_create_tcp_sock4); +} - if (!sock) { - /* start from scratch */ - if ((err = sock_create_kern(PF_INET6, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) { - dprintk("RPC: can't create TCP transport socket (%d).\n", -err); - goto out; - } - xs_reclassify_socket6(sock); +static struct socket *xs_create_tcp_sock6(struct rpc_xprt *xprt, + struct sock_xprt *transport) +{ + struct socket *sock; + int err; + + /* start from scratch */ + err = sock_create_kern(PF_INET6, SOCK_STREAM, IPPROTO_TCP, &sock); + if (err < 0) { + dprintk("RPC: can't create TCP transport socket (%d).\n", + -err); + goto out_err; + } + xs_reclassify_socket6(sock); - if (xs_bind6(transport, sock) < 0) { - sock_release(sock); - goto out; - } - } else - /* "close" the socket, preserving the local port */ - xs_tcp_reuse_connection(xprt); + if (xs_bind6(transport, sock) < 0) { + sock_release(sock); + goto out_err; + } + return sock; +out_err: + return ERR_PTR(-EIO); +} - dprintk("RPC: worker connecting xprt %p to address: %s\n", - xprt, xprt->address_strings[RPC_DISPLAY_ALL]); +/** + * xs_tcp_connect_worker6 - connect a TCP socket to a remote endpoint + * @work: RPC transport to connect + * + * Invoked by a work queue tasklet. + */ +static void xs_tcp_connect_worker6(struct work_struct *work) +{ + struct sock_xprt *transport = + container_of(work, struct sock_xprt, connect_worker.work); + struct rpc_xprt *xprt = &transport->xprt; - status = xs_tcp_finish_connecting(xprt, sock); - dprintk("RPC: %p connect status %d connected %d sock state %d\n", - xprt, -status, xprt_connected(xprt), sock->sk->sk_state); - if (status < 0) { - switch (status) { - case -EINPROGRESS: - case -EALREADY: - goto out_clear; - case -ECONNREFUSED: - case -ECONNRESET: - /* retry with existing socket, after a delay */ - break; - default: - /* get rid of existing socket, and retry */ - xs_tcp_shutdown(xprt); - } - } -out: - xprt_wake_pending_tasks(xprt, status); -out_clear: - xprt_clear_connecting(xprt); + xs_tcp_setup_socket(xprt, transport, xs_create_tcp_sock6); } /** @@ -1817,9 +1929,6 @@ static void xs_tcp_connect(struct rpc_task *task) { struct rpc_xprt *xprt = task->tk_xprt; - /* Initiate graceful shutdown of the socket if not already done */ - if (test_bit(XPRT_CONNECTED, &xprt->state)) - xs_tcp_shutdown(xprt); /* Exit if we need to wait for socket shutdown to complete */ if (test_bit(XPRT_CLOSING, &xprt->state)) return; @@ -1901,7 +2010,7 @@ static struct rpc_xprt_ops xs_tcp_ops = { .buf_free = rpc_free, .send_request = xs_tcp_send_request, .set_retrans_timeout = xprt_set_retrans_timeout_def, - .close = xs_tcp_shutdown, + .close = xs_tcp_close, .destroy = xs_destroy, .print_stats = xs_tcp_print_stats, }; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index baac91049b0e..9dcc6e7f96ec 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -832,7 +832,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) * All right, let's create it. 
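The unix_bind() change above swaps a direct read of current->fs->umask for the current_umask() helper; the mode computation itself is unchanged. What it computes, shown as a small user-space program:

#include <stdio.h>
#include <sys/stat.h>
#include <sys/types.h>

int main(void)
{
        mode_t requested = 0777;
        mode_t um = umask(0);           /* read the current umask... */
        umask(um);                      /* ...and put it back */
        mode_t mode = S_IFSOCK | (requested & ~um);

        printf("socket node mode: %o\n", (unsigned int)mode);
        return 0;
}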
*/ mode = S_IFSOCK | - (SOCK_INODE(sock)->i_mode & ~current->fs->umask); + (SOCK_INODE(sock)->i_mode & ~current_umask()); err = mnt_want_write(nd.path.mnt); if (err) goto out_mknod_dput; diff --git a/net/wimax/Kconfig b/net/wimax/Kconfig index 18495cdcd10d..1b46747a5f5a 100644 --- a/net/wimax/Kconfig +++ b/net/wimax/Kconfig @@ -8,7 +8,7 @@ # # As well, enablement of the RFKILL code means we need the INPUT layer # support to inject events coming from hw rfkill switches. That -# dependency could be killed if input.h provided appropiate means to +# dependency could be killed if input.h provided appropriate means to # work when input is disabled. comment "WiMAX Wireless Broadband support requires CONFIG_INPUT enabled" diff --git a/net/wimax/op-msg.c b/net/wimax/op-msg.c index 5d149c1b5f0d..9ad4d893a566 100644 --- a/net/wimax/op-msg.c +++ b/net/wimax/op-msg.c @@ -149,7 +149,8 @@ struct sk_buff *wimax_msg_alloc(struct wimax_dev *wimax_dev, } result = nla_put(skb, WIMAX_GNL_MSG_DATA, size, msg); if (result < 0) { - dev_err(dev, "no memory to add payload in attribute\n"); + dev_err(dev, "no memory to add payload (msg %p size %zu) in " + "attribute: %d\n", msg, size, result); goto error_nla_put; } genlmsg_end(skb, genl_msg); @@ -299,10 +300,10 @@ int wimax_msg(struct wimax_dev *wimax_dev, const char *pipe_name, struct sk_buff *skb; skb = wimax_msg_alloc(wimax_dev, pipe_name, buf, size, gfp_flags); - if (skb == NULL) - goto error_msg_new; - result = wimax_msg_send(wimax_dev, skb); -error_msg_new: + if (IS_ERR(skb)) + result = PTR_ERR(skb); + else + result = wimax_msg_send(wimax_dev, skb); return result; } EXPORT_SYMBOL_GPL(wimax_msg); diff --git a/net/wimax/stack.c b/net/wimax/stack.c index a0ee76b52510..933e1422b09f 100644 --- a/net/wimax/stack.c +++ b/net/wimax/stack.c @@ -338,8 +338,21 @@ out: */ void wimax_state_change(struct wimax_dev *wimax_dev, enum wimax_st new_state) { + /* + * A driver cannot take the wimax_dev out of the + * __WIMAX_ST_NULL state unless by calling wimax_dev_add(). If + * the wimax_dev's state is still NULL, we ignore any request + * to change its state because it means it hasn't been yet + * registered. + * + * There is no need to complain about it, as routines that + * call this might be shared from different code paths that + * are called before or after wimax_dev_add() has done its + * job. 
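The wimax_msg() fix above matters because wimax_msg_alloc() reports failure as an ERR_PTR()-encoded pointer, so the old NULL test could pass an error pointer on to wimax_msg_send(). A user-space model of the idiom, with simplified versions of the kernel's err.h macros:

#include <errno.h>
#include <stdio.h>

/* simplified versions of the kernel's ERR_PTR()/PTR_ERR()/IS_ERR() */
#define ERR_PTR(err)  ((void *)(long)(err))
#define PTR_ERR(ptr)  ((long)(ptr))
#define IS_ERR(ptr)   ((unsigned long)(ptr) >= (unsigned long)-4095)

static void *msg_alloc(int fail)
{
        return fail ? ERR_PTR(-ENOMEM) : (void *)"a message";
}

int main(void)
{
        void *skb = msg_alloc(1);
        long result = IS_ERR(skb) ? PTR_ERR(skb) : 0;

        /* prints -12 (ENOMEM); the old NULL test would have let
           the error pointer through as if it were a valid message */
        printf("result = %ld\n", result);
        return 0;
}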
+ */ mutex_lock(&wimax_dev->mutex); - __wimax_state_change(wimax_dev, new_state); + if (wimax_dev->state > __WIMAX_ST_NULL) + __wimax_state_change(wimax_dev, new_state); mutex_unlock(&wimax_dev->mutex); return; } @@ -376,7 +389,7 @@ EXPORT_SYMBOL_GPL(wimax_state_get); void wimax_dev_init(struct wimax_dev *wimax_dev) { INIT_LIST_HEAD(&wimax_dev->id_table_node); - __wimax_state_set(wimax_dev, WIMAX_ST_UNINITIALIZED); + __wimax_state_set(wimax_dev, __WIMAX_ST_NULL); mutex_init(&wimax_dev->mutex); mutex_init(&wimax_dev->mutex_reset); } diff --git a/net/wireless/core.h b/net/wireless/core.h index d43daa236ef9..0a592e4295f0 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -90,7 +90,7 @@ struct cfg80211_internal_bss { struct rb_node rbn; unsigned long ts; struct kref ref; - bool hold; + bool hold, ies_allocated; /* must be last because of priv member */ struct cfg80211_bss pub; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 353e1a4ece83..2456e4ee445e 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3334,7 +3334,7 @@ static void nl80211_send_mlme_event(struct cfg80211_registered_device *rdev, struct sk_buff *msg; void *hdr; - msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + msg = nlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC); if (!msg) return; @@ -3353,7 +3353,7 @@ static void nl80211_send_mlme_event(struct cfg80211_registered_device *rdev, return; } - genlmsg_multicast(msg, 0, nl80211_mlme_mcgrp.id, GFP_KERNEL); + genlmsg_multicast(msg, 0, nl80211_mlme_mcgrp.id, GFP_ATOMIC); return; nla_put_failure: diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 6327e1617acb..487cb627ddba 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -907,6 +907,7 @@ EXPORT_SYMBOL(freq_reg_info); int freq_reg_info(struct wiphy *wiphy, u32 center_freq, u32 *bandwidth, const struct ieee80211_reg_rule **reg_rule) { + assert_cfg80211_lock(); return freq_reg_info_regd(wiphy, center_freq, bandwidth, reg_rule, NULL); } @@ -1133,7 +1134,8 @@ static bool reg_is_world_roaming(struct wiphy *wiphy) if (is_world_regdom(cfg80211_regdomain->alpha2) || (wiphy->regd && is_world_regdom(wiphy->regd->alpha2))) return true; - if (last_request->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE && + if (last_request && + last_request->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE && wiphy->custom_regulatory) return true; return false; @@ -1142,6 +1144,12 @@ static bool reg_is_world_roaming(struct wiphy *wiphy) /* Reap the advantages of previously found beacons */ static void reg_process_beacons(struct wiphy *wiphy) { + /* + * Means we are just firing up cfg80211, so no beacons would + * have been processed yet. 
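Several of the reg.c hunks above share one shape: last_request is NULL until the first regulatory hint has been processed, so every early path that dereferences it now checks first. The guard, reduced to a user-space sketch with illustrative names:

#include <stdbool.h>
#include <stddef.h>

struct request { int initiator; };

static struct request *last_request;    /* NULL until the first hint */

static bool is_world_roaming(void)
{
        if (last_request == NULL)       /* nothing processed yet */
                return false;
        return last_request->initiator != 0;
}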
+ */ + if (!last_request) + return; if (!reg_is_world_roaming(wiphy)) return; wiphy_update_beacon_reg(wiphy); @@ -1176,6 +1184,8 @@ static void handle_channel_custom(struct wiphy *wiphy, struct ieee80211_supported_band *sband; struct ieee80211_channel *chan; + assert_cfg80211_lock(); + sband = wiphy->bands[band]; BUG_ON(chan_idx >= sband->n_channels); chan = &sband->channels[chan_idx]; @@ -1214,10 +1224,13 @@ void wiphy_apply_custom_regulatory(struct wiphy *wiphy, const struct ieee80211_regdomain *regd) { enum ieee80211_band band; + + mutex_lock(&cfg80211_mutex); for (band = 0; band < IEEE80211_NUM_BANDS; band++) { if (wiphy->bands[band]) handle_band_custom(wiphy, band, regd); } + mutex_unlock(&cfg80211_mutex); } EXPORT_SYMBOL(wiphy_apply_custom_regulatory); @@ -1423,7 +1436,7 @@ new_request: return call_crda(last_request->alpha2); } -/* This currently only processes user and driver regulatory hints */ +/* This processes *all* regulatory hints */ static void reg_process_hint(struct regulatory_request *reg_request) { int r = 0; @@ -1538,6 +1551,13 @@ static int regulatory_hint_core(const char *alpha2) queue_regulatory_request(request); + /* + * This ensures last_request is populated once modules + * come swinging in and calling regulatory hints and + * wiphy_apply_custom_regulatory(). + */ + flush_scheduled_work(); + return 0; } @@ -2095,11 +2115,12 @@ int set_regdom(const struct ieee80211_regdomain *rd) /* Caller must hold cfg80211_mutex */ void reg_device_remove(struct wiphy *wiphy) { - struct wiphy *request_wiphy; + struct wiphy *request_wiphy = NULL; assert_cfg80211_lock(); - request_wiphy = wiphy_idx_to_wiphy(last_request->wiphy_idx); + if (last_request) + request_wiphy = wiphy_idx_to_wiphy(last_request->wiphy_idx); kfree(wiphy->regd); if (!last_request || !request_wiphy) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 2a00e362f5fe..1f260c40b6ca 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -58,6 +58,10 @@ static void bss_release(struct kref *ref) bss = container_of(ref, struct cfg80211_internal_bss, ref); if (bss->pub.free_priv) bss->pub.free_priv(&bss->pub); + + if (bss->ies_allocated) + kfree(bss->pub.information_elements); + kfree(bss); } @@ -360,19 +364,42 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev, found = rb_find_bss(dev, res); - if (found && overwrite) { - list_replace(&found->list, &res->list); - rb_replace_node(&found->rbn, &res->rbn, - &dev->bss_tree); - kref_put(&found->ref, bss_release); - found = res; - } else if (found) { + if (found) { kref_get(&found->ref); found->pub.beacon_interval = res->pub.beacon_interval; found->pub.tsf = res->pub.tsf; found->pub.signal = res->pub.signal; found->pub.capability = res->pub.capability; found->ts = res->ts; + + /* overwrite IEs */ + if (overwrite) { + size_t used = dev->wiphy.bss_priv_size + sizeof(*res); + size_t ielen = res->pub.len_information_elements; + + if (ksize(found) >= used + ielen) { + memcpy(found->pub.information_elements, + res->pub.information_elements, ielen); + found->pub.len_information_elements = ielen; + } else { + u8 *ies = found->pub.information_elements; + + if (found->ies_allocated) { + if (ksize(ies) < ielen) + ies = krealloc(ies, ielen, + GFP_ATOMIC); + } else + ies = kmalloc(ielen, GFP_ATOMIC); + + if (ies) { + memcpy(ies, res->pub.information_elements, ielen); + found->ies_allocated = true; + found->pub.information_elements = ies; + found->pub.len_information_elements = ielen; + } + } + } + kref_put(&res->ref, bss_release); } else { /* this "consumes" the 
reference */ diff --git a/net/wireless/wext.c b/net/wireless/wext.c index cb6a5bb85d80..0e59f9ae9b81 100644 --- a/net/wireless/wext.c +++ b/net/wireless/wext.c @@ -786,6 +786,13 @@ static int ioctl_standard_iw_point(struct iw_point *iwp, unsigned int cmd, err = -EFAULT; goto out; } + + if (cmd == SIOCSIWENCODEEXT) { + struct iw_encode_ext *ee = (void *) extra; + + if (iwp->length < sizeof(*ee) + ee->key_len) + return -EFAULT; + } } err = handler(dev, info, (union iwreq_data *) iwp, extra); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 82271720d970..5f1f86565f16 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -794,7 +794,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, { static xfrm_address_t saddr_wildcard = { }; struct net *net = xp_net(pol); - unsigned int h; + unsigned int h, h_wildcard; struct hlist_node *entry; struct xfrm_state *x, *x0, *to_put; int acquire_in_progress = 0; @@ -819,8 +819,8 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, if (best) goto found; - h = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, family); - hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h, bydst) { + h_wildcard = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, family); + hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h_wildcard, bydst) { if (x->props.family == family && x->props.reqid == tmpl->reqid && !(x->props.flags & XFRM_STATE_WILDRECV) &&
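The SIOCSIWENCODEEXT check added above validates a variable-length structure copied from user space: the declared buffer length must cover sizeof(struct iw_encode_ext) plus the embedded key_len, or key material would be read past the end of the buffer. A user-space model with a simplified stand-in for iw_encode_ext:

#include <errno.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* simplified stand-in for struct iw_encode_ext */
struct encode_ext {
        uint16_t key_len;
        uint8_t  key[];                 /* flexible array member */
};

static int validate(const struct encode_ext *ee, size_t declared)
{
        if (declared < sizeof(*ee) + ee->key_len)
                return -EFAULT;         /* claimed key overruns the buffer */
        return 0;
}

int main(void)
{
        struct encode_ext ee = { .key_len = 32 };

        printf("%d\n", validate(&ee, sizeof(ee)));      /* -14: rejected */
        printf("%d\n", validate(&ee, sizeof(ee) + 32)); /* 0: accepted  */
        return 0;
}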
