diff options
Diffstat (limited to 'net')
84 files changed, 4414 insertions, 2547 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 61fc573f1142..b3d17d1c49c3 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -98,14 +98,14 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head) vlan_gvrp_request_leave(dev); vlan_group_set_device(grp, vlan->vlan_proto, vlan_id, NULL); + + netdev_upper_dev_unlink(real_dev, dev); /* Because unregister_netdevice_queue() makes sure at least one rcu * grace period is respected before device freeing, * we dont need to call synchronize_net() here. */ unregister_netdevice_queue(dev, head); - netdev_upper_dev_unlink(real_dev, dev); - if (grp->nr_vlan_devs == 0) { vlan_mvrp_uninit_applicant(real_dev); vlan_gvrp_uninit_applicant(real_dev); @@ -169,13 +169,13 @@ int register_vlan_dev(struct net_device *dev) if (err < 0) goto out_uninit_mvrp; - err = netdev_upper_dev_link(real_dev, dev); - if (err) - goto out_uninit_mvrp; - err = register_netdevice(dev); if (err < 0) - goto out_upper_dev_unlink; + goto out_uninit_mvrp; + + err = netdev_upper_dev_link(real_dev, dev); + if (err) + goto out_unregister_netdev; /* Account for reference in struct vlan_dev_priv */ dev_hold(real_dev); @@ -191,8 +191,8 @@ int register_vlan_dev(struct net_device *dev) return 0; -out_upper_dev_unlink: - netdev_upper_dev_unlink(real_dev, dev); +out_unregister_netdev: + unregister_netdevice(dev); out_uninit_mvrp: if (grp->nr_vlan_devs == 0) vlan_mvrp_uninit_applicant(real_dev); diff --git a/net/bluetooth/Makefile b/net/bluetooth/Makefile index dea6a287daca..6a791e73e39d 100644 --- a/net/bluetooth/Makefile +++ b/net/bluetooth/Makefile @@ -11,3 +11,5 @@ obj-$(CONFIG_BT_HIDP) += hidp/ bluetooth-y := af_bluetooth.o hci_core.o hci_conn.o hci_event.o mgmt.o \ hci_sock.o hci_sysfs.o l2cap_core.o l2cap_sock.o smp.o sco.o lib.o \ a2mp.o amp.o + +subdir-ccflags-y += -D__CHECK_ENDIAN__ diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 9096137c889c..e6e1278dca89 100644 --- a/net/bluetooth/af_bluetooth.c +++ 
b/net/bluetooth/af_bluetooth.c @@ -490,6 +490,7 @@ int bt_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) } EXPORT_SYMBOL(bt_sock_ioctl); +/* This function expects the sk lock to be held when called */ int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo) { DECLARE_WAITQUEUE(wait, current); @@ -525,6 +526,46 @@ int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo) } EXPORT_SYMBOL(bt_sock_wait_state); +/* This function expects the sk lock to be held when called */ +int bt_sock_wait_ready(struct sock *sk, unsigned long flags) +{ + DECLARE_WAITQUEUE(wait, current); + unsigned long timeo; + int err = 0; + + BT_DBG("sk %p", sk); + + timeo = sock_sndtimeo(sk, flags & O_NONBLOCK); + + add_wait_queue(sk_sleep(sk), &wait); + set_current_state(TASK_INTERRUPTIBLE); + while (test_bit(BT_SK_SUSPEND, &bt_sk(sk)->flags)) { + if (!timeo) { + err = -EAGAIN; + break; + } + + if (signal_pending(current)) { + err = sock_intr_errno(timeo); + break; + } + + release_sock(sk); + timeo = schedule_timeout(timeo); + lock_sock(sk); + set_current_state(TASK_INTERRUPTIBLE); + + err = sock_error(sk); + if (err) + break; + } + __set_current_state(TASK_RUNNING); + remove_wait_queue(sk_sleep(sk), &wait); + + return err; +} +EXPORT_SYMBOL(bt_sock_wait_ready); + #ifdef CONFIG_PROC_FS struct bt_seq_state { struct bt_sock_list *l; diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index f0817121ec5e..514148b7a66b 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -518,6 +518,7 @@ struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src) list_for_each_entry(d, &hci_dev_list, list) { if (!test_bit(HCI_UP, &d->flags) || test_bit(HCI_RAW, &d->flags) || + test_bit(HCI_USER_CHANNEL, &d->dev_flags) || d->dev_type != HCI_BREDR) continue; @@ -580,6 +581,9 @@ static struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst, { struct hci_conn *acl; + if (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) + return 
ERR_PTR(-ENOTSUPP); + acl = hci_conn_hash_lookup_ba(hdev, ACL_LINK, dst); if (!acl) { acl = hci_conn_add(hdev, ACL_LINK, dst); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index fb7356fcfe51..82dbdc6a7e9e 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -519,6 +519,8 @@ static void hci_init2_req(struct hci_request *req, unsigned long opt) if (lmp_bredr_capable(hdev)) bredr_setup(req); + else + clear_bit(HCI_BREDR_ENABLED, &hdev->dev_flags); if (lmp_le_capable(hdev)) le_setup(req); @@ -607,6 +609,34 @@ static void hci_set_le_support(struct hci_request *req) &cp); } +static void hci_set_event_mask_page_2(struct hci_request *req) +{ + struct hci_dev *hdev = req->hdev; + u8 events[8] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; + + /* If Connectionless Slave Broadcast master role is supported + * enable all necessary events for it. + */ + if (hdev->features[2][0] & 0x01) { + events[1] |= 0x40; /* Triggered Clock Capture */ + events[1] |= 0x80; /* Synchronization Train Complete */ + events[2] |= 0x10; /* Slave Page Response Timeout */ + events[2] |= 0x20; /* CSB Channel Map Change */ + } + + /* If Connectionless Slave Broadcast slave role is supported + * enable all necessary events for it. 
+ */ + if (hdev->features[2][0] & 0x02) { + events[2] |= 0x01; /* Synchronization Train Received */ + events[2] |= 0x02; /* CSB Receive */ + events[2] |= 0x04; /* CSB Timeout */ + events[2] |= 0x08; /* Truncated Page Complete */ + } + + hci_req_add(req, HCI_OP_SET_EVENT_MASK_PAGE_2, sizeof(events), events); +} + static void hci_init3_req(struct hci_request *req, unsigned long opt) { struct hci_dev *hdev = req->hdev; @@ -648,6 +678,19 @@ static void hci_init3_req(struct hci_request *req, unsigned long opt) } } +static void hci_init4_req(struct hci_request *req, unsigned long opt) +{ + struct hci_dev *hdev = req->hdev; + + /* Set event mask page 2 if the HCI command for it is supported */ + if (hdev->commands[22] & 0x04) + hci_set_event_mask_page_2(req); + + /* Check for Synchronization Train support */ + if (hdev->features[2][0] & 0x04) + hci_req_add(req, HCI_OP_READ_SYNC_TRAIN_PARAMS, 0, NULL); +} + static int __hci_init(struct hci_dev *hdev) { int err; @@ -667,7 +710,11 @@ static int __hci_init(struct hci_dev *hdev) if (err < 0) return err; - return __hci_req_sync(hdev, hci_init3_req, 0, HCI_INIT_TIMEOUT); + err = __hci_req_sync(hdev, hci_init3_req, 0, HCI_INIT_TIMEOUT); + if (err < 0) + return err; + + return __hci_req_sync(hdev, hci_init4_req, 0, HCI_INIT_TIMEOUT); } static void hci_scan_req(struct hci_request *req, unsigned long opt) @@ -984,6 +1031,16 @@ int hci_inquiry(void __user *arg) if (!hdev) return -ENODEV; + if (test_bit(HCI_USER_CHANNEL, &hdev->dev_flags)) { + err = -EBUSY; + goto done; + } + + if (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) { + err = -EOPNOTSUPP; + goto done; + } + hci_dev_lock(hdev); if (inquiry_cache_age(hdev) > INQUIRY_CACHE_AGE_MAX || inquiry_cache_empty(hdev) || ir.flags & IREQ_CACHE_FLUSH) { @@ -1051,14 +1108,14 @@ static u8 create_ad(struct hci_dev *hdev, u8 *ptr) if (test_bit(HCI_LE_PERIPHERAL, &hdev->dev_flags)) flags |= LE_AD_GENERAL; - if (!lmp_bredr_capable(hdev)) + if (test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) { 
+ if (lmp_le_br_capable(hdev)) + flags |= LE_AD_SIM_LE_BREDR_CTRL; + if (lmp_host_le_br_capable(hdev)) + flags |= LE_AD_SIM_LE_BREDR_HOST; + } else { flags |= LE_AD_NO_BREDR; - - if (lmp_le_br_capable(hdev)) - flags |= LE_AD_SIM_LE_BREDR_CTRL; - - if (lmp_host_le_br_capable(hdev)) - flags |= LE_AD_SIM_LE_BREDR_HOST; + } if (flags) { BT_DBG("adv flags 0x%02x", flags); @@ -1126,17 +1183,10 @@ void hci_update_ad(struct hci_request *req) hci_req_add(req, HCI_OP_LE_SET_ADV_DATA, sizeof(cp), &cp); } -/* ---- HCI ioctl helpers ---- */ - -int hci_dev_open(__u16 dev) +static int hci_dev_do_open(struct hci_dev *hdev) { - struct hci_dev *hdev; int ret = 0; - hdev = hci_dev_get(dev); - if (!hdev) - return -ENODEV; - BT_DBG("%s %p", hdev->name, hdev); hci_req_lock(hdev); @@ -1172,16 +1222,11 @@ int hci_dev_open(__u16 dev) ret = hdev->setup(hdev); if (!ret) { - /* Treat all non BR/EDR controllers as raw devices if - * enable_hs is not set. - */ - if (hdev->dev_type != HCI_BREDR && !enable_hs) - set_bit(HCI_RAW, &hdev->flags); - if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks)) set_bit(HCI_RAW, &hdev->flags); - if (!test_bit(HCI_RAW, &hdev->flags)) + if (!test_bit(HCI_RAW, &hdev->flags) && + !test_bit(HCI_USER_CHANNEL, &hdev->dev_flags)) ret = __hci_init(hdev); } @@ -1192,6 +1237,7 @@ int hci_dev_open(__u16 dev) set_bit(HCI_UP, &hdev->flags); hci_notify(hdev, HCI_DEV_UP); if (!test_bit(HCI_SETUP, &hdev->dev_flags) && + !test_bit(HCI_USER_CHANNEL, &hdev->dev_flags) && mgmt_valid_hdev(hdev)) { hci_dev_lock(hdev); mgmt_powered(hdev, 1); @@ -1220,10 +1266,37 @@ int hci_dev_open(__u16 dev) done: hci_req_unlock(hdev); - hci_dev_put(hdev); return ret; } +/* ---- HCI ioctl helpers ---- */ + +int hci_dev_open(__u16 dev) +{ + struct hci_dev *hdev; + int err; + + hdev = hci_dev_get(dev); + if (!hdev) + return -ENODEV; + + /* We need to ensure that no other power on/off work is pending + * before proceeding to call hci_dev_do_open. 
This is + * particularly important if the setup procedure has not yet + * completed. + */ + if (test_and_clear_bit(HCI_AUTO_OFF, &hdev->dev_flags)) + cancel_delayed_work(&hdev->power_off); + + flush_workqueue(hdev->req_workqueue); + + err = hci_dev_do_open(hdev); + + hci_dev_put(hdev); + + return err; +} + static int hci_dev_do_close(struct hci_dev *hdev) { BT_DBG("%s %p", hdev->name, hdev); @@ -1328,11 +1401,17 @@ int hci_dev_close(__u16 dev) if (!hdev) return -ENODEV; + if (test_bit(HCI_USER_CHANNEL, &hdev->dev_flags)) { + err = -EBUSY; + goto done; + } + if (test_and_clear_bit(HCI_AUTO_OFF, &hdev->dev_flags)) cancel_delayed_work(&hdev->power_off); err = hci_dev_do_close(hdev); +done: hci_dev_put(hdev); return err; } @@ -1348,8 +1427,15 @@ int hci_dev_reset(__u16 dev) hci_req_lock(hdev); - if (!test_bit(HCI_UP, &hdev->flags)) + if (!test_bit(HCI_UP, &hdev->flags)) { + ret = -ENETDOWN; goto done; + } + + if (test_bit(HCI_USER_CHANNEL, &hdev->dev_flags)) { + ret = -EBUSY; + goto done; + } /* Drop queues */ skb_queue_purge(&hdev->rx_q); @@ -1384,10 +1470,15 @@ int hci_dev_reset_stat(__u16 dev) if (!hdev) return -ENODEV; + if (test_bit(HCI_USER_CHANNEL, &hdev->dev_flags)) { + ret = -EBUSY; + goto done; + } + memset(&hdev->stat, 0, sizeof(struct hci_dev_stats)); +done: hci_dev_put(hdev); - return ret; } @@ -1404,6 +1495,16 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg) if (!hdev) return -ENODEV; + if (test_bit(HCI_USER_CHANNEL, &hdev->dev_flags)) { + err = -EBUSY; + goto done; + } + + if (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) { + err = -EOPNOTSUPP; + goto done; + } + switch (cmd) { case HCISETAUTH: err = hci_req_sync(hdev, hci_auth_req, dr.dev_opt, @@ -1462,6 +1563,7 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg) break; } +done: hci_dev_put(hdev); return err; } @@ -1534,7 +1636,7 @@ int hci_get_dev_info(void __user *arg) strcpy(di.name, hdev->name); di.bdaddr = hdev->bdaddr; - di.type = (hdev->bus & 0x0f) | (hdev->dev_type << 4); + di.type = 
(hdev->bus & 0x0f) | ((hdev->dev_type & 0x03) << 4); di.flags = hdev->flags; di.pkt_type = hdev->pkt_type; if (lmp_bredr_capable(hdev)) { @@ -1570,6 +1672,9 @@ static int hci_rfkill_set_block(void *data, bool blocked) BT_DBG("%p name %s blocked %d", hdev, hdev->name, blocked); + if (test_bit(HCI_USER_CHANNEL, &hdev->dev_flags)) + return -EBUSY; + if (blocked) { set_bit(HCI_RFKILLED, &hdev->dev_flags); if (!test_bit(HCI_SETUP, &hdev->dev_flags)) @@ -1592,7 +1697,7 @@ static void hci_power_on(struct work_struct *work) BT_DBG("%s", hdev->name); - err = hci_dev_open(hdev->id); + err = hci_dev_do_open(hdev); if (err < 0) { mgmt_set_powered_failed(hdev, err); return; @@ -2225,8 +2330,13 @@ int hci_register_dev(struct hci_dev *hdev) set_bit(HCI_SETUP, &hdev->dev_flags); - if (hdev->dev_type != HCI_AMP) + if (hdev->dev_type != HCI_AMP) { set_bit(HCI_AUTO_OFF, &hdev->dev_flags); + /* Assume BR/EDR support until proven otherwise (such as + * through reading supported features during init. + */ + set_bit(HCI_BREDR_ENABLED, &hdev->dev_flags); + } write_lock(&hci_dev_list_lock); list_add(&hdev->list, &hci_dev_list); @@ -3272,15 +3382,13 @@ static void hci_tx_work(struct work_struct *work) BT_DBG("%s acl %d sco %d le %d", hdev->name, hdev->acl_cnt, hdev->sco_cnt, hdev->le_cnt); - /* Schedule queues and send stuff to HCI driver */ - - hci_sched_acl(hdev); - - hci_sched_sco(hdev); - - hci_sched_esco(hdev); - - hci_sched_le(hdev); + if (!test_bit(HCI_USER_CHANNEL, &hdev->dev_flags)) { + /* Schedule queues and send stuff to HCI driver */ + hci_sched_acl(hdev); + hci_sched_sco(hdev); + hci_sched_esco(hdev); + hci_sched_le(hdev); + } /* Send next queued raw (unknown type) packet */ while ((skb = skb_dequeue(&hdev->raw_q))) @@ -3471,7 +3579,8 @@ static void hci_rx_work(struct work_struct *work) hci_send_to_sock(hdev, skb); } - if (test_bit(HCI_RAW, &hdev->flags)) { + if (test_bit(HCI_RAW, &hdev->flags) || + test_bit(HCI_USER_CHANNEL, &hdev->dev_flags)) { kfree_skb(skb); continue; } @@ 
-3526,7 +3635,7 @@ static void hci_cmd_work(struct work_struct *work) kfree_skb(hdev->sent_cmd); - hdev->sent_cmd = skb_clone(skb, GFP_ATOMIC); + hdev->sent_cmd = skb_clone(skb, GFP_KERNEL); if (hdev->sent_cmd) { atomic_dec(&hdev->cmd_cnt); hci_send_frame(skb); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 8db3e89fae35..4785ab0795f5 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -297,6 +297,11 @@ static void hci_cc_write_scan_enable(struct hci_dev *hdev, struct sk_buff *skb) goto done; } + /* We need to ensure that we set this back on if someone changed + * the scan mode through a raw HCI socket. + */ + set_bit(HCI_BREDR_ENABLED, &hdev->dev_flags); + old_pscan = test_and_clear_bit(HCI_PSCAN, &hdev->flags); old_iscan = test_and_clear_bit(HCI_ISCAN, &hdev->flags); @@ -994,20 +999,20 @@ static void hci_cc_write_le_host_supported(struct hci_dev *hdev, return; if (!status) { - if (sent->le) + if (sent->le) { hdev->features[1][0] |= LMP_HOST_LE; - else + set_bit(HCI_LE_ENABLED, &hdev->dev_flags); + } else { hdev->features[1][0] &= ~LMP_HOST_LE; + clear_bit(HCI_LE_ENABLED, &hdev->dev_flags); + clear_bit(HCI_LE_PERIPHERAL, &hdev->dev_flags); + } if (sent->simul) hdev->features[1][0] |= LMP_HOST_LE_BREDR; else hdev->features[1][0] &= ~LMP_HOST_LE_BREDR; } - - if (test_bit(HCI_MGMT, &hdev->dev_flags) && - !test_bit(HCI_INIT, &hdev->flags)) - mgmt_le_enable_complete(hdev, sent->le, status); } static void hci_cc_write_remote_amp_assoc(struct hci_dev *hdev, diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 9bd7d959e384..579886186c3a 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -66,6 +66,46 @@ static struct bt_sock_list hci_sk_list = { .lock = __RW_LOCK_UNLOCKED(hci_sk_list.lock) }; +static bool is_filtered_packet(struct sock *sk, struct sk_buff *skb) +{ + struct hci_filter *flt; + int flt_type, flt_event; + + /* Apply filter */ + flt = &hci_pi(sk)->filter; + + if 
(bt_cb(skb)->pkt_type == HCI_VENDOR_PKT) + flt_type = 0; + else + flt_type = bt_cb(skb)->pkt_type & HCI_FLT_TYPE_BITS; + + if (!test_bit(flt_type, &flt->type_mask)) + return true; + + /* Extra filter for event packets only */ + if (bt_cb(skb)->pkt_type != HCI_EVENT_PKT) + return false; + + flt_event = (*(__u8 *)skb->data & HCI_FLT_EVENT_BITS); + + if (!hci_test_bit(flt_event, &flt->event_mask)) + return true; + + /* Check filter only when opcode is set */ + if (!flt->opcode) + return false; + + if (flt_event == HCI_EV_CMD_COMPLETE && + flt->opcode != get_unaligned((__le16 *)(skb->data + 3))) + return true; + + if (flt_event == HCI_EV_CMD_STATUS && + flt->opcode != get_unaligned((__le16 *)(skb->data + 4))) + return true; + + return false; +} + /* Send frame to RAW socket */ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb) { @@ -77,7 +117,6 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb) read_lock(&hci_sk_list.lock); sk_for_each(sk, &hci_sk_list.head) { - struct hci_filter *flt; struct sk_buff *nskb; if (sk->sk_state != BT_BOUND || hci_pi(sk)->hdev != hdev) @@ -87,31 +126,19 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb) if (skb->sk == sk) continue; - if (hci_pi(sk)->channel != HCI_CHANNEL_RAW) - continue; - - /* Apply filter */ - flt = &hci_pi(sk)->filter; - - if (!test_bit((bt_cb(skb)->pkt_type == HCI_VENDOR_PKT) ? 
- 0 : (bt_cb(skb)->pkt_type & HCI_FLT_TYPE_BITS), - &flt->type_mask)) - continue; - - if (bt_cb(skb)->pkt_type == HCI_EVENT_PKT) { - int evt = (*(__u8 *)skb->data & HCI_FLT_EVENT_BITS); - - if (!hci_test_bit(evt, &flt->event_mask)) + if (hci_pi(sk)->channel == HCI_CHANNEL_RAW) { + if (is_filtered_packet(sk, skb)) continue; - - if (flt->opcode && - ((evt == HCI_EV_CMD_COMPLETE && - flt->opcode != - get_unaligned((__le16 *)(skb->data + 3))) || - (evt == HCI_EV_CMD_STATUS && - flt->opcode != - get_unaligned((__le16 *)(skb->data + 4))))) + } else if (hci_pi(sk)->channel == HCI_CHANNEL_USER) { + if (!bt_cb(skb)->incoming) + continue; + if (bt_cb(skb)->pkt_type != HCI_EVENT_PKT && + bt_cb(skb)->pkt_type != HCI_ACLDATA_PKT && + bt_cb(skb)->pkt_type != HCI_SCODATA_PKT) continue; + } else { + /* Don't send frame to other channel types */ + continue; } if (!skb_copy) { @@ -426,6 +453,12 @@ static int hci_sock_release(struct socket *sock) bt_sock_unlink(&hci_sk_list, sk); if (hdev) { + if (hci_pi(sk)->channel == HCI_CHANNEL_USER) { + mgmt_index_added(hdev); + clear_bit(HCI_USER_CHANNEL, &hdev->dev_flags); + hci_dev_close(hdev->id); + } + atomic_dec(&hdev->promisc); hci_dev_put(hdev); } @@ -482,6 +515,9 @@ static int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd, if (!hdev) return -EBADFD; + if (test_bit(HCI_USER_CHANNEL, &hdev->dev_flags)) + return -EBUSY; + switch (cmd) { case HCISETRAW: if (!capable(CAP_NET_ADMIN)) @@ -512,23 +548,32 @@ static int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd, if (!capable(CAP_NET_ADMIN)) return -EPERM; return hci_sock_blacklist_del(hdev, (void __user *) arg); - - default: - if (hdev->ioctl) - return hdev->ioctl(hdev, cmd, arg); - return -EINVAL; } + + if (hdev->ioctl) + return hdev->ioctl(hdev, cmd, arg); + + return -EINVAL; } static int hci_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { - struct sock *sk = sock->sk; void __user *argp = (void __user *) arg; + struct sock *sk = sock->sk; int err; 
BT_DBG("cmd %x arg %lx", cmd, arg); + lock_sock(sk); + + if (hci_pi(sk)->channel != HCI_CHANNEL_RAW) { + err = -EBADFD; + goto done; + } + + release_sock(sk); + switch (cmd) { case HCIGETDEVLIST: return hci_get_dev_list(argp); @@ -573,13 +618,15 @@ static int hci_sock_ioctl(struct socket *sock, unsigned int cmd, case HCIINQUIRY: return hci_inquiry(argp); - - default: - lock_sock(sk); - err = hci_sock_bound_ioctl(sk, cmd, arg); - release_sock(sk); - return err; } + + lock_sock(sk); + + err = hci_sock_bound_ioctl(sk, cmd, arg); + +done: + release_sock(sk); + return err; } static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, @@ -629,6 +676,56 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, hci_pi(sk)->hdev = hdev; break; + case HCI_CHANNEL_USER: + if (hci_pi(sk)->hdev) { + err = -EALREADY; + goto done; + } + + if (haddr.hci_dev == HCI_DEV_NONE) { + err = -EINVAL; + goto done; + } + + if (!capable(CAP_NET_ADMIN)) { + err = -EPERM; + goto done; + } + + hdev = hci_dev_get(haddr.hci_dev); + if (!hdev) { + err = -ENODEV; + goto done; + } + + if (test_bit(HCI_UP, &hdev->flags) || + test_bit(HCI_INIT, &hdev->flags) || + test_bit(HCI_SETUP, &hdev->dev_flags)) { + err = -EBUSY; + hci_dev_put(hdev); + goto done; + } + + if (test_and_set_bit(HCI_USER_CHANNEL, &hdev->dev_flags)) { + err = -EUSERS; + hci_dev_put(hdev); + goto done; + } + + mgmt_index_removed(hdev); + + err = hci_dev_open(hdev->id); + if (err) { + clear_bit(HCI_USER_CHANNEL, &hdev->dev_flags); + hci_dev_put(hdev); + goto done; + } + + atomic_inc(&hdev->promisc); + + hci_pi(sk)->hdev = hdev; + break; + case HCI_CHANNEL_CONTROL: if (haddr.hci_dev != HCI_DEV_NONE) { err = -EINVAL; @@ -677,22 +774,30 @@ static int hci_sock_getname(struct socket *sock, struct sockaddr *addr, { struct sockaddr_hci *haddr = (struct sockaddr_hci *) addr; struct sock *sk = sock->sk; - struct hci_dev *hdev = hci_pi(sk)->hdev; + struct hci_dev *hdev; + int err = 0; BT_DBG("sock %p sk %p", sock, sk); - 
if (!hdev) - return -EBADFD; + if (peer) + return -EOPNOTSUPP; lock_sock(sk); + hdev = hci_pi(sk)->hdev; + if (!hdev) { + err = -EBADFD; + goto done; + } + *addr_len = sizeof(*haddr); haddr->hci_family = AF_BLUETOOTH; haddr->hci_dev = hdev->id; - haddr->hci_channel= 0; + haddr->hci_channel= hci_pi(sk)->channel; +done: release_sock(sk); - return 0; + return err; } static void hci_sock_cmsg(struct sock *sk, struct msghdr *msg, @@ -767,6 +872,7 @@ static int hci_sock_recvmsg(struct kiocb *iocb, struct socket *sock, case HCI_CHANNEL_RAW: hci_sock_cmsg(sk, msg, skb); break; + case HCI_CHANNEL_USER: case HCI_CHANNEL_CONTROL: case HCI_CHANNEL_MONITOR: sock_recv_timestamp(msg, sk, skb); @@ -801,6 +907,7 @@ static int hci_sock_sendmsg(struct kiocb *iocb, struct socket *sock, switch (hci_pi(sk)->channel) { case HCI_CHANNEL_RAW: + case HCI_CHANNEL_USER: break; case HCI_CHANNEL_CONTROL: err = mgmt_control(sk, msg, len); @@ -837,7 +944,8 @@ static int hci_sock_sendmsg(struct kiocb *iocb, struct socket *sock, skb_pull(skb, 1); skb->dev = (void *) hdev; - if (bt_cb(skb)->pkt_type == HCI_COMMAND_PKT) { + if (hci_pi(sk)->channel == HCI_CHANNEL_RAW && + bt_cb(skb)->pkt_type == HCI_COMMAND_PKT) { u16 opcode = get_unaligned_le16(skb->data); u16 ogf = hci_opcode_ogf(opcode); u16 ocf = hci_opcode_ocf(opcode); @@ -868,6 +976,14 @@ static int hci_sock_sendmsg(struct kiocb *iocb, struct socket *sock, goto drop; } + if (hci_pi(sk)->channel == HCI_CHANNEL_USER && + bt_cb(skb)->pkt_type != HCI_COMMAND_PKT && + bt_cb(skb)->pkt_type != HCI_ACLDATA_PKT && + bt_cb(skb)->pkt_type != HCI_SCODATA_PKT) { + err = -EINVAL; + goto drop; + } + skb_queue_tail(&hdev->raw_q, skb); queue_work(hdev->workqueue, &hdev->tx_work); } @@ -895,7 +1011,7 @@ static int hci_sock_setsockopt(struct socket *sock, int level, int optname, lock_sock(sk); if (hci_pi(sk)->channel != HCI_CHANNEL_RAW) { - err = -EINVAL; + err = -EBADFD; goto done; } @@ -981,7 +1097,7 @@ static int hci_sock_getsockopt(struct socket *sock, int 
level, int optname, lock_sock(sk); if (hci_pi(sk)->channel != HCI_CHANNEL_RAW) { - err = -EINVAL; + err = -EBADFD; goto done; } diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 63fa11109a1c..02dba4e6df96 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -1016,13 +1016,12 @@ static bool __amp_capable(struct l2cap_chan *chan) { struct l2cap_conn *conn = chan->conn; - if (enable_hs && - hci_amp_capable() && + if (conn->hs_enabled && hci_amp_capable() && chan->chan_policy == BT_CHANNEL_POLICY_AMP_PREFERRED && conn->fixed_chan_mask & L2CAP_FC_A2MP) return true; - else - return false; + + return false; } static bool l2cap_check_efs(struct l2cap_chan *chan) @@ -1638,6 +1637,10 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon) conn->feat_mask = 0; + if (hcon->type == ACL_LINK) + conn->hs_enabled = test_bit(HCI_HS_ENABLED, + &hcon->hdev->dev_flags); + spin_lock_init(&conn->lock); mutex_init(&conn->chan_lock); @@ -3084,14 +3087,14 @@ static inline __u8 l2cap_select_mode(__u8 mode, __u16 remote_feat_mask) } } -static inline bool __l2cap_ews_supported(struct l2cap_chan *chan) +static inline bool __l2cap_ews_supported(struct l2cap_conn *conn) { - return enable_hs && chan->conn->feat_mask & L2CAP_FEAT_EXT_WINDOW; + return conn->hs_enabled && conn->feat_mask & L2CAP_FEAT_EXT_WINDOW; } -static inline bool __l2cap_efs_supported(struct l2cap_chan *chan) +static inline bool __l2cap_efs_supported(struct l2cap_conn *conn) { - return enable_hs && chan->conn->feat_mask & L2CAP_FEAT_EXT_FLOW; + return conn->hs_enabled && conn->feat_mask & L2CAP_FEAT_EXT_FLOW; } static void __l2cap_set_ertm_timeouts(struct l2cap_chan *chan, @@ -3135,7 +3138,7 @@ static void __l2cap_set_ertm_timeouts(struct l2cap_chan *chan, static inline void l2cap_txwin_setup(struct l2cap_chan *chan) { if (chan->tx_win > L2CAP_DEFAULT_TX_WINDOW && - __l2cap_ews_supported(chan)) { + __l2cap_ews_supported(chan->conn)) { /* use extended control field */ 
set_bit(FLAG_EXT_CTRL, &chan->flags); chan->tx_win_max = L2CAP_DEFAULT_EXT_WINDOW; @@ -3165,7 +3168,7 @@ static int l2cap_build_conf_req(struct l2cap_chan *chan, void *data) if (test_bit(CONF_STATE2_DEVICE, &chan->conf_state)) break; - if (__l2cap_efs_supported(chan)) + if (__l2cap_efs_supported(chan->conn)) set_bit(FLAG_EFS_ENABLE, &chan->flags); /* fall through */ @@ -3317,7 +3320,7 @@ static int l2cap_parse_conf_req(struct l2cap_chan *chan, void *data) break; case L2CAP_CONF_EWS: - if (!enable_hs) + if (!chan->conn->hs_enabled) return -ECONNREFUSED; set_bit(FLAG_EXT_CTRL, &chan->flags); @@ -3349,7 +3352,7 @@ static int l2cap_parse_conf_req(struct l2cap_chan *chan, void *data) } if (remote_efs) { - if (__l2cap_efs_supported(chan)) + if (__l2cap_efs_supported(chan->conn)) set_bit(FLAG_EFS_ENABLE, &chan->flags); else return -ECONNREFUSED; @@ -3891,13 +3894,13 @@ static int l2cap_connect_create_rsp(struct l2cap_conn *conn, if (scid) { chan = __l2cap_get_chan_by_scid(conn, scid); if (!chan) { - err = -EFAULT; + err = -EBADSLT; goto unlock; } } else { chan = __l2cap_get_chan_by_ident(conn, cmd->ident); if (!chan) { - err = -EFAULT; + err = -EBADSLT; goto unlock; } } @@ -3985,7 +3988,7 @@ static inline int l2cap_config_req(struct l2cap_conn *conn, chan = l2cap_get_chan_by_scid(conn, dcid); if (!chan) - return -ENOENT; + return -EBADSLT; if (chan->state != BT_CONFIG && chan->state != BT_CONNECT2) { struct l2cap_cmd_rej_cid rej; @@ -4213,7 +4216,7 @@ static inline int l2cap_disconnect_req(struct l2cap_conn *conn, chan = __l2cap_get_chan_by_scid(conn, dcid); if (!chan) { mutex_unlock(&conn->chan_lock); - return 0; + return -EBADSLT; } l2cap_chan_lock(chan); @@ -4303,7 +4306,7 @@ static inline int l2cap_information_req(struct l2cap_conn *conn, if (!disable_ertm) feat_mask |= L2CAP_FEAT_ERTM | L2CAP_FEAT_STREAMING | L2CAP_FEAT_FCS; - if (enable_hs) + if (conn->hs_enabled) feat_mask |= L2CAP_FEAT_EXT_FLOW | L2CAP_FEAT_EXT_WINDOW; @@ -4314,7 +4317,7 @@ static inline int 
l2cap_information_req(struct l2cap_conn *conn, u8 buf[12]; struct l2cap_info_rsp *rsp = (struct l2cap_info_rsp *) buf; - if (enable_hs) + if (conn->hs_enabled) l2cap_fixed_chan[0] |= L2CAP_FC_A2MP; else l2cap_fixed_chan[0] &= ~L2CAP_FC_A2MP; @@ -4411,7 +4414,7 @@ static int l2cap_create_channel_req(struct l2cap_conn *conn, if (cmd_len != sizeof(*req)) return -EPROTO; - if (!enable_hs) + if (!conn->hs_enabled) return -EINVAL; psm = le16_to_cpu(req->psm); @@ -4445,7 +4448,7 @@ static int l2cap_create_channel_req(struct l2cap_conn *conn, hs_hcon = hci_conn_hash_lookup_ba(hdev, AMP_LINK, conn->dst); if (!hs_hcon) { hci_dev_put(hdev); - return -EFAULT; + return -EBADSLT; } BT_DBG("mgr %p bredr_chan %p hs_hcon %p", mgr, chan, hs_hcon); @@ -4469,7 +4472,7 @@ error: l2cap_send_cmd(conn, cmd->ident, L2CAP_CREATE_CHAN_RSP, sizeof(rsp), &rsp); - return -EFAULT; + return 0; } static void l2cap_send_move_chan_req(struct l2cap_chan *chan, u8 dest_amp_id) @@ -4838,7 +4841,7 @@ static inline int l2cap_move_channel_req(struct l2cap_conn *conn, BT_DBG("icid 0x%4.4x, dest_amp_id %d", icid, req->dest_amp_id); - if (!enable_hs) + if (!conn->hs_enabled) return -EINVAL; chan = l2cap_get_chan_by_dcid(conn, icid); @@ -5219,7 +5222,7 @@ static inline int l2cap_bredr_sig_cmd(struct l2cap_conn *conn, case L2CAP_CONN_RSP: case L2CAP_CREATE_CHAN_RSP: - err = l2cap_connect_create_rsp(conn, cmd, cmd_len, data); + l2cap_connect_create_rsp(conn, cmd, cmd_len, data); break; case L2CAP_CONF_REQ: @@ -5227,7 +5230,7 @@ static inline int l2cap_bredr_sig_cmd(struct l2cap_conn *conn, break; case L2CAP_CONF_RSP: - err = l2cap_config_rsp(conn, cmd, cmd_len, data); + l2cap_config_rsp(conn, cmd, cmd_len, data); break; case L2CAP_DISCONN_REQ: @@ -5235,7 +5238,7 @@ static inline int l2cap_bredr_sig_cmd(struct l2cap_conn *conn, break; case L2CAP_DISCONN_RSP: - err = l2cap_disconnect_rsp(conn, cmd, cmd_len, data); + l2cap_disconnect_rsp(conn, cmd, cmd_len, data); break; case L2CAP_ECHO_REQ: @@ -5250,7 +5253,7 @@ 
static inline int l2cap_bredr_sig_cmd(struct l2cap_conn *conn, break; case L2CAP_INFO_RSP: - err = l2cap_information_rsp(conn, cmd, cmd_len, data); + l2cap_information_rsp(conn, cmd, cmd_len, data); break; case L2CAP_CREATE_CHAN_REQ: @@ -5262,7 +5265,7 @@ static inline int l2cap_bredr_sig_cmd(struct l2cap_conn *conn, break; case L2CAP_MOVE_CHAN_RSP: - err = l2cap_move_channel_rsp(conn, cmd, cmd_len, data); + l2cap_move_channel_rsp(conn, cmd, cmd_len, data); break; case L2CAP_MOVE_CHAN_CFM: @@ -5270,7 +5273,7 @@ static inline int l2cap_bredr_sig_cmd(struct l2cap_conn *conn, break; case L2CAP_MOVE_CHAN_CFM_RSP: - err = l2cap_move_channel_confirm_rsp(conn, cmd, cmd_len, data); + l2cap_move_channel_confirm_rsp(conn, cmd, cmd_len, data); break; default: @@ -5301,54 +5304,65 @@ static inline int l2cap_le_sig_cmd(struct l2cap_conn *conn, } } +static __le16 l2cap_err_to_reason(int err) +{ + switch (err) { + case -EBADSLT: + return __constant_cpu_to_le16(L2CAP_REJ_INVALID_CID); + case -EMSGSIZE: + return __constant_cpu_to_le16(L2CAP_REJ_MTU_EXCEEDED); + case -EINVAL: + case -EPROTO: + default: + return __constant_cpu_to_le16(L2CAP_REJ_NOT_UNDERSTOOD); + } +} + static inline void l2cap_le_sig_channel(struct l2cap_conn *conn, struct sk_buff *skb) { - u8 *data = skb->data; - int len = skb->len; - struct l2cap_cmd_hdr cmd; + struct hci_conn *hcon = conn->hcon; + struct l2cap_cmd_hdr *cmd; + u16 len; int err; - l2cap_raw_recv(conn, skb); + if (hcon->type != LE_LINK) + goto drop; - while (len >= L2CAP_CMD_HDR_SIZE) { - u16 cmd_len; - memcpy(&cmd, data, L2CAP_CMD_HDR_SIZE); - data += L2CAP_CMD_HDR_SIZE; - len -= L2CAP_CMD_HDR_SIZE; + if (skb->len < L2CAP_CMD_HDR_SIZE) + goto drop; - cmd_len = le16_to_cpu(cmd.len); + cmd = (void *) skb->data; + skb_pull(skb, L2CAP_CMD_HDR_SIZE); - BT_DBG("code 0x%2.2x len %d id 0x%2.2x", cmd.code, cmd_len, - cmd.ident); + len = le16_to_cpu(cmd->len); - if (cmd_len > len || !cmd.ident) { - BT_DBG("corrupted command"); - break; - } + BT_DBG("code 
0x%2.2x len %d id 0x%2.2x", cmd->code, len, cmd->ident); - err = l2cap_le_sig_cmd(conn, &cmd, data); - if (err) { - struct l2cap_cmd_rej_unk rej; + if (len != skb->len || !cmd->ident) { + BT_DBG("corrupted command"); + goto drop; + } - BT_ERR("Wrong link type (%d)", err); + err = l2cap_le_sig_cmd(conn, cmd, skb->data); + if (err) { + struct l2cap_cmd_rej_unk rej; - /* FIXME: Map err to a valid reason */ - rej.reason = __constant_cpu_to_le16(L2CAP_REJ_NOT_UNDERSTOOD); - l2cap_send_cmd(conn, cmd.ident, L2CAP_COMMAND_REJ, - sizeof(rej), &rej); - } + BT_ERR("Wrong link type (%d)", err); - data += cmd_len; - len -= cmd_len; + rej.reason = l2cap_err_to_reason(err); + l2cap_send_cmd(conn, cmd->ident, L2CAP_COMMAND_REJ, + sizeof(rej), &rej); } +drop: kfree_skb(skb); } static inline void l2cap_sig_channel(struct l2cap_conn *conn, struct sk_buff *skb) { + struct hci_conn *hcon = conn->hcon; u8 *data = skb->data; int len = skb->len; struct l2cap_cmd_hdr cmd; @@ -5356,6 +5370,9 @@ static inline void l2cap_sig_channel(struct l2cap_conn *conn, l2cap_raw_recv(conn, skb); + if (hcon->type != ACL_LINK) + goto drop; + while (len >= L2CAP_CMD_HDR_SIZE) { u16 cmd_len; memcpy(&cmd, data, L2CAP_CMD_HDR_SIZE); @@ -5378,8 +5395,7 @@ static inline void l2cap_sig_channel(struct l2cap_conn *conn, BT_ERR("Wrong link type (%d)", err); - /* FIXME: Map err to a valid reason */ - rej.reason = __constant_cpu_to_le16(L2CAP_REJ_NOT_UNDERSTOOD); + rej.reason = l2cap_err_to_reason(err); l2cap_send_cmd(conn, cmd.ident, L2CAP_COMMAND_REJ, sizeof(rej), &rej); } @@ -5388,6 +5404,7 @@ static inline void l2cap_sig_channel(struct l2cap_conn *conn, len -= cmd_len; } +drop: kfree_skb(skb); } @@ -5784,7 +5801,7 @@ static int l2cap_rx_state_recv(struct l2cap_chan *chan, struct sk_buff *skb, u8 event) { int err = 0; - bool skb_in_use = 0; + bool skb_in_use = false; BT_DBG("chan %p, control %p, skb %p, event %d", chan, control, skb, event); @@ -5805,7 +5822,7 @@ static int l2cap_rx_state_recv(struct l2cap_chan 
*chan, control->txseq); chan->buffer_seq = chan->expected_tx_seq; - skb_in_use = 1; + skb_in_use = true; err = l2cap_reassemble_sdu(chan, skb, control); if (err) @@ -5841,7 +5858,7 @@ static int l2cap_rx_state_recv(struct l2cap_chan *chan, * current frame is stored for later use. */ skb_queue_tail(&chan->srej_q, skb); - skb_in_use = 1; + skb_in_use = true; BT_DBG("Queued %p (queue len %d)", skb, skb_queue_len(&chan->srej_q)); @@ -5919,7 +5936,7 @@ static int l2cap_rx_state_srej_sent(struct l2cap_chan *chan, { int err = 0; u16 txseq = control->txseq; - bool skb_in_use = 0; + bool skb_in_use = false; BT_DBG("chan %p, control %p, skb %p, event %d", chan, control, skb, event); @@ -5931,7 +5948,7 @@ static int l2cap_rx_state_srej_sent(struct l2cap_chan *chan, /* Keep frame for reassembly later */ l2cap_pass_to_tx(chan, control); skb_queue_tail(&chan->srej_q, skb); - skb_in_use = 1; + skb_in_use = true; BT_DBG("Queued %p (queue len %d)", skb, skb_queue_len(&chan->srej_q)); @@ -5942,7 +5959,7 @@ static int l2cap_rx_state_srej_sent(struct l2cap_chan *chan, l2cap_pass_to_tx(chan, control); skb_queue_tail(&chan->srej_q, skb); - skb_in_use = 1; + skb_in_use = true; BT_DBG("Queued %p (queue len %d)", skb, skb_queue_len(&chan->srej_q)); @@ -5957,7 +5974,7 @@ static int l2cap_rx_state_srej_sent(struct l2cap_chan *chan, * the missing frames. */ skb_queue_tail(&chan->srej_q, skb); - skb_in_use = 1; + skb_in_use = true; BT_DBG("Queued %p (queue len %d)", skb, skb_queue_len(&chan->srej_q)); @@ -5971,7 +5988,7 @@ static int l2cap_rx_state_srej_sent(struct l2cap_chan *chan, * SREJ'd frames. 
*/ skb_queue_tail(&chan->srej_q, skb); - skb_in_use = 1; + skb_in_use = true; BT_DBG("Queued %p (queue len %d)", skb, skb_queue_len(&chan->srej_q)); @@ -6380,8 +6397,12 @@ done: static void l2cap_conless_channel(struct l2cap_conn *conn, __le16 psm, struct sk_buff *skb) { + struct hci_conn *hcon = conn->hcon; struct l2cap_chan *chan; + if (hcon->type != ACL_LINK) + goto drop; + chan = l2cap_global_chan_by_psm(0, psm, conn->src, conn->dst); if (!chan) goto drop; @@ -6404,8 +6425,12 @@ drop: static void l2cap_att_channel(struct l2cap_conn *conn, struct sk_buff *skb) { + struct hci_conn *hcon = conn->hcon; struct l2cap_chan *chan; + if (hcon->type != LE_LINK) + goto drop; + chan = l2cap_global_chan_by_scid(BT_CONNECTED, L2CAP_CID_ATT, conn->src, conn->dst); if (!chan) @@ -6441,9 +6466,6 @@ static void l2cap_recv_frame(struct l2cap_conn *conn, struct sk_buff *skb) BT_DBG("len %d, cid 0x%4.4x", len, cid); switch (cid) { - case L2CAP_CID_LE_SIGNALING: - l2cap_le_sig_channel(conn, skb); - break; case L2CAP_CID_SIGNALING: l2cap_sig_channel(conn, skb); break; @@ -6458,6 +6480,10 @@ static void l2cap_recv_frame(struct l2cap_conn *conn, struct sk_buff *skb) l2cap_att_channel(conn, skb); break; + case L2CAP_CID_LE_SIGNALING: + l2cap_le_sig_channel(conn, skb); + break; + case L2CAP_CID_SMP: if (smp_sig_channel(conn, skb)) l2cap_conn_del(conn->hcon, EACCES); diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 0098af80b213..9119898ef040 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -445,11 +445,6 @@ static int l2cap_sock_getsockopt(struct socket *sock, int level, int optname, break; case BT_CHANNEL_POLICY: - if (!enable_hs) { - err = -ENOPROTOOPT; - break; - } - if (put_user(chan->chan_policy, (u32 __user *) optval)) err = -EFAULT; break; @@ -720,11 +715,6 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, break; case BT_CHANNEL_POLICY: - if (!enable_hs) { - err = -ENOPROTOOPT; - break; - } - if 
(get_user(opt, (u32 __user *) optval)) { err = -EFAULT; break; @@ -777,6 +767,12 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, if (sk->sk_state != BT_CONNECTED) return -ENOTCONN; + lock_sock(sk); + err = bt_sock_wait_ready(sk, msg->msg_flags); + release_sock(sk); + if (err) + return err; + l2cap_chan_lock(chan); err = l2cap_chan_send(chan, msg, len, sk->sk_priority); l2cap_chan_unlock(chan); @@ -799,8 +795,8 @@ static int l2cap_sock_recvmsg(struct kiocb *iocb, struct socket *sock, pi->chan->state = BT_CONFIG; __l2cap_connect_rsp_defer(pi->chan); - release_sock(sk); - return 0; + err = 0; + goto done; } release_sock(sk); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index fedc5399d465..16125ff918f1 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -32,10 +32,8 @@ #include <net/bluetooth/mgmt.h> #include <net/bluetooth/smp.h> -bool enable_hs; - #define MGMT_VERSION 1 -#define MGMT_REVISION 3 +#define MGMT_REVISION 4 static const u16 mgmt_commands[] = { MGMT_OP_READ_INDEX_LIST, @@ -76,6 +74,9 @@ static const u16 mgmt_commands[] = { MGMT_OP_BLOCK_DEVICE, MGMT_OP_UNBLOCK_DEVICE, MGMT_OP_SET_DEVICE_ID, + MGMT_OP_SET_ADVERTISING, + MGMT_OP_SET_BREDR, + MGMT_OP_SET_STATIC_ADDRESS, }; static const u16 mgmt_events[] = { @@ -339,6 +340,9 @@ static int read_index_list(struct sock *sk, struct hci_dev *hdev, void *data, if (test_bit(HCI_SETUP, &d->dev_flags)) continue; + if (test_bit(HCI_USER_CHANNEL, &d->dev_flags)) + continue; + if (!mgmt_valid_hdev(d)) continue; @@ -376,13 +380,13 @@ static u32 get_supported_settings(struct hci_dev *hdev) settings |= MGMT_SETTING_DISCOVERABLE; settings |= MGMT_SETTING_BREDR; settings |= MGMT_SETTING_LINK_SECURITY; - } - - if (enable_hs) settings |= MGMT_SETTING_HS; + } - if (lmp_le_capable(hdev)) + if (lmp_le_capable(hdev)) { settings |= MGMT_SETTING_LE; + settings |= MGMT_SETTING_ADVERTISING; + } return settings; } @@ -406,7 +410,7 @@ static u32 get_current_settings(struct hci_dev *hdev) 
if (test_bit(HCI_PAIRABLE, &hdev->dev_flags)) settings |= MGMT_SETTING_PAIRABLE; - if (lmp_bredr_capable(hdev)) + if (test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) settings |= MGMT_SETTING_BREDR; if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) @@ -421,6 +425,9 @@ static u32 get_current_settings(struct hci_dev *hdev) if (test_bit(HCI_HS_ENABLED, &hdev->dev_flags)) settings |= MGMT_SETTING_HS; + if (test_bit(HCI_LE_PERIPHERAL, &hdev->dev_flags)) + settings |= MGMT_SETTING_ADVERTISING; + return settings; } @@ -804,6 +811,12 @@ static int set_powered(struct sock *sk, struct hci_dev *hdev, void *data, hci_dev_lock(hdev); + if (mgmt_pending_find(MGMT_OP_SET_POWERED, hdev)) { + err = cmd_status(sk, hdev->id, MGMT_OP_SET_POWERED, + MGMT_STATUS_BUSY); + goto failed; + } + if (test_and_clear_bit(HCI_AUTO_OFF, &hdev->dev_flags)) { cancel_delayed_work(&hdev->power_off); @@ -820,12 +833,6 @@ static int set_powered(struct sock *sk, struct hci_dev *hdev, void *data, goto failed; } - if (mgmt_pending_find(MGMT_OP_SET_POWERED, hdev)) { - err = cmd_status(sk, hdev->id, MGMT_OP_SET_POWERED, - MGMT_STATUS_BUSY); - goto failed; - } - cmd = mgmt_pending_add(sk, MGMT_OP_SET_POWERED, hdev, data, len); if (!cmd) { err = -ENOMEM; @@ -883,20 +890,71 @@ static int new_settings(struct hci_dev *hdev, struct sock *skip) return mgmt_event(MGMT_EV_NEW_SETTINGS, hdev, &ev, sizeof(ev), skip); } +struct cmd_lookup { + struct sock *sk; + struct hci_dev *hdev; + u8 mgmt_status; +}; + +static void settings_rsp(struct pending_cmd *cmd, void *data) +{ + struct cmd_lookup *match = data; + + send_settings_rsp(cmd->sk, cmd->opcode, match->hdev); + + list_del(&cmd->list); + + if (match->sk == NULL) { + match->sk = cmd->sk; + sock_hold(match->sk); + } + + mgmt_pending_free(cmd); +} + +static void cmd_status_rsp(struct pending_cmd *cmd, void *data) +{ + u8 *status = data; + + cmd_status(cmd->sk, cmd->index, cmd->opcode, *status); + mgmt_pending_remove(cmd); +} + +static u8 mgmt_bredr_support(struct hci_dev 
*hdev) +{ + if (!lmp_bredr_capable(hdev)) + return MGMT_STATUS_NOT_SUPPORTED; + else if (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) + return MGMT_STATUS_REJECTED; + else + return MGMT_STATUS_SUCCESS; +} + +static u8 mgmt_le_support(struct hci_dev *hdev) +{ + if (!lmp_le_capable(hdev)) + return MGMT_STATUS_NOT_SUPPORTED; + else if (!test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) + return MGMT_STATUS_REJECTED; + else + return MGMT_STATUS_SUCCESS; +} + static int set_discoverable(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_set_discoverable *cp = data; struct pending_cmd *cmd; u16 timeout; - u8 scan; + u8 scan, status; int err; BT_DBG("request for %s", hdev->name); - if (!lmp_bredr_capable(hdev)) + status = mgmt_bredr_support(hdev); + if (status) return cmd_status(sk, hdev->id, MGMT_OP_SET_DISCOVERABLE, - MGMT_STATUS_NOT_SUPPORTED); + status); if (cp->val != 0x00 && cp->val != 0x01) return cmd_status(sk, hdev->id, MGMT_OP_SET_DISCOVERABLE, @@ -1045,14 +1103,15 @@ static int set_connectable(struct sock *sk, struct hci_dev *hdev, void *data, struct mgmt_mode *cp = data; struct pending_cmd *cmd; struct hci_request req; - u8 scan; + u8 scan, status; int err; BT_DBG("request for %s", hdev->name); - if (!lmp_bredr_capable(hdev)) + status = mgmt_bredr_support(hdev); + if (status) return cmd_status(sk, hdev->id, MGMT_OP_SET_CONNECTABLE, - MGMT_STATUS_NOT_SUPPORTED); + status); if (cp->val != 0x00 && cp->val != 0x01) return cmd_status(sk, hdev->id, MGMT_OP_SET_CONNECTABLE, @@ -1168,14 +1227,15 @@ static int set_link_security(struct sock *sk, struct hci_dev *hdev, void *data, { struct mgmt_mode *cp = data; struct pending_cmd *cmd; - u8 val; + u8 val, status; int err; BT_DBG("request for %s", hdev->name); - if (!lmp_bredr_capable(hdev)) + status = mgmt_bredr_support(hdev); + if (status) return cmd_status(sk, hdev->id, MGMT_OP_SET_LINK_SECURITY, - MGMT_STATUS_NOT_SUPPORTED); + status); if (cp->val != 0x00 && cp->val != 0x01) return cmd_status(sk, 
hdev->id, MGMT_OP_SET_LINK_SECURITY, @@ -1236,11 +1296,15 @@ static int set_ssp(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_mode *cp = data; struct pending_cmd *cmd; - u8 val; + u8 val, status; int err; BT_DBG("request for %s", hdev->name); + status = mgmt_bredr_support(hdev); + if (status) + return cmd_status(sk, hdev->id, MGMT_OP_SET_SSP, status); + if (!lmp_ssp_capable(hdev)) return cmd_status(sk, hdev->id, MGMT_OP_SET_SSP, MGMT_STATUS_NOT_SUPPORTED); @@ -1302,23 +1366,64 @@ failed: static int set_hs(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_mode *cp = data; + bool changed; + u8 status; + int err; BT_DBG("request for %s", hdev->name); - if (!enable_hs) - return cmd_status(sk, hdev->id, MGMT_OP_SET_HS, - MGMT_STATUS_NOT_SUPPORTED); + status = mgmt_bredr_support(hdev); + if (status) + return cmd_status(sk, hdev->id, MGMT_OP_SET_HS, status); if (cp->val != 0x00 && cp->val != 0x01) return cmd_status(sk, hdev->id, MGMT_OP_SET_HS, MGMT_STATUS_INVALID_PARAMS); - if (cp->val) - set_bit(HCI_HS_ENABLED, &hdev->dev_flags); - else - clear_bit(HCI_HS_ENABLED, &hdev->dev_flags); + hci_dev_lock(hdev); + + if (cp->val) { + changed = !test_and_set_bit(HCI_HS_ENABLED, &hdev->dev_flags); + } else { + if (hdev_is_powered(hdev)) { + err = cmd_status(sk, hdev->id, MGMT_OP_SET_HS, + MGMT_STATUS_REJECTED); + goto unlock; + } + + changed = test_and_clear_bit(HCI_HS_ENABLED, &hdev->dev_flags); + } + + err = send_settings_rsp(sk, MGMT_OP_SET_HS, hdev); + if (err < 0) + goto unlock; + + if (changed) + err = new_settings(hdev, sk); + +unlock: + hci_dev_unlock(hdev); + return err; +} + +static void le_enable_complete(struct hci_dev *hdev, u8 status) +{ + struct cmd_lookup match = { NULL, hdev }; - return send_settings_rsp(sk, MGMT_OP_SET_HS, hdev); + if (status) { + u8 mgmt_err = mgmt_status(status); + + mgmt_pending_foreach(MGMT_OP_SET_LE, hdev, cmd_status_rsp, + &mgmt_err); + return; + } + + mgmt_pending_foreach(MGMT_OP_SET_LE, 
hdev, settings_rsp, &match); + + new_settings(hdev, match.sk); + + if (match.sk) + sock_put(match.sk); } static int set_le(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) @@ -1326,6 +1431,7 @@ static int set_le(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) struct mgmt_mode *cp = data; struct hci_cp_write_le_host_supported hci_cp; struct pending_cmd *cmd; + struct hci_request req; int err; u8 val, enabled; @@ -1340,7 +1446,7 @@ static int set_le(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) MGMT_STATUS_INVALID_PARAMS); /* LE-only devices do not allow toggling LE on/off */ - if (!lmp_bredr_capable(hdev)) + if (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) return cmd_status(sk, hdev->id, MGMT_OP_SET_LE, MGMT_STATUS_REJECTED); @@ -1357,6 +1463,11 @@ static int set_le(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) changed = true; } + if (!val && test_bit(HCI_LE_PERIPHERAL, &hdev->dev_flags)) { + clear_bit(HCI_LE_PERIPHERAL, &hdev->dev_flags); + changed = true; + } + err = send_settings_rsp(sk, MGMT_OP_SET_LE, hdev); if (err < 0) goto unlock; @@ -1367,7 +1478,8 @@ static int set_le(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) goto unlock; } - if (mgmt_pending_find(MGMT_OP_SET_LE, hdev)) { + if (mgmt_pending_find(MGMT_OP_SET_LE, hdev) || + mgmt_pending_find(MGMT_OP_SET_ADVERTISING, hdev)) { err = cmd_status(sk, hdev->id, MGMT_OP_SET_LE, MGMT_STATUS_BUSY); goto unlock; @@ -1386,8 +1498,15 @@ static int set_le(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) hci_cp.simul = lmp_le_br_capable(hdev); } - err = hci_send_cmd(hdev, HCI_OP_WRITE_LE_HOST_SUPPORTED, sizeof(hci_cp), - &hci_cp); + hci_req_init(&req, hdev); + + if (test_bit(HCI_LE_PERIPHERAL, &hdev->dev_flags) && !val) + hci_req_add(&req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(val), &val); + + hci_req_add(&req, HCI_OP_WRITE_LE_HOST_SUPPORTED, sizeof(hci_cp), + &hci_cp); + + err = hci_req_run(&req, le_enable_complete); if (err < 0) 
mgmt_pending_remove(cmd); @@ -1706,6 +1825,12 @@ static int load_link_keys(struct sock *sk, struct hci_dev *hdev, void *data, u16 key_count, expected_len; int i; + BT_DBG("request for %s", hdev->name); + + if (!lmp_bredr_capable(hdev)) + return cmd_status(sk, hdev->id, MGMT_OP_LOAD_LINK_KEYS, + MGMT_STATUS_NOT_SUPPORTED); + key_count = __le16_to_cpu(cp->key_count); expected_len = sizeof(*cp) + key_count * @@ -2685,6 +2810,7 @@ static int start_discovery(struct sock *sk, struct hci_dev *hdev, struct hci_request req; /* General inquiry access code (GIAC) */ u8 lap[3] = { 0x33, 0x8b, 0x9e }; + u8 status; int err; BT_DBG("%s", hdev->name); @@ -2721,9 +2847,10 @@ static int start_discovery(struct sock *sk, struct hci_dev *hdev, switch (hdev->discovery.type) { case DISCOV_TYPE_BREDR: - if (!lmp_bredr_capable(hdev)) { + status = mgmt_bredr_support(hdev); + if (status) { err = cmd_status(sk, hdev->id, MGMT_OP_START_DISCOVERY, - MGMT_STATUS_NOT_SUPPORTED); + status); mgmt_pending_remove(cmd); goto failed; } @@ -2745,15 +2872,16 @@ static int start_discovery(struct sock *sk, struct hci_dev *hdev, case DISCOV_TYPE_LE: case DISCOV_TYPE_INTERLEAVED: - if (!test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) { + status = mgmt_le_support(hdev); + if (status) { err = cmd_status(sk, hdev->id, MGMT_OP_START_DISCOVERY, - MGMT_STATUS_NOT_SUPPORTED); + status); mgmt_pending_remove(cmd); goto failed; } if (hdev->discovery.type == DISCOV_TYPE_INTERLEAVED && - !lmp_bredr_capable(hdev)) { + !test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) { err = cmd_status(sk, hdev->id, MGMT_OP_START_DISCOVERY, MGMT_STATUS_NOT_SUPPORTED); mgmt_pending_remove(cmd); @@ -3065,6 +3193,135 @@ static int set_device_id(struct sock *sk, struct hci_dev *hdev, void *data, return err; } +static void set_advertising_complete(struct hci_dev *hdev, u8 status) +{ + struct cmd_lookup match = { NULL, hdev }; + + if (status) { + u8 mgmt_err = mgmt_status(status); + + mgmt_pending_foreach(MGMT_OP_SET_ADVERTISING, hdev, + 
cmd_status_rsp, &mgmt_err); + return; + } + + mgmt_pending_foreach(MGMT_OP_SET_ADVERTISING, hdev, settings_rsp, + &match); + + new_settings(hdev, match.sk); + + if (match.sk) + sock_put(match.sk); +} + +static int set_advertising(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) +{ + struct mgmt_mode *cp = data; + struct pending_cmd *cmd; + struct hci_request req; + u8 val, enabled, status; + int err; + + BT_DBG("request for %s", hdev->name); + + status = mgmt_le_support(hdev); + if (status) + return cmd_status(sk, hdev->id, MGMT_OP_SET_ADVERTISING, + status); + + if (cp->val != 0x00 && cp->val != 0x01) + return cmd_status(sk, hdev->id, MGMT_OP_SET_ADVERTISING, + MGMT_STATUS_INVALID_PARAMS); + + hci_dev_lock(hdev); + + val = !!cp->val; + enabled = test_bit(HCI_LE_PERIPHERAL, &hdev->dev_flags); + + if (!hdev_is_powered(hdev) || val == enabled) { + bool changed = false; + + if (val != test_bit(HCI_LE_PERIPHERAL, &hdev->dev_flags)) { + change_bit(HCI_LE_PERIPHERAL, &hdev->dev_flags); + changed = true; + } + + err = send_settings_rsp(sk, MGMT_OP_SET_ADVERTISING, hdev); + if (err < 0) + goto unlock; + + if (changed) + err = new_settings(hdev, sk); + + goto unlock; + } + + if (mgmt_pending_find(MGMT_OP_SET_ADVERTISING, hdev) || + mgmt_pending_find(MGMT_OP_SET_LE, hdev)) { + err = cmd_status(sk, hdev->id, MGMT_OP_SET_ADVERTISING, + MGMT_STATUS_BUSY); + goto unlock; + } + + cmd = mgmt_pending_add(sk, MGMT_OP_SET_ADVERTISING, hdev, data, len); + if (!cmd) { + err = -ENOMEM; + goto unlock; + } + + hci_req_init(&req, hdev); + + hci_req_add(&req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(val), &val); + + err = hci_req_run(&req, set_advertising_complete); + if (err < 0) + mgmt_pending_remove(cmd); + +unlock: + hci_dev_unlock(hdev); + return err; +} + +static int set_static_address(struct sock *sk, struct hci_dev *hdev, + void *data, u16 len) +{ + struct mgmt_cp_set_static_address *cp = data; + int err; + + BT_DBG("%s", hdev->name); + + if (!lmp_le_capable(hdev)) + return 
cmd_status(sk, hdev->id, MGMT_OP_SET_STATIC_ADDRESS, + MGMT_STATUS_NOT_SUPPORTED); + + if (hdev_is_powered(hdev)) + return cmd_status(sk, hdev->id, MGMT_OP_SET_STATIC_ADDRESS, + MGMT_STATUS_REJECTED); + + if (bacmp(&cp->bdaddr, BDADDR_ANY)) { + if (!bacmp(&cp->bdaddr, BDADDR_NONE)) + return cmd_status(sk, hdev->id, + MGMT_OP_SET_STATIC_ADDRESS, + MGMT_STATUS_INVALID_PARAMS); + + /* Two most significant bits shall be set */ + if ((cp->bdaddr.b[5] & 0xc0) != 0xc0) + return cmd_status(sk, hdev->id, + MGMT_OP_SET_STATIC_ADDRESS, + MGMT_STATUS_INVALID_PARAMS); + } + + hci_dev_lock(hdev); + + bacpy(&hdev->static_addr, &cp->bdaddr); + + err = cmd_complete(sk, hdev->id, MGMT_OP_SET_STATIC_ADDRESS, 0, NULL, 0); + + hci_dev_unlock(hdev); + + return err; +} + static void fast_connectable_complete(struct hci_dev *hdev, u8 status) { struct pending_cmd *cmd; @@ -3108,7 +3365,8 @@ static int set_fast_connectable(struct sock *sk, struct hci_dev *hdev, BT_DBG("%s", hdev->name); - if (!lmp_bredr_capable(hdev) || hdev->hci_ver < BLUETOOTH_VER_1_2) + if (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags) || + hdev->hci_ver < BLUETOOTH_VER_1_2) return cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE, MGMT_STATUS_NOT_SUPPORTED); @@ -3162,6 +3420,121 @@ unlock: return err; } +static void set_bredr_complete(struct hci_dev *hdev, u8 status) +{ + struct pending_cmd *cmd; + + BT_DBG("status 0x%02x", status); + + hci_dev_lock(hdev); + + cmd = mgmt_pending_find(MGMT_OP_SET_BREDR, hdev); + if (!cmd) + goto unlock; + + if (status) { + u8 mgmt_err = mgmt_status(status); + + /* We need to restore the flag if related HCI commands + * failed. 
+ */ + clear_bit(HCI_BREDR_ENABLED, &hdev->dev_flags); + + cmd_status(cmd->sk, cmd->index, cmd->opcode, mgmt_err); + } else { + send_settings_rsp(cmd->sk, MGMT_OP_SET_BREDR, hdev); + new_settings(hdev, cmd->sk); + } + + mgmt_pending_remove(cmd); + +unlock: + hci_dev_unlock(hdev); +} + +static int set_bredr(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) +{ + struct mgmt_mode *cp = data; + struct pending_cmd *cmd; + struct hci_request req; + int err; + + BT_DBG("request for %s", hdev->name); + + if (!lmp_bredr_capable(hdev) || !lmp_le_capable(hdev)) + return cmd_status(sk, hdev->id, MGMT_OP_SET_BREDR, + MGMT_STATUS_NOT_SUPPORTED); + + if (!test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) + return cmd_status(sk, hdev->id, MGMT_OP_SET_BREDR, + MGMT_STATUS_REJECTED); + + if (cp->val != 0x00 && cp->val != 0x01) + return cmd_status(sk, hdev->id, MGMT_OP_SET_BREDR, + MGMT_STATUS_INVALID_PARAMS); + + hci_dev_lock(hdev); + + if (cp->val == test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) { + err = send_settings_rsp(sk, MGMT_OP_SET_BREDR, hdev); + goto unlock; + } + + if (!hdev_is_powered(hdev)) { + if (!cp->val) { + clear_bit(HCI_CONNECTABLE, &hdev->dev_flags); + clear_bit(HCI_DISCOVERABLE, &hdev->dev_flags); + clear_bit(HCI_SSP_ENABLED, &hdev->dev_flags); + clear_bit(HCI_LINK_SECURITY, &hdev->dev_flags); + clear_bit(HCI_FAST_CONNECTABLE, &hdev->dev_flags); + clear_bit(HCI_HS_ENABLED, &hdev->dev_flags); + } + + change_bit(HCI_BREDR_ENABLED, &hdev->dev_flags); + + err = send_settings_rsp(sk, MGMT_OP_SET_BREDR, hdev); + if (err < 0) + goto unlock; + + err = new_settings(hdev, sk); + goto unlock; + } + + /* Reject disabling when powered on */ + if (!cp->val) { + err = cmd_status(sk, hdev->id, MGMT_OP_SET_BREDR, + MGMT_STATUS_REJECTED); + goto unlock; + } + + if (mgmt_pending_find(MGMT_OP_SET_BREDR, hdev)) { + err = cmd_status(sk, hdev->id, MGMT_OP_SET_BREDR, + MGMT_STATUS_BUSY); + goto unlock; + } + + cmd = mgmt_pending_add(sk, MGMT_OP_SET_BREDR, hdev, data, len); + if 
(!cmd) { + err = -ENOMEM; + goto unlock; + } + + /* We need to flip the bit already here so that hci_update_ad + * generates the correct flags. + */ + set_bit(HCI_BREDR_ENABLED, &hdev->dev_flags); + + hci_req_init(&req, hdev); + hci_update_ad(&req); + err = hci_req_run(&req, set_bredr_complete); + if (err < 0) + mgmt_pending_remove(cmd); + +unlock: + hci_dev_unlock(hdev); + return err; +} + static bool ltk_is_valid(struct mgmt_ltk_info *key) { if (key->authenticated != 0x00 && key->authenticated != 0x01) @@ -3180,6 +3553,12 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev, u16 key_count, expected_len; int i, err; + BT_DBG("request for %s", hdev->name); + + if (!lmp_le_capable(hdev)) + return cmd_status(sk, hdev->id, MGMT_OP_LOAD_LONG_TERM_KEYS, + MGMT_STATUS_NOT_SUPPORTED); + key_count = __le16_to_cpu(cp->key_count); expected_len = sizeof(*cp) + key_count * @@ -3276,6 +3655,9 @@ static const struct mgmt_handler { { block_device, false, MGMT_BLOCK_DEVICE_SIZE }, { unblock_device, false, MGMT_UNBLOCK_DEVICE_SIZE }, { set_device_id, false, MGMT_SET_DEVICE_ID_SIZE }, + { set_advertising, false, MGMT_SETTING_SIZE }, + { set_bredr, false, MGMT_SETTING_SIZE }, + { set_static_address, false, MGMT_SET_STATIC_ADDRESS_SIZE }, }; @@ -3320,6 +3702,12 @@ int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen) MGMT_STATUS_INVALID_INDEX); goto done; } + + if (test_bit(HCI_USER_CHANNEL, &hdev->dev_flags)) { + err = cmd_status(sk, index, opcode, + MGMT_STATUS_INVALID_INDEX); + goto done; + } } if (opcode >= ARRAY_SIZE(mgmt_handlers) || @@ -3365,14 +3753,6 @@ done: return err; } -static void cmd_status_rsp(struct pending_cmd *cmd, void *data) -{ - u8 *status = data; - - cmd_status(cmd->sk, cmd->index, cmd->opcode, *status); - mgmt_pending_remove(cmd); -} - int mgmt_index_added(struct hci_dev *hdev) { if (!mgmt_valid_hdev(hdev)) @@ -3393,28 +3773,6 @@ int mgmt_index_removed(struct hci_dev *hdev) return mgmt_event(MGMT_EV_INDEX_REMOVED, hdev, NULL, 
0, NULL); } -struct cmd_lookup { - struct sock *sk; - struct hci_dev *hdev; - u8 mgmt_status; -}; - -static void settings_rsp(struct pending_cmd *cmd, void *data) -{ - struct cmd_lookup *match = data; - - send_settings_rsp(cmd->sk, cmd->opcode, match->hdev); - - list_del(&cmd->list); - - if (match->sk == NULL) { - match->sk = cmd->sk; - sock_hold(match->sk); - } - - mgmt_pending_free(cmd); -} - static void set_bredr_scan(struct hci_request *req) { struct hci_dev *hdev = req->hdev; @@ -3481,6 +3839,22 @@ static int powered_update_hci(struct hci_dev *hdev) cp.simul != lmp_host_le_br_capable(hdev)) hci_req_add(&req, HCI_OP_WRITE_LE_HOST_SUPPORTED, sizeof(cp), &cp); + + /* In case BR/EDR was toggled during the AUTO_OFF phase */ + hci_update_ad(&req); + } + + if (lmp_le_capable(hdev)) { + /* Set random address to static address if configured */ + if (bacmp(&hdev->static_addr, BDADDR_ANY)) + hci_req_add(&req, HCI_OP_LE_SET_RANDOM_ADDR, 6, + &hdev->static_addr); + } + + if (test_bit(HCI_LE_PERIPHERAL, &hdev->dev_flags)) { + u8 adv = 0x01; + + hci_req_add(&req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(adv), &adv); } link_sec = test_bit(HCI_LINK_SECURITY, &hdev->dev_flags); @@ -3489,7 +3863,8 @@ static int powered_update_hci(struct hci_dev *hdev) sizeof(link_sec), &link_sec); if (lmp_bredr_capable(hdev)) { - set_bredr_scan(&req); + if (test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) + set_bredr_scan(&req); update_class(&req); update_name(&req); update_eir(&req); @@ -4132,44 +4507,6 @@ int mgmt_read_local_oob_data_reply_complete(struct hci_dev *hdev, u8 *hash, return err; } -int mgmt_le_enable_complete(struct hci_dev *hdev, u8 enable, u8 status) -{ - struct cmd_lookup match = { NULL, hdev }; - bool changed = false; - int err = 0; - - if (status) { - u8 mgmt_err = mgmt_status(status); - - if (enable && test_and_clear_bit(HCI_LE_ENABLED, - &hdev->dev_flags)) - err = new_settings(hdev, NULL); - - mgmt_pending_foreach(MGMT_OP_SET_LE, hdev, cmd_status_rsp, - &mgmt_err); - - return err; - 
} - - if (enable) { - if (!test_and_set_bit(HCI_LE_ENABLED, &hdev->dev_flags)) - changed = true; - } else { - if (test_and_clear_bit(HCI_LE_ENABLED, &hdev->dev_flags)) - changed = true; - } - - mgmt_pending_foreach(MGMT_OP_SET_LE, hdev, settings_rsp, &match); - - if (changed) - err = new_settings(hdev, match.sk); - - if (match.sk) - sock_put(match.sk); - - return err; -} - int mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, u8 *dev_class, s8 rssi, u8 cfm_name, u8 ssp, u8 *eir, u16 eir_len) @@ -4286,6 +4623,3 @@ int mgmt_device_unblocked(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type) return mgmt_event(MGMT_EV_DEVICE_UNBLOCKED, hdev, &ev, sizeof(ev), cmd ? cmd->sk : NULL); } - -module_param(enable_hs, bool, 0644); -MODULE_PARM_DESC(enable_hs, "Enable High Speed support"); diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 30b3721dc6d7..072938dc527d 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -544,7 +544,7 @@ static int rfcomm_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct sock *sk = sock->sk; struct rfcomm_dlc *d = rfcomm_pi(sk)->dlc; struct sk_buff *skb; - int sent = 0; + int sent; if (test_bit(RFCOMM_DEFER_SETUP, &d->flags)) return -ENOTCONN; @@ -559,6 +559,10 @@ static int rfcomm_sock_sendmsg(struct kiocb *iocb, struct socket *sock, lock_sock(sk); + sent = bt_sock_wait_ready(sk, msg->msg_flags); + if (sent) + goto done; + while (len) { size_t size = min_t(size_t, len, d->mtu); int err; @@ -594,6 +598,7 @@ static int rfcomm_sock_sendmsg(struct kiocb *iocb, struct socket *sock, len -= size; } +done: release_sock(sk); return sent; diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index b5562abdd6e0..884b2081a262 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -847,16 +847,27 @@ static int smp_cmd_master_ident(struct l2cap_conn *conn, struct sk_buff *skb) int smp_sig_channel(struct l2cap_conn *conn, struct sk_buff *skb) { - __u8 code = 
skb->data[0]; - __u8 reason; + struct hci_conn *hcon = conn->hcon; + __u8 code, reason; int err = 0; + if (hcon->type != LE_LINK) { + kfree_skb(skb); + return -ENOTSUPP; + } + + if (skb->len < 1) { + kfree_skb(skb); + return -EILSEQ; + } + if (!test_bit(HCI_LE_ENABLED, &conn->hcon->hdev->dev_flags)) { err = -ENOTSUPP; reason = SMP_PAIRING_NOTSUPP; goto done; } + code = skb->data[0]; skb_pull(skb, sizeof(code)); /* diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index d1c578630678..005d876dd86c 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -363,7 +363,7 @@ static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br, skb_reset_mac_header(skb); eth = eth_hdr(skb); - memcpy(eth->h_source, br->dev->dev_addr, 6); + memcpy(eth->h_source, br->dev->dev_addr, ETH_ALEN); eth->h_dest[0] = 1; eth->h_dest[1] = 0; eth->h_dest[2] = 0x5e; @@ -433,7 +433,7 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br, skb_reset_mac_header(skb); eth = eth_hdr(skb); - memcpy(eth->h_source, br->dev->dev_addr, 6); + memcpy(eth->h_source, br->dev->dev_addr, ETH_ALEN); eth->h_proto = htons(ETH_P_IPV6); skb_put(skb, sizeof(*eth)); diff --git a/net/bridge/netfilter/ebt_among.c b/net/bridge/netfilter/ebt_among.c index 8b84c581be30..3fb3c848affe 100644 --- a/net/bridge/netfilter/ebt_among.c +++ b/net/bridge/netfilter/ebt_among.c @@ -28,7 +28,7 @@ static bool ebt_mac_wormhash_contains(const struct ebt_mac_wormhash *wh, uint32_t cmp[2] = { 0, 0 }; int key = ((const unsigned char *)mac)[5]; - memcpy(((char *) cmp) + 2, mac, 6); + memcpy(((char *) cmp) + 2, mac, ETH_ALEN); start = wh->table[key]; limit = wh->table[key + 1]; if (ip) { diff --git a/net/core/dev.c b/net/core/dev.c index 3430b1ed12e5..1b6eadf69289 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1307,7 +1307,7 @@ static int __dev_close_many(struct list_head *head) ASSERT_RTNL(); might_sleep(); - list_for_each_entry(dev, head, unreg_list) { + 
list_for_each_entry(dev, head, close_list) { call_netdevice_notifiers(NETDEV_GOING_DOWN, dev); clear_bit(__LINK_STATE_START, &dev->state); @@ -1323,7 +1323,7 @@ static int __dev_close_many(struct list_head *head) dev_deactivate_many(head); - list_for_each_entry(dev, head, unreg_list) { + list_for_each_entry(dev, head, close_list) { const struct net_device_ops *ops = dev->netdev_ops; /* @@ -1351,7 +1351,7 @@ static int __dev_close(struct net_device *dev) /* Temporarily disable netpoll until the interface is down */ netpoll_rx_disable(dev); - list_add(&dev->unreg_list, &single); + list_add(&dev->close_list, &single); retval = __dev_close_many(&single); list_del(&single); @@ -1362,21 +1362,20 @@ static int __dev_close(struct net_device *dev) static int dev_close_many(struct list_head *head) { struct net_device *dev, *tmp; - LIST_HEAD(tmp_list); - list_for_each_entry_safe(dev, tmp, head, unreg_list) + /* Remove the devices that don't need to be closed */ + list_for_each_entry_safe(dev, tmp, head, close_list) if (!(dev->flags & IFF_UP)) - list_move(&dev->unreg_list, &tmp_list); + list_del_init(&dev->close_list); __dev_close_many(head); - list_for_each_entry(dev, head, unreg_list) { + list_for_each_entry_safe(dev, tmp, head, close_list) { rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING); call_netdevice_notifiers(NETDEV_DOWN, dev); + list_del_init(&dev->close_list); } - /* rollback_registered_many needs the complete original list */ - list_splice(&tmp_list, head); return 0; } @@ -1397,7 +1396,7 @@ int dev_close(struct net_device *dev) /* Block netpoll rx while the interface is going down */ netpoll_rx_disable(dev); - list_add(&dev->unreg_list, &single); + list_add(&dev->close_list, &single); dev_close_many(&single); list_del(&single); @@ -4374,42 +4373,40 @@ struct netdev_adjacent { /* upper master flag, there can only be one master device per list */ bool master; - /* indicates that this dev is our first-level lower/upper device */ - bool neighbour; - /* counter for 
the number of times this device was added to us */ u16 ref_nr; + /* private field for the users */ + void *private; + struct list_head list; struct rcu_head rcu; }; -static struct netdev_adjacent *__netdev_find_adj(struct net_device *dev, - struct net_device *adj_dev, - bool upper) +static struct netdev_adjacent *__netdev_find_adj_rcu(struct net_device *dev, + struct net_device *adj_dev, + struct list_head *adj_list) { struct netdev_adjacent *adj; - struct list_head *dev_list; - dev_list = upper ? &dev->upper_dev_list : &dev->lower_dev_list; - - list_for_each_entry(adj, dev_list, list) { + list_for_each_entry_rcu(adj, adj_list, list) { if (adj->dev == adj_dev) return adj; } return NULL; } -static inline struct netdev_adjacent *__netdev_find_upper(struct net_device *dev, - struct net_device *udev) +static struct netdev_adjacent *__netdev_find_adj(struct net_device *dev, + struct net_device *adj_dev, + struct list_head *adj_list) { - return __netdev_find_adj(dev, udev, true); -} + struct netdev_adjacent *adj; -static inline struct netdev_adjacent *__netdev_find_lower(struct net_device *dev, - struct net_device *ldev) -{ - return __netdev_find_adj(dev, ldev, false); + list_for_each_entry(adj, adj_list, list) { + if (adj->dev == adj_dev) + return adj; + } + return NULL; } /** @@ -4426,7 +4423,7 @@ bool netdev_has_upper_dev(struct net_device *dev, { ASSERT_RTNL(); - return __netdev_find_upper(dev, upper_dev); + return __netdev_find_adj(dev, upper_dev, &dev->all_adj_list.upper); } EXPORT_SYMBOL(netdev_has_upper_dev); @@ -4441,7 +4438,7 @@ bool netdev_has_any_upper_dev(struct net_device *dev) { ASSERT_RTNL(); - return !list_empty(&dev->upper_dev_list); + return !list_empty(&dev->all_adj_list.upper); } EXPORT_SYMBOL(netdev_has_any_upper_dev); @@ -4458,10 +4455,10 @@ struct net_device *netdev_master_upper_dev_get(struct net_device *dev) ASSERT_RTNL(); - if (list_empty(&dev->upper_dev_list)) + if (list_empty(&dev->adj_list.upper)) return NULL; - upper = 
list_first_entry(&dev->upper_dev_list, + upper = list_first_entry(&dev->adj_list.upper, struct netdev_adjacent, list); if (likely(upper->master)) return upper->dev; @@ -4469,15 +4466,26 @@ struct net_device *netdev_master_upper_dev_get(struct net_device *dev) } EXPORT_SYMBOL(netdev_master_upper_dev_get); -/* netdev_upper_get_next_dev_rcu - Get the next dev from upper list +void *netdev_adjacent_get_private(struct list_head *adj_list) +{ + struct netdev_adjacent *adj; + + adj = list_entry(adj_list, struct netdev_adjacent, list); + + return adj->private; +} +EXPORT_SYMBOL(netdev_adjacent_get_private); + +/** + * netdev_all_upper_get_next_dev_rcu - Get the next dev from upper list * @dev: device * @iter: list_head ** of the current position * * Gets the next device from the dev's upper list, starting from iter * position. The caller must hold RCU read lock. */ -struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev, - struct list_head **iter) +struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev, + struct list_head **iter) { struct netdev_adjacent *upper; @@ -4485,14 +4493,71 @@ struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev, upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list); - if (&upper->list == &dev->upper_dev_list) + if (&upper->list == &dev->all_adj_list.upper) return NULL; *iter = &upper->list; return upper->dev; } -EXPORT_SYMBOL(netdev_upper_get_next_dev_rcu); +EXPORT_SYMBOL(netdev_all_upper_get_next_dev_rcu); + +/** + * netdev_lower_get_next_private - Get the next ->private from the + * lower neighbour list + * @dev: device + * @iter: list_head ** of the current position + * + * Gets the next netdev_adjacent->private from the dev's lower neighbour + * list, starting from iter position. The caller must hold either hold the + * RTNL lock or its own locking that guarantees that the neighbour lower + * list will remain unchainged. 
+ */ +void *netdev_lower_get_next_private(struct net_device *dev, + struct list_head **iter) +{ + struct netdev_adjacent *lower; + + lower = list_entry(*iter, struct netdev_adjacent, list); + + if (&lower->list == &dev->adj_list.lower) + return NULL; + + if (iter) + *iter = lower->list.next; + + return lower->private; +} +EXPORT_SYMBOL(netdev_lower_get_next_private); + +/** + * netdev_lower_get_next_private_rcu - Get the next ->private from the + * lower neighbour list, RCU + * variant + * @dev: device + * @iter: list_head ** of the current position + * + * Gets the next netdev_adjacent->private from the dev's lower neighbour + * list, starting from iter position. The caller must hold RCU read lock. + */ +void *netdev_lower_get_next_private_rcu(struct net_device *dev, + struct list_head **iter) +{ + struct netdev_adjacent *lower; + + WARN_ON_ONCE(!rcu_read_lock_held()); + + lower = list_entry_rcu((*iter)->next, struct netdev_adjacent, list); + + if (&lower->list == &dev->adj_list.lower) + return NULL; + + if (iter) + *iter = &lower->list; + + return lower->private; +} +EXPORT_SYMBOL(netdev_lower_get_next_private_rcu); /** * netdev_master_upper_dev_get_rcu - Get master upper device @@ -4505,7 +4570,7 @@ struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev) { struct netdev_adjacent *upper; - upper = list_first_or_null_rcu(&dev->upper_dev_list, + upper = list_first_or_null_rcu(&dev->adj_list.upper, struct netdev_adjacent, list); if (upper && likely(upper->master)) return upper->dev; @@ -4515,15 +4580,16 @@ EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu); static int __netdev_adjacent_dev_insert(struct net_device *dev, struct net_device *adj_dev, - bool neighbour, bool master, - bool upper) + struct list_head *dev_list, + void *private, bool master) { struct netdev_adjacent *adj; + char linkname[IFNAMSIZ+7]; + int ret; - adj = __netdev_find_adj(dev, adj_dev, upper); + adj = __netdev_find_adj(dev, adj_dev, dev_list); if (adj) { - BUG_ON(neighbour); 
adj->ref_nr++; return 0; } @@ -4534,124 +4600,178 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev, adj->dev = adj_dev; adj->master = master; - adj->neighbour = neighbour; adj->ref_nr = 1; - + adj->private = private; dev_hold(adj_dev); - pr_debug("dev_hold for %s, because of %s link added from %s to %s\n", - adj_dev->name, upper ? "upper" : "lower", dev->name, - adj_dev->name); - if (!upper) { - list_add_tail_rcu(&adj->list, &dev->lower_dev_list); - return 0; + pr_debug("dev_hold for %s, because of link added from %s to %s\n", + adj_dev->name, dev->name, adj_dev->name); + + if (dev_list == &dev->adj_list.lower) { + sprintf(linkname, "lower_%s", adj_dev->name); + ret = sysfs_create_link(&(dev->dev.kobj), + &(adj_dev->dev.kobj), linkname); + if (ret) + goto free_adj; + } else if (dev_list == &dev->adj_list.upper) { + sprintf(linkname, "upper_%s", adj_dev->name); + ret = sysfs_create_link(&(dev->dev.kobj), + &(adj_dev->dev.kobj), linkname); + if (ret) + goto free_adj; } - /* Ensure that master upper link is always the first item in list. */ - if (master) - list_add_rcu(&adj->list, &dev->upper_dev_list); - else - list_add_tail_rcu(&adj->list, &dev->upper_dev_list); + /* Ensure that master link is always the first item in list. 
*/ + if (master) { + ret = sysfs_create_link(&(dev->dev.kobj), + &(adj_dev->dev.kobj), "master"); + if (ret) + goto remove_symlinks; + + list_add_rcu(&adj->list, dev_list); + } else { + list_add_tail_rcu(&adj->list, dev_list); + } return 0; -} -static inline int __netdev_upper_dev_insert(struct net_device *dev, - struct net_device *udev, - bool master, bool neighbour) -{ - return __netdev_adjacent_dev_insert(dev, udev, neighbour, master, - true); -} +remove_symlinks: + if (dev_list == &dev->adj_list.lower) { + sprintf(linkname, "lower_%s", adj_dev->name); + sysfs_remove_link(&(dev->dev.kobj), linkname); + } else if (dev_list == &dev->adj_list.upper) { + sprintf(linkname, "upper_%s", adj_dev->name); + sysfs_remove_link(&(dev->dev.kobj), linkname); + } -static inline int __netdev_lower_dev_insert(struct net_device *dev, - struct net_device *ldev, - bool neighbour) -{ - return __netdev_adjacent_dev_insert(dev, ldev, neighbour, false, - false); +free_adj: + kfree(adj); + + return ret; } void __netdev_adjacent_dev_remove(struct net_device *dev, - struct net_device *adj_dev, bool upper) + struct net_device *adj_dev, + struct list_head *dev_list) { struct netdev_adjacent *adj; + char linkname[IFNAMSIZ+7]; - if (upper) - adj = __netdev_find_upper(dev, adj_dev); - else - adj = __netdev_find_lower(dev, adj_dev); + adj = __netdev_find_adj(dev, adj_dev, dev_list); - if (!adj) + if (!adj) { + pr_err("tried to remove device %s from %s\n", + dev->name, adj_dev->name); BUG(); + } if (adj->ref_nr > 1) { + pr_debug("%s to %s ref_nr-- = %d\n", dev->name, adj_dev->name, + adj->ref_nr-1); adj->ref_nr--; return; } + if (adj->master) + sysfs_remove_link(&(dev->dev.kobj), "master"); + + if (dev_list == &dev->adj_list.lower) { + sprintf(linkname, "lower_%s", adj_dev->name); + sysfs_remove_link(&(dev->dev.kobj), linkname); + } else if (dev_list == &dev->adj_list.upper) { + sprintf(linkname, "upper_%s", adj_dev->name); + sysfs_remove_link(&(dev->dev.kobj), linkname); + } + 
list_del_rcu(&adj->list); - pr_debug("dev_put for %s, because of %s link removed from %s to %s\n", - adj_dev->name, upper ? "upper" : "lower", dev->name, - adj_dev->name); + pr_debug("dev_put for %s, because link removed from %s to %s\n", + adj_dev->name, dev->name, adj_dev->name); dev_put(adj_dev); kfree_rcu(adj, rcu); } -static inline void __netdev_upper_dev_remove(struct net_device *dev, - struct net_device *udev) -{ - return __netdev_adjacent_dev_remove(dev, udev, true); -} - -static inline void __netdev_lower_dev_remove(struct net_device *dev, - struct net_device *ldev) -{ - return __netdev_adjacent_dev_remove(dev, ldev, false); -} - -int __netdev_adjacent_dev_insert_link(struct net_device *dev, - struct net_device *upper_dev, - bool master, bool neighbour) +int __netdev_adjacent_dev_link_lists(struct net_device *dev, + struct net_device *upper_dev, + struct list_head *up_list, + struct list_head *down_list, + void *private, bool master) { int ret; - ret = __netdev_upper_dev_insert(dev, upper_dev, master, neighbour); + ret = __netdev_adjacent_dev_insert(dev, upper_dev, up_list, private, + master); if (ret) return ret; - ret = __netdev_lower_dev_insert(upper_dev, dev, neighbour); + ret = __netdev_adjacent_dev_insert(upper_dev, dev, down_list, private, + false); if (ret) { - __netdev_upper_dev_remove(dev, upper_dev); + __netdev_adjacent_dev_remove(dev, upper_dev, up_list); return ret; } return 0; } -static inline int __netdev_adjacent_dev_link(struct net_device *dev, - struct net_device *udev) +int __netdev_adjacent_dev_link(struct net_device *dev, + struct net_device *upper_dev) { - return __netdev_adjacent_dev_insert_link(dev, udev, false, false); + return __netdev_adjacent_dev_link_lists(dev, upper_dev, + &dev->all_adj_list.upper, + &upper_dev->all_adj_list.lower, + NULL, false); } -static inline int __netdev_adjacent_dev_link_neighbour(struct net_device *dev, - struct net_device *udev, - bool master) +void __netdev_adjacent_dev_unlink_lists(struct net_device 
*dev, + struct net_device *upper_dev, + struct list_head *up_list, + struct list_head *down_list) { - return __netdev_adjacent_dev_insert_link(dev, udev, master, true); + __netdev_adjacent_dev_remove(dev, upper_dev, up_list); + __netdev_adjacent_dev_remove(upper_dev, dev, down_list); } void __netdev_adjacent_dev_unlink(struct net_device *dev, struct net_device *upper_dev) { - __netdev_upper_dev_remove(dev, upper_dev); - __netdev_lower_dev_remove(upper_dev, dev); + __netdev_adjacent_dev_unlink_lists(dev, upper_dev, + &dev->all_adj_list.upper, + &upper_dev->all_adj_list.lower); } +int __netdev_adjacent_dev_link_neighbour(struct net_device *dev, + struct net_device *upper_dev, + void *private, bool master) +{ + int ret = __netdev_adjacent_dev_link(dev, upper_dev); + + if (ret) + return ret; + + ret = __netdev_adjacent_dev_link_lists(dev, upper_dev, + &dev->adj_list.upper, + &upper_dev->adj_list.lower, + private, master); + if (ret) { + __netdev_adjacent_dev_unlink(dev, upper_dev); + return ret; + } + + return 0; +} + +void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev, + struct net_device *upper_dev) +{ + __netdev_adjacent_dev_unlink(dev, upper_dev); + __netdev_adjacent_dev_unlink_lists(dev, upper_dev, + &dev->adj_list.upper, + &upper_dev->adj_list.lower); +} static int __netdev_upper_dev_link(struct net_device *dev, - struct net_device *upper_dev, bool master) + struct net_device *upper_dev, bool master, + void *private) { struct netdev_adjacent *i, *j, *to_i, *to_j; int ret = 0; @@ -4662,26 +4782,29 @@ static int __netdev_upper_dev_link(struct net_device *dev, return -EBUSY; /* To prevent loops, check if dev is not upper device to upper_dev. 
*/ - if (__netdev_find_upper(upper_dev, dev)) + if (__netdev_find_adj(upper_dev, dev, &upper_dev->all_adj_list.upper)) return -EBUSY; - if (__netdev_find_upper(dev, upper_dev)) + if (__netdev_find_adj(dev, upper_dev, &dev->all_adj_list.upper)) return -EEXIST; if (master && netdev_master_upper_dev_get(dev)) return -EBUSY; - ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, master); + ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, private, + master); if (ret) return ret; /* Now that we linked these devs, make all the upper_dev's - * upper_dev_list visible to every dev's lower_dev_list and vice + * all_adj_list.upper visible to every dev's all_adj_list.lower an * versa, and don't forget the devices itself. All of these * links are non-neighbours. */ - list_for_each_entry(i, &dev->lower_dev_list, list) { - list_for_each_entry(j, &upper_dev->upper_dev_list, list) { + list_for_each_entry(i, &dev->all_adj_list.lower, list) { + list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) { + pr_debug("Interlinking %s with %s, non-neighbour\n", + i->dev->name, j->dev->name); ret = __netdev_adjacent_dev_link(i->dev, j->dev); if (ret) goto rollback_mesh; @@ -4689,14 +4812,18 @@ static int __netdev_upper_dev_link(struct net_device *dev, } /* add dev to every upper_dev's upper device */ - list_for_each_entry(i, &upper_dev->upper_dev_list, list) { + list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) { + pr_debug("linking %s's upper device %s with %s\n", + upper_dev->name, i->dev->name, dev->name); ret = __netdev_adjacent_dev_link(dev, i->dev); if (ret) goto rollback_upper_mesh; } /* add upper_dev to every dev's lower device */ - list_for_each_entry(i, &dev->lower_dev_list, list) { + list_for_each_entry(i, &dev->all_adj_list.lower, list) { + pr_debug("linking %s's lower device %s with %s\n", dev->name, + i->dev->name, upper_dev->name); ret = __netdev_adjacent_dev_link(i->dev, upper_dev); if (ret) goto rollback_lower_mesh; @@ -4707,7 +4834,7 @@ 
static int __netdev_upper_dev_link(struct net_device *dev, rollback_lower_mesh: to_i = i; - list_for_each_entry(i, &dev->lower_dev_list, list) { + list_for_each_entry(i, &dev->all_adj_list.lower, list) { if (i == to_i) break; __netdev_adjacent_dev_unlink(i->dev, upper_dev); @@ -4717,7 +4844,7 @@ rollback_lower_mesh: rollback_upper_mesh: to_i = i; - list_for_each_entry(i, &upper_dev->upper_dev_list, list) { + list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) { if (i == to_i) break; __netdev_adjacent_dev_unlink(dev, i->dev); @@ -4728,8 +4855,8 @@ rollback_upper_mesh: rollback_mesh: to_i = i; to_j = j; - list_for_each_entry(i, &dev->lower_dev_list, list) { - list_for_each_entry(j, &upper_dev->upper_dev_list, list) { + list_for_each_entry(i, &dev->all_adj_list.lower, list) { + list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) { if (i == to_i && j == to_j) break; __netdev_adjacent_dev_unlink(i->dev, j->dev); @@ -4738,7 +4865,7 @@ rollback_mesh: break; } - __netdev_adjacent_dev_unlink(dev, upper_dev); + __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev); return ret; } @@ -4756,7 +4883,7 @@ rollback_mesh: int netdev_upper_dev_link(struct net_device *dev, struct net_device *upper_dev) { - return __netdev_upper_dev_link(dev, upper_dev, false); + return __netdev_upper_dev_link(dev, upper_dev, false, NULL); } EXPORT_SYMBOL(netdev_upper_dev_link); @@ -4774,10 +4901,18 @@ EXPORT_SYMBOL(netdev_upper_dev_link); int netdev_master_upper_dev_link(struct net_device *dev, struct net_device *upper_dev) { - return __netdev_upper_dev_link(dev, upper_dev, true); + return __netdev_upper_dev_link(dev, upper_dev, true, NULL); } EXPORT_SYMBOL(netdev_master_upper_dev_link); +int netdev_master_upper_dev_link_private(struct net_device *dev, + struct net_device *upper_dev, + void *private) +{ + return __netdev_upper_dev_link(dev, upper_dev, true, private); +} +EXPORT_SYMBOL(netdev_master_upper_dev_link_private); + /** * netdev_upper_dev_unlink - Removes a link to upper 
device * @dev: device @@ -4792,29 +4927,59 @@ void netdev_upper_dev_unlink(struct net_device *dev, struct netdev_adjacent *i, *j; ASSERT_RTNL(); - __netdev_adjacent_dev_unlink(dev, upper_dev); + __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev); /* Here is the tricky part. We must remove all dev's lower * devices from all upper_dev's upper devices and vice * versa, to maintain the graph relationship. */ - list_for_each_entry(i, &dev->lower_dev_list, list) - list_for_each_entry(j, &upper_dev->upper_dev_list, list) + list_for_each_entry(i, &dev->all_adj_list.lower, list) + list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) __netdev_adjacent_dev_unlink(i->dev, j->dev); /* remove also the devices itself from lower/upper device * list */ - list_for_each_entry(i, &dev->lower_dev_list, list) + list_for_each_entry(i, &dev->all_adj_list.lower, list) __netdev_adjacent_dev_unlink(i->dev, upper_dev); - list_for_each_entry(i, &upper_dev->upper_dev_list, list) + list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) __netdev_adjacent_dev_unlink(dev, i->dev); call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev); } EXPORT_SYMBOL(netdev_upper_dev_unlink); +void *netdev_lower_dev_get_private_rcu(struct net_device *dev, + struct net_device *lower_dev) +{ + struct netdev_adjacent *lower; + + if (!lower_dev) + return NULL; + lower = __netdev_find_adj_rcu(dev, lower_dev, &dev->adj_list.lower); + if (!lower) + return NULL; + + return lower->private; +} +EXPORT_SYMBOL(netdev_lower_dev_get_private_rcu); + +void *netdev_lower_dev_get_private(struct net_device *dev, + struct net_device *lower_dev) +{ + struct netdev_adjacent *lower; + + if (!lower_dev) + return NULL; + lower = __netdev_find_adj(dev, lower_dev, &dev->adj_list.lower); + if (!lower) + return NULL; + + return lower->private; +} +EXPORT_SYMBOL(netdev_lower_dev_get_private); + static void dev_change_rx_flags(struct net_device *dev, int flags) { const struct net_device_ops *ops = dev->netdev_ops; @@ -4823,7 
+4988,7 @@ static void dev_change_rx_flags(struct net_device *dev, int flags) ops->ndo_change_rx_flags(dev, flags); } -static int __dev_set_promiscuity(struct net_device *dev, int inc) +static int __dev_set_promiscuity(struct net_device *dev, int inc, bool notify) { unsigned int old_flags = dev->flags; kuid_t uid; @@ -4866,6 +5031,8 @@ static int __dev_set_promiscuity(struct net_device *dev, int inc) dev_change_rx_flags(dev, IFF_PROMISC); } + if (notify) + __dev_notify_flags(dev, old_flags, IFF_PROMISC); return 0; } @@ -4885,7 +5052,7 @@ int dev_set_promiscuity(struct net_device *dev, int inc) unsigned int old_flags = dev->flags; int err; - err = __dev_set_promiscuity(dev, inc); + err = __dev_set_promiscuity(dev, inc, true); if (err < 0) return err; if (dev->flags != old_flags) @@ -4894,22 +5061,9 @@ int dev_set_promiscuity(struct net_device *dev, int inc) } EXPORT_SYMBOL(dev_set_promiscuity); -/** - * dev_set_allmulti - update allmulti count on a device - * @dev: device - * @inc: modifier - * - * Add or remove reception of all multicast frames to a device. While the - * count in the device remains above zero the interface remains listening - * to all interfaces. Once it hits zero the device reverts back to normal - * filtering operation. A negative @inc value is used to drop the counter - * when releasing a resource needing all multicasts. - * Return 0 if successful or a negative errno code on error. 
- */ - -int dev_set_allmulti(struct net_device *dev, int inc) +static int __dev_set_allmulti(struct net_device *dev, int inc, bool notify) { - unsigned int old_flags = dev->flags; + unsigned int old_flags = dev->flags, old_gflags = dev->gflags; ASSERT_RTNL(); @@ -4932,9 +5086,30 @@ int dev_set_allmulti(struct net_device *dev, int inc) if (dev->flags ^ old_flags) { dev_change_rx_flags(dev, IFF_ALLMULTI); dev_set_rx_mode(dev); + if (notify) + __dev_notify_flags(dev, old_flags, + dev->gflags ^ old_gflags); } return 0; } + +/** + * dev_set_allmulti - update allmulti count on a device + * @dev: device + * @inc: modifier + * + * Add or remove reception of all multicast frames to a device. While the + * count in the device remains above zero the interface remains listening + * to all interfaces. Once it hits zero the device reverts back to normal + * filtering operation. A negative @inc value is used to drop the counter + * when releasing a resource needing all multicasts. + * Return 0 if successful or a negative errno code on error. + */ + +int dev_set_allmulti(struct net_device *dev, int inc) +{ + return __dev_set_allmulti(dev, inc, true); +} EXPORT_SYMBOL(dev_set_allmulti); /* @@ -4959,10 +5134,10 @@ void __dev_set_rx_mode(struct net_device *dev) * therefore calling __dev_set_promiscuity here is safe. */ if (!netdev_uc_empty(dev) && !dev->uc_promisc) { - __dev_set_promiscuity(dev, 1); + __dev_set_promiscuity(dev, 1, false); dev->uc_promisc = true; } else if (netdev_uc_empty(dev) && dev->uc_promisc) { - __dev_set_promiscuity(dev, -1); + __dev_set_promiscuity(dev, -1, false); dev->uc_promisc = false; } } @@ -5051,9 +5226,13 @@ int __dev_change_flags(struct net_device *dev, unsigned int flags) if ((flags ^ dev->gflags) & IFF_PROMISC) { int inc = (flags & IFF_PROMISC) ? 
1 : -1; + unsigned int old_flags = dev->flags; dev->gflags ^= IFF_PROMISC; - dev_set_promiscuity(dev, inc); + + if (__dev_set_promiscuity(dev, inc, false) >= 0) + if (dev->flags != old_flags) + dev_set_rx_mode(dev); } /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI @@ -5064,16 +5243,20 @@ int __dev_change_flags(struct net_device *dev, unsigned int flags) int inc = (flags & IFF_ALLMULTI) ? 1 : -1; dev->gflags ^= IFF_ALLMULTI; - dev_set_allmulti(dev, inc); + __dev_set_allmulti(dev, inc, false); } return ret; } -void __dev_notify_flags(struct net_device *dev, unsigned int old_flags) +void __dev_notify_flags(struct net_device *dev, unsigned int old_flags, + unsigned int gchanges) { unsigned int changes = dev->flags ^ old_flags; + if (gchanges) + rtmsg_ifinfo(RTM_NEWLINK, dev, gchanges); + if (changes & IFF_UP) { if (dev->flags & IFF_UP) call_netdevice_notifiers(NETDEV_UP, dev); @@ -5102,17 +5285,14 @@ void __dev_notify_flags(struct net_device *dev, unsigned int old_flags) int dev_change_flags(struct net_device *dev, unsigned int flags) { int ret; - unsigned int changes, old_flags = dev->flags; + unsigned int changes, old_flags = dev->flags, old_gflags = dev->gflags; ret = __dev_change_flags(dev, flags); if (ret < 0) return ret; - changes = old_flags ^ dev->flags; - if (changes) - rtmsg_ifinfo(RTM_NEWLINK, dev, changes); - - __dev_notify_flags(dev, old_flags); + changes = (old_flags ^ dev->flags) | (old_gflags ^ dev->gflags); + __dev_notify_flags(dev, old_flags, changes); return ret; } EXPORT_SYMBOL(dev_change_flags); @@ -5259,6 +5439,7 @@ static void net_set_todo(struct net_device *dev) static void rollback_registered_many(struct list_head *head) { struct net_device *dev, *tmp; + LIST_HEAD(close_head); BUG_ON(dev_boot_phase); ASSERT_RTNL(); @@ -5281,7 +5462,9 @@ static void rollback_registered_many(struct list_head *head) } /* If device is running, close it first. 
*/ - dev_close_many(head); + list_for_each_entry(dev, head, unreg_list) + list_add_tail(&dev->close_list, &close_head); + dev_close_many(&close_head); list_for_each_entry(dev, head, unreg_list) { /* And unlink it from device chain. */ @@ -6077,9 +6260,12 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, INIT_LIST_HEAD(&dev->napi_list); INIT_LIST_HEAD(&dev->unreg_list); + INIT_LIST_HEAD(&dev->close_list); INIT_LIST_HEAD(&dev->link_watch_list); - INIT_LIST_HEAD(&dev->upper_dev_list); - INIT_LIST_HEAD(&dev->lower_dev_list); + INIT_LIST_HEAD(&dev->adj_list.upper); + INIT_LIST_HEAD(&dev->adj_list.lower); + INIT_LIST_HEAD(&dev->all_adj_list.upper); + INIT_LIST_HEAD(&dev->all_adj_list.lower); dev->priv_flags = IFF_XMIT_DST_RELEASE; setup(dev); diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 8d7d0dd72db2..f8e25ac41c6c 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -25,9 +25,35 @@ static void iph_to_flow_copy_addrs(struct flow_keys *flow, const struct iphdr *i memcpy(&flow->src, &iph->saddr, sizeof(flow->src) + sizeof(flow->dst)); } +/** + * skb_flow_get_ports - extract the upper layer ports and return them + * @skb: buffer to extract the ports from + * @thoff: transport header offset + * @ip_proto: protocol for which to get port offset + * + * The function will try to retrieve the ports at offset thoff + poff where poff + * is the protocol port offset returned from proto_ports_offset + */ +__be32 skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto) +{ + int poff = proto_ports_offset(ip_proto); + + if (poff >= 0) { + __be32 *ports, _ports; + + ports = skb_header_pointer(skb, thoff + poff, + sizeof(_ports), &_ports); + if (ports) + return *ports; + } + + return 0; +} +EXPORT_SYMBOL(skb_flow_get_ports); + bool skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow) { - int poff, nhoff = skb_network_offset(skb); + int nhoff = skb_network_offset(skb); u8 ip_proto; __be16 
proto = skb->protocol; @@ -150,16 +176,7 @@ ipv6: } flow->ip_proto = ip_proto; - poff = proto_ports_offset(ip_proto); - if (poff >= 0) { - __be32 *ports, _ports; - - ports = skb_header_pointer(skb, nhoff + poff, - sizeof(_ports), &_ports); - if (ports) - flow->ports = *ports; - } - + flow->ports = skb_flow_get_ports(skb, nhoff, ip_proto); flow->thoff = (u16) nhoff; return true; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 6072610a8672..ca15f32821fb 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -867,7 +867,7 @@ static void neigh_invalidate(struct neighbour *neigh) static void neigh_probe(struct neighbour *neigh) __releases(neigh->lock) { - struct sk_buff *skb = skb_peek(&neigh->arp_queue); + struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue); /* keep skb alive even if arp_queue overflows */ if (skb) skb = skb_copy(skb, GFP_ATOMIC); diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index d9cd627e6a16..9b7cf6c85f82 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -222,11 +222,10 @@ static void net_prio_attach(struct cgroup_subsys_state *css, struct cgroup_taskset *tset) { struct task_struct *p; - void *v; + void *v = (void *)(unsigned long)css->cgroup->id; cgroup_taskset_for_each(p, css, tset) { task_lock(p); - v = (void *)(unsigned long)task_netprioidx(p); iterate_fd(p->files, 0, update_netprio, v); task_unlock(p); } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 2a0e21de3060..4aedf03da052 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1647,9 +1647,8 @@ int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm) } dev->rtnl_link_state = RTNL_LINK_INITIALIZED; - rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U); - __dev_notify_flags(dev, old_flags); + __dev_notify_flags(dev, old_flags, ~0U); return 0; } EXPORT_SYMBOL(rtnl_configure_link); diff --git a/net/core/sock.c b/net/core/sock.c index 0b39e7ae4383..fd6afa267475 100644 --- a/net/core/sock.c +++ 
b/net/core/sock.c @@ -914,6 +914,13 @@ set_rcvbuf: } break; #endif + + case SO_MAX_PACING_RATE: + sk->sk_max_pacing_rate = val; + sk->sk_pacing_rate = min(sk->sk_pacing_rate, + sk->sk_max_pacing_rate); + break; + default: ret = -ENOPROTOOPT; break; @@ -1177,6 +1184,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname, break; #endif + case SO_MAX_PACING_RATE: + v.val = sk->sk_max_pacing_rate; + break; + default: return -ENOPROTOOPT; } @@ -2319,6 +2330,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_ll_usec = sysctl_net_busy_read; #endif + sk->sk_max_pacing_rate = ~0U; sk->sk_pacing_rate = ~0U; /* * Before updating sk_refcnt, we must commit prior changes to memory diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index be1f64d35358..8f032bae60ad 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -58,7 +58,7 @@ #include <net/ipv6.h> #include <net/ip.h> #include <net/dsa.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> __setup("ether=", netdev_boot_setup); @@ -133,7 +133,7 @@ int eth_rebuild_header(struct sk_buff *skb) return arp_find(eth->h_dest, skb); #endif default: - printk(KERN_DEBUG + netdev_dbg(dev, "%s: unable to resolve type %X addresses.\n", dev->name, ntohs(eth->h_proto)); @@ -169,20 +169,9 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) else skb->pkt_type = PACKET_MULTICAST; } - - /* - * This ALLMULTI check should be redundant by 1.4 - * so don't forget to remove it. - * - * Seems, you forgot to remove it. All silly devices - * seems to set IFF_PROMISC. 
- */ - - else if (1 /*dev->flags&IFF_PROMISC */ ) { - if (unlikely(!ether_addr_equal_64bits(eth->h_dest, - dev->dev_addr))) - skb->pkt_type = PACKET_OTHERHOST; - } + else if (unlikely(!ether_addr_equal_64bits(eth->h_dest, + dev->dev_addr))) + skb->pkt_type = PACKET_OTHERHOST; /* * Some variants of DSA tagging don't have an ethertype field @@ -190,12 +179,13 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) * variants has been configured on the receiving interface, * and if so, set skb->protocol without looking at the packet. */ - if (netdev_uses_dsa_tags(dev)) + if (unlikely(netdev_uses_dsa_tags(dev))) return htons(ETH_P_DSA); - if (netdev_uses_trailer_tags(dev)) + + if (unlikely(netdev_uses_trailer_tags(dev))) return htons(ETH_P_TRAILER); - if (ntohs(eth->h_proto) >= ETH_P_802_3_MIN) + if (likely(ntohs(eth->h_proto) >= ETH_P_802_3_MIN)) return eth->h_proto; /* @@ -204,7 +194,7 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) * layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This * won't work for fault tolerant netware but does for the rest. 
*/ - if (skb->len >= 2 && *(unsigned short *)(skb->data) == 0xFFFF) + if (unlikely(skb->len >= 2 && *(unsigned short *)(skb->data) == 0xFFFF)) return htons(ETH_P_802_3); /* diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index cfeb85cff4f0..35913fb77dc8 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1546,6 +1546,7 @@ static const struct net_protocol tcp_protocol = { }; static const struct net_protocol udp_protocol = { + .early_demux = udp_v4_early_demux, .handler = udp_rcv, .err_handler = udp_err, .no_policy = 1, diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 3df6d3edb2a1..45c74ba03970 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -762,12 +762,9 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn) if (IS_LEAF(node) || ((struct tnode *) node)->pos > tn->pos + tn->bits - 1) { - if (tkey_extract_bits(node->key, - oldtnode->pos + oldtnode->bits, - 1) == 0) - put_child(tn, 2*i, node); - else - put_child(tn, 2*i+1, node); + put_child(tn, + tkey_extract_bits(node->key, oldtnode->pos, oldtnode->bits + 1), + node); continue; } diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 5f7d11a45871..5c0e8bc6e5ba 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -353,6 +353,9 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) saddr = fib_compute_spec_dst(skb); ipc.opt = NULL; ipc.tx_flags = 0; + ipc.ttl = 0; + ipc.tos = -1; + if (icmp_param->replyopts.opt.opt.optlen) { ipc.opt = &icmp_param->replyopts.opt; if (ipc.opt->opt.srr) @@ -608,6 +611,8 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) ipc.addr = iph->saddr; ipc.opt = &icmp_param->replyopts.opt; ipc.tx_flags = 0; + ipc.ttl = 0; + ipc.tos = -1; rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos, type, code, icmp_param); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 6acb541c9091..56e82a4027b4 100644 --- a/net/ipv4/inet_connection_sock.c +++ 
b/net/ipv4/inet_connection_sock.c @@ -29,27 +29,19 @@ const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n"; EXPORT_SYMBOL(inet_csk_timer_bug_msg); #endif -/* - * This struct holds the first and last local port number. - */ -struct local_ports sysctl_local_ports __read_mostly = { - .lock = __SEQLOCK_UNLOCKED(sysctl_local_ports.lock), - .range = { 32768, 61000 }, -}; - unsigned long *sysctl_local_reserved_ports; EXPORT_SYMBOL(sysctl_local_reserved_ports); -void inet_get_local_port_range(int *low, int *high) +void inet_get_local_port_range(struct net *net, int *low, int *high) { unsigned int seq; do { - seq = read_seqbegin(&sysctl_local_ports.lock); + seq = read_seqbegin(&net->ipv4.sysctl_local_ports.lock); - *low = sysctl_local_ports.range[0]; - *high = sysctl_local_ports.range[1]; - } while (read_seqretry(&sysctl_local_ports.lock, seq)); + *low = net->ipv4.sysctl_local_ports.range[0]; + *high = net->ipv4.sysctl_local_ports.range[1]; + } while (read_seqretry(&net->ipv4.sysctl_local_ports.lock, seq)); } EXPORT_SYMBOL(inet_get_local_port_range); @@ -79,17 +71,16 @@ int inet_csk_bind_conflict(const struct sock *sk, (!reuseport || !sk2->sk_reuseport || (sk2->sk_state != TCP_TIME_WAIT && !uid_eq(uid, sock_i_uid(sk2))))) { - const __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2); - if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) || - sk2_rcv_saddr == sk_rcv_saddr(sk)) + + if (!sk2->sk_rcv_saddr || !sk->sk_rcv_saddr || + sk2->sk_rcv_saddr == sk->sk_rcv_saddr) break; } if (!relax && reuse && sk2->sk_reuse && sk2->sk_state != TCP_LISTEN) { - const __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2); - if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) || - sk2_rcv_saddr == sk_rcv_saddr(sk)) + if (!sk2->sk_rcv_saddr || !sk->sk_rcv_saddr || + sk2->sk_rcv_saddr == sk->sk_rcv_saddr) break; } } @@ -116,7 +107,7 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) int remaining, rover, low, high; again: - inet_get_local_port_range(&low, &high); + inet_get_local_port_range(net, &low, 
&high); remaining = (high - low) + 1; smallest_rover = rover = net_random() % remaining + low; diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 5f648751fce2..22000279efc8 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -222,7 +222,7 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, u32 portid, u32 seq, u16 nlmsg_flags, const struct nlmsghdr *unlh) { - long tmo; + s32 tmo; struct inet_diag_msg *r; struct nlmsghdr *nlh; @@ -234,7 +234,7 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, r = nlmsg_data(nlh); BUG_ON(tw->tw_state != TCP_TIME_WAIT); - tmo = tw->tw_ttd - jiffies; + tmo = tw->tw_ttd - inet_tw_time_stamp(); if (tmo < 0) tmo = 0; @@ -248,7 +248,7 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, r->id.idiag_dst[0] = tw->tw_daddr; r->idiag_state = tw->tw_substate; r->idiag_timer = 3; - r->idiag_expires = DIV_ROUND_UP(tmo * 1000, HZ); + r->idiag_expires = jiffies_to_msecs(tmo); r->idiag_rqueue = 0; r->idiag_wqueue = 0; r->idiag_uid = 0; diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 96da9c77deca..ae199596b9b0 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -494,7 +494,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, u32 offset = hint + port_offset; struct inet_timewait_sock *tw = NULL; - inet_get_local_port_range(&low, &high); + inet_get_local_port_range(net, &low, &high); remaining = (high - low) + 1; local_bh_disable(); diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 1f27c9f4afd0..9bcd8f7234ec 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -387,11 +387,11 @@ void inet_twsk_schedule(struct inet_timewait_sock *tw, if (slot >= INET_TWDR_TWKILL_SLOTS) slot = INET_TWDR_TWKILL_SLOTS - 1; } - tw->tw_ttd = jiffies + timeo; + tw->tw_ttd = inet_tw_time_stamp() + timeo; slot = (twdr->slot + slot) & (INET_TWDR_TWKILL_SLOTS - 1); list = 
&twdr->cells[slot]; } else { - tw->tw_ttd = jiffies + (slot << INET_TWDR_RECYCLE_TICK); + tw->tw_ttd = inet_tw_time_stamp() + (slot << INET_TWDR_RECYCLE_TICK); if (twdr->twcal_hand < 0) { twdr->twcal_hand = 0; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index a04d872c54f9..7d8357bb2ba6 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1060,6 +1060,9 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, rt->dst.dev->mtu : dst_mtu(&rt->dst); cork->dst = &rt->dst; cork->length = 0; + cork->ttl = ipc->ttl; + cork->tos = ipc->tos; + cork->priority = ipc->priority; cork->tx_flags = ipc->tx_flags; return 0; @@ -1311,7 +1314,9 @@ struct sk_buff *__ip_make_skb(struct sock *sk, if (cork->flags & IPCORK_OPT) opt = cork->opt; - if (rt->rt_type == RTN_MULTICAST) + if (cork->ttl != 0) + ttl = cork->ttl; + else if (rt->rt_type == RTN_MULTICAST) ttl = inet->mc_ttl; else ttl = ip_select_ttl(inet, &rt->dst); @@ -1319,7 +1324,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk, iph = ip_hdr(skb); iph->version = 4; iph->ihl = 5; - iph->tos = inet->tos; + iph->tos = (cork->tos != -1) ? cork->tos : inet->tos; iph->frag_off = df; iph->ttl = ttl; iph->protocol = sk->sk_protocol; @@ -1331,7 +1336,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk, ip_options_build(skb, opt, cork->addr, rt, 0); } - skb->priority = sk->sk_priority; + skb->priority = (cork->tos != -1) ? 
cork->priority: sk->sk_priority; skb->mark = sk->sk_mark; /* * Steal rt from cork.dst to avoid a pair of atomic_inc/atomic_dec @@ -1481,6 +1486,8 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr, ipc.addr = daddr; ipc.opt = NULL; ipc.tx_flags = 0; + ipc.ttl = 0; + ipc.tos = -1; if (replyopts.opt.opt.optlen) { ipc.opt = &replyopts.opt; diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index d9c4f113d709..0626f2cb192e 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -189,7 +189,7 @@ EXPORT_SYMBOL(ip_cmsg_recv); int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc) { - int err; + int err, val; struct cmsghdr *cmsg; for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) { @@ -215,6 +215,24 @@ int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc) ipc->addr = info->ipi_spec_dst.s_addr; break; } + case IP_TTL: + if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) + return -EINVAL; + val = *(int *)CMSG_DATA(cmsg); + if (val < 1 || val > 255) + return -EINVAL; + ipc->ttl = val; + break; + case IP_TOS: + if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) + return -EINVAL; + val = *(int *)CMSG_DATA(cmsg); + if (val < 0 || val > 255) + return -EINVAL; + ipc->tos = val; + ipc->priority = rt_tos2priority(ipc->tos); + break; + default: return -EINVAL; } @@ -1034,11 +1052,12 @@ e_inval: * destination in skb->cb[] before dst drop. * This way, receiver doesnt make cache line misses to read rtable. 
*/ -void ipv4_pktinfo_prepare(struct sk_buff *skb) +void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb) { struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(skb); - if (skb_rtable(skb)) { + if ((inet_sk(sk)->cmsg_flags & IP_CMSG_PKTINFO) && + skb_rtable(skb)) { pktinfo->ipi_ifindex = inet_iif(skb); pktinfo->ipi_spec_dst.s_addr = fib_compute_spec_dst(skb); } else { diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index e805e7b3030e..91f69bc883fe 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -49,70 +49,6 @@ static struct rtnl_link_ops vti_link_ops __read_mostly; static int vti_net_id __read_mostly; static int vti_tunnel_init(struct net_device *dev); -static int vti_err(struct sk_buff *skb, u32 info) -{ - - /* All the routers (except for Linux) return only - * 8 bytes of packet payload. It means, that precise relaying of - * ICMP in the real Internet is absolutely infeasible. - */ - struct net *net = dev_net(skb->dev); - struct ip_tunnel_net *itn = net_generic(net, vti_net_id); - struct iphdr *iph = (struct iphdr *)skb->data; - const int type = icmp_hdr(skb)->type; - const int code = icmp_hdr(skb)->code; - struct ip_tunnel *t; - int err; - - switch (type) { - default: - case ICMP_PARAMETERPROB: - return 0; - - case ICMP_DEST_UNREACH: - switch (code) { - case ICMP_SR_FAILED: - case ICMP_PORT_UNREACH: - /* Impossible event. */ - return 0; - default: - /* All others are translated to HOST_UNREACH. 
*/ - break; - } - break; - case ICMP_TIME_EXCEEDED: - if (code != ICMP_EXC_TTL) - return 0; - break; - } - - err = -ENOENT; - - t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, - iph->daddr, iph->saddr, 0); - if (t == NULL) - goto out; - - if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { - ipv4_update_pmtu(skb, dev_net(skb->dev), info, - t->parms.link, 0, IPPROTO_IPIP, 0); - err = 0; - goto out; - } - - err = 0; - if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) - goto out; - - if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO)) - t->err_count++; - else - t->err_count = 1; - t->err_time = jiffies; -out: - return err; -} - /* We dont digest the packet therefore let the packet pass */ static int vti_rcv(struct sk_buff *skb) { @@ -296,9 +232,8 @@ static void __net_init vti_fb_tunnel_init(struct net_device *dev) iph->ihl = 5; } -static struct xfrm_tunnel vti_handler __read_mostly = { +static struct xfrm_tunnel_notifier vti_handler __read_mostly = { .handler = vti_rcv, - .err_handler = vti_err, .priority = 1, }; diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index d7d9882d4cae..a62610443152 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -237,11 +237,11 @@ static void inet_get_ping_group_range_net(struct net *net, kgid_t *low, unsigned int seq; do { - seq = read_seqbegin(&sysctl_local_ports.lock); + seq = read_seqbegin(&net->ipv4.sysctl_local_ports.lock); *low = data[0]; *high = data[1]; - } while (read_seqretry(&sysctl_local_ports.lock, seq)); + } while (read_seqretry(&net->ipv4.sysctl_local_ports.lock, seq)); } @@ -713,6 +713,8 @@ int ping_v4_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, ipc.opt = NULL; ipc.oif = sk->sk_bound_dev_if; ipc.tx_flags = 0; + ipc.ttl = 0; + ipc.tos = -1; sock_tx_timestamp(sk, &ipc.tx_flags); @@ -744,7 +746,7 @@ int ping_v4_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, return -EINVAL; faddr = ipc.opt->opt.faddr; } - tos = RT_TOS(inet->tos); + tos = 
get_rttos(&ipc, inet); if (sock_flag(sk, SOCK_LOCALROUTE) || (msg->msg_flags & MSG_DONTROUTE) || (ipc.opt && ipc.opt->opt.is_strictroute)) { diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 193db03540ad..41e1d2845c8f 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -299,7 +299,7 @@ static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb) { /* Charge it to the socket. */ - ipv4_pktinfo_prepare(skb); + ipv4_pktinfo_prepare(sk, skb); if (sock_queue_rcv_skb(sk, skb) < 0) { kfree_skb(skb); return NET_RX_DROP; @@ -519,6 +519,8 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, ipc.addr = inet->inet_saddr; ipc.opt = NULL; ipc.tx_flags = 0; + ipc.ttl = 0; + ipc.tos = -1; ipc.oif = sk->sk_bound_dev_if; if (msg->msg_controllen) { @@ -558,7 +560,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, daddr = ipc.opt->opt.faddr; } } - tos = RT_CONN_FLAGS(sk); + tos = get_rtconn_flags(&ipc, sk); if (msg->msg_flags & MSG_DONTROUTE) tos |= RTO_ONLINK; diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 14a15c49129d..15e024105f91 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -89,8 +89,7 @@ __u32 cookie_init_timestamp(struct request_sock *req) static __u32 secure_tcp_syn_cookie(__be32 saddr, __be32 daddr, __be16 sport, - __be16 dport, __u32 sseq, __u32 count, - __u32 data) + __be16 dport, __u32 sseq, __u32 data) { /* * Compute the secure sequence number. @@ -102,7 +101,7 @@ static __u32 secure_tcp_syn_cookie(__be32 saddr, __be32 daddr, __be16 sport, * As an extra hack, we add a small "data" value that encodes the * MSS into the second hash value. 
*/ - + u32 count = tcp_cookie_time(); return (cookie_hash(saddr, daddr, sport, dport, 0, 0) + sseq + (count << COOKIEBITS) + ((cookie_hash(saddr, daddr, sport, dport, count, 1) + data) @@ -114,22 +113,21 @@ static __u32 secure_tcp_syn_cookie(__be32 saddr, __be32 daddr, __be16 sport, * If the syncookie is bad, the data returned will be out of * range. This must be checked by the caller. * - * The count value used to generate the cookie must be within - * "maxdiff" if the current (passed-in) "count". The return value - * is (__u32)-1 if this test fails. + * The count value used to generate the cookie must be less than + * MAX_SYNCOOKIE_AGE minutes in the past. + * The return value (__u32)-1 if this test fails. */ static __u32 check_tcp_syn_cookie(__u32 cookie, __be32 saddr, __be32 daddr, - __be16 sport, __be16 dport, __u32 sseq, - __u32 count, __u32 maxdiff) + __be16 sport, __be16 dport, __u32 sseq) { - __u32 diff; + u32 diff, count = tcp_cookie_time(); /* Strip away the layers from the cookie */ cookie -= cookie_hash(saddr, daddr, sport, dport, 0, 0) + sseq; /* Cookie is now reduced to (count * 2^24) ^ (hash % 2^24) */ diff = (count - (cookie >> COOKIEBITS)) & ((__u32) - 1 >> COOKIEBITS); - if (diff >= maxdiff) + if (diff >= MAX_SYNCOOKIE_AGE) return (__u32)-1; return (cookie - @@ -138,22 +136,22 @@ static __u32 check_tcp_syn_cookie(__u32 cookie, __be32 saddr, __be32 daddr, } /* - * MSS Values are taken from the 2009 paper - * 'Measuring TCP Maximum Segment Size' by S. Alcock and R. Nelson: - * - values 1440 to 1460 accounted for 80% of observed mss values - * - values outside the 536-1460 range are rare (<0.2%). + * MSS Values are chosen based on the 2011 paper + * 'An Analysis of TCP Maximum Segement Sizes' by S. Alcock and R. Nelson. + * Values .. + * .. lower than 536 are rare (< 0.2%) + * .. between 537 and 1299 account for less than < 1.5% of observed values + * .. in the 1300-1349 range account for about 15 to 20% of observed mss values + * .. 
exceeding 1460 are very rare (< 0.04%) * - * Table must be sorted. + * 1460 is the single most frequently announced mss value (30 to 46% depending + * on monitor location). Table must be sorted. */ static __u16 const msstab[] = { - 64, - 512, 536, - 1024, - 1440, + 1300, + 1440, /* 1440, 1452: PPPoE */ 1460, - 4312, - 8960, }; /* @@ -173,7 +171,7 @@ u32 __cookie_v4_init_sequence(const struct iphdr *iph, const struct tcphdr *th, return secure_tcp_syn_cookie(iph->saddr, iph->daddr, th->source, th->dest, ntohl(th->seq), - jiffies / (HZ * 60), mssind); + mssind); } EXPORT_SYMBOL_GPL(__cookie_v4_init_sequence); @@ -189,13 +187,6 @@ __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp) } /* - * This (misnamed) value is the age of syncookie which is permitted. - * Its ideal value should be dependent on TCP_TIMEOUT_INIT and - * sysctl_tcp_retries1. It's a rather complicated formula (exponential - * backoff) to compute at runtime so it's currently hardcoded here. - */ -#define COUNTER_TRIES 4 -/* * Check if a ack sequence number is a valid syncookie. * Return the decoded mss if it is, or 0 if not. */ @@ -204,9 +195,7 @@ int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th, { __u32 seq = ntohl(th->seq) - 1; __u32 mssind = check_tcp_syn_cookie(cookie, iph->saddr, iph->daddr, - th->source, th->dest, seq, - jiffies / (HZ * 60), - COUNTER_TRIES); + th->source, th->dest, seq); return mssind < ARRAY_SIZE(msstab) ? 
msstab[mssind] : 0; } diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 540279f4c531..c08f096d46b5 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -43,12 +43,12 @@ static int ip_ping_group_range_min[] = { 0, 0 }; static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX }; /* Update system visible IP port range */ -static void set_local_port_range(int range[2]) +static void set_local_port_range(struct net *net, int range[2]) { - write_seqlock(&sysctl_local_ports.lock); - sysctl_local_ports.range[0] = range[0]; - sysctl_local_ports.range[1] = range[1]; - write_sequnlock(&sysctl_local_ports.lock); + write_seqlock(&net->ipv4.sysctl_local_ports.lock); + net->ipv4.sysctl_local_ports.range[0] = range[0]; + net->ipv4.sysctl_local_ports.range[1] = range[1]; + write_sequnlock(&net->ipv4.sysctl_local_ports.lock); } /* Validate changes from /proc interface. */ @@ -56,6 +56,8 @@ static int ipv4_local_port_range(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { + struct net *net = + container_of(table->data, struct net, ipv4.sysctl_local_ports.range); int ret; int range[2]; struct ctl_table tmp = { @@ -66,14 +68,15 @@ static int ipv4_local_port_range(struct ctl_table *table, int write, .extra2 = &ip_local_port_range_max, }; - inet_get_local_port_range(range, range + 1); + inet_get_local_port_range(net, &range[0], &range[1]); + ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); if (write && ret == 0) { if (range[1] < range[0]) ret = -EINVAL; else - set_local_port_range(range); + set_local_port_range(net, range); } return ret; @@ -83,23 +86,27 @@ static int ipv4_local_port_range(struct ctl_table *table, int write, static void inet_get_ping_group_range_table(struct ctl_table *table, kgid_t *low, kgid_t *high) { kgid_t *data = table->data; + struct net *net = + container_of(table->data, struct net, ipv4.sysctl_ping_group_range); unsigned int seq; do { - seq = 
read_seqbegin(&sysctl_local_ports.lock); + seq = read_seqbegin(&net->ipv4.sysctl_local_ports.lock); *low = data[0]; *high = data[1]; - } while (read_seqretry(&sysctl_local_ports.lock, seq)); + } while (read_seqretry(&net->ipv4.sysctl_local_ports.lock, seq)); } /* Update system visible IP port range */ static void set_ping_group_range(struct ctl_table *table, kgid_t low, kgid_t high) { kgid_t *data = table->data; - write_seqlock(&sysctl_local_ports.lock); + struct net *net = + container_of(table->data, struct net, ipv4.sysctl_ping_group_range); + write_seqlock(&net->ipv4.sysctl_local_ports.lock); data[0] = low; data[1] = high; - write_sequnlock(&sysctl_local_ports.lock); + write_sequnlock(&net->ipv4.sysctl_local_ports.lock); } /* Validate changes from /proc interface. */ @@ -475,13 +482,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .procname = "ip_local_port_range", - .data = &sysctl_local_ports.range, - .maxlen = sizeof(sysctl_local_ports.range), - .mode = 0644, - .proc_handler = ipv4_local_port_range, - }, - { .procname = "ip_local_reserved_ports", .data = NULL, /* initialized in sysctl_ipv4_init */ .maxlen = 65536, @@ -854,6 +854,13 @@ static struct ctl_table ipv4_net_table[] = { .proc_handler = proc_dointvec }, { + .procname = "ip_local_port_range", + .maxlen = sizeof(init_net.ipv4.sysctl_local_ports.range), + .data = &init_net.ipv4.sysctl_local_ports.range, + .mode = 0644, + .proc_handler = ipv4_local_port_range, + }, + { .procname = "tcp_mem", .maxlen = sizeof(init_net.ipv4.sysctl_tcp_mem), .mode = 0644, @@ -888,6 +895,8 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) &net->ipv4.sysctl_ping_group_range; table[7].data = &net->ipv4.sysctl_tcp_ecn; + table[8].data = + &net->ipv4.sysctl_local_ports.range; /* Don't export sysctls to unprivileged users */ if (net->user_ns != &init_user_ns) @@ -901,6 +910,13 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) net->ipv4.sysctl_ping_group_range[0] = 
make_kgid(&init_user_ns, 1); net->ipv4.sysctl_ping_group_range[1] = make_kgid(&init_user_ns, 0); + /* + * Set defaults for local port range + */ + seqlock_init(&net->ipv4.sysctl_local_ports.lock); + net->ipv4.sysctl_local_ports.range[0] = 32768; + net->ipv4.sysctl_local_ports.range[1] = 61000; + tcp_init_mem(net); net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 113dc5f17d47..47b8ab7dce9c 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -267,11 +267,31 @@ static bool TCP_ECN_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr * 1. Tuning sk->sk_sndbuf, when connection enters established state. */ -static void tcp_fixup_sndbuf(struct sock *sk) +static void tcp_sndbuf_expand(struct sock *sk) { - int sndmem = SKB_TRUESIZE(tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER); + const struct tcp_sock *tp = tcp_sk(sk); + int sndmem, per_mss; + u32 nr_segs; + + /* Worst case is non GSO/TSO : each frame consumes one skb + * and skb->head is kmalloced using power of two area of memory + */ + per_mss = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) + + MAX_TCP_HEADER + + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + + per_mss = roundup_pow_of_two(per_mss) + + SKB_DATA_ALIGN(sizeof(struct sk_buff)); + + nr_segs = max_t(u32, TCP_INIT_CWND, tp->snd_cwnd); + nr_segs = max_t(u32, nr_segs, tp->reordering + 1); + + /* Fast Recovery (RFC 5681 3.2) : + * Cubic needs 1.7 factor, rounded to 2 to include + * extra cushion (application might react slowly to POLLOUT) + */ + sndmem = 2 * nr_segs * per_mss; - sndmem *= TCP_INIT_CWND; if (sk->sk_sndbuf < sndmem) sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]); } @@ -355,6 +375,12 @@ static void tcp_fixup_rcvbuf(struct sock *sk) rcvmem = 2 * SKB_TRUESIZE(mss + MAX_TCP_HEADER) * tcp_default_init_rwnd(mss); + /* Dynamic Right Sizing (DRS) has 2 to 3 RTT latency + * Allow enough cushion so that sender is not limited by our window + */ + if 
(sysctl_tcp_moderate_rcvbuf) + rcvmem <<= 2; + if (sk->sk_rcvbuf < rcvmem) sk->sk_rcvbuf = min(rcvmem, sysctl_tcp_rmem[2]); } @@ -370,9 +396,11 @@ void tcp_init_buffer_space(struct sock *sk) if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) tcp_fixup_rcvbuf(sk); if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK)) - tcp_fixup_sndbuf(sk); + tcp_sndbuf_expand(sk); tp->rcvq_space.space = tp->rcv_wnd; + tp->rcvq_space.time = tcp_time_stamp; + tp->rcvq_space.seq = tp->copied_seq; maxwin = tcp_full_space(sk); @@ -512,48 +540,62 @@ void tcp_rcv_space_adjust(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); int time; - int space; - - if (tp->rcvq_space.time == 0) - goto new_measure; + int copied; time = tcp_time_stamp - tp->rcvq_space.time; if (time < (tp->rcv_rtt_est.rtt >> 3) || tp->rcv_rtt_est.rtt == 0) return; - space = 2 * (tp->copied_seq - tp->rcvq_space.seq); + /* Number of bytes copied to user in last RTT */ + copied = tp->copied_seq - tp->rcvq_space.seq; + if (copied <= tp->rcvq_space.space) + goto new_measure; + + /* A bit of theory : + * copied = bytes received in previous RTT, our base window + * To cope with packet losses, we need a 2x factor + * To cope with slow start, and sender growing its cwin by 100 % + * every RTT, we need a 4x factor, because the ACK we are sending + * now is for the next RTT, not the current one : + * <prev RTT . ><current RTT .. ><next RTT .... > + */ - space = max(tp->rcvq_space.space, space); + if (sysctl_tcp_moderate_rcvbuf && + !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) { + int rcvwin, rcvmem, rcvbuf; - if (tp->rcvq_space.space != space) { - int rcvmem; + /* minimal window to cope with packet losses, assuming + * steady state. Add some cushion because of small variations. 
+ */ + rcvwin = (copied << 1) + 16 * tp->advmss; + + /* If rate increased by 25%, + * assume slow start, rcvwin = 3 * copied + * If rate increased by 50%, + * assume sender can use 2x growth, rcvwin = 4 * copied + */ + if (copied >= + tp->rcvq_space.space + (tp->rcvq_space.space >> 2)) { + if (copied >= + tp->rcvq_space.space + (tp->rcvq_space.space >> 1)) + rcvwin <<= 1; + else + rcvwin += (rcvwin >> 1); + } - tp->rcvq_space.space = space; + rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER); + while (tcp_win_from_space(rcvmem) < tp->advmss) + rcvmem += 128; - if (sysctl_tcp_moderate_rcvbuf && - !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) { - int new_clamp = space; + rcvbuf = min(rcvwin / tp->advmss * rcvmem, sysctl_tcp_rmem[2]); + if (rcvbuf > sk->sk_rcvbuf) { + sk->sk_rcvbuf = rcvbuf; - /* Receive space grows, normalize in order to - * take into account packet headers and sk_buff - * structure overhead. - */ - space /= tp->advmss; - if (!space) - space = 1; - rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER); - while (tcp_win_from_space(rcvmem) < tp->advmss) - rcvmem += 128; - space *= rcvmem; - space = min(space, sysctl_tcp_rmem[2]); - if (space > sk->sk_rcvbuf) { - sk->sk_rcvbuf = space; - - /* Make the window clamp follow along. */ - tp->window_clamp = new_clamp; - } + /* Make the window clamp follow along. */ + tp->window_clamp = rcvwin; } } + tp->rcvq_space.space = copied; new_measure: tp->rcvq_space.seq = tp->copied_seq; @@ -713,7 +755,7 @@ static void tcp_update_pacing_rate(struct sock *sk) if (tp->srtt > 8 + 2) do_div(rate, tp->srtt); - sk->sk_pacing_rate = min_t(u64, rate, ~0U); + sk->sk_pacing_rate = min_t(u64, rate, sk->sk_max_pacing_rate); } /* Calculate rto without backoff. 
This is the second half of Van Jacobson's @@ -2973,7 +3015,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, const struct inet_connection_sock *icsk = inet_csk(sk); struct sk_buff *skb; u32 now = tcp_time_stamp; - int fully_acked = true; + bool fully_acked = true; int flag = 0; u32 pkts_acked = 0; u32 reord = tp->packets_out; @@ -4704,15 +4746,7 @@ static void tcp_new_space(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); if (tcp_should_expand_sndbuf(sk)) { - int sndmem = SKB_TRUESIZE(max_t(u32, - tp->rx_opt.mss_clamp, - tp->mss_cache) + - MAX_TCP_HEADER); - int demanded = max_t(unsigned int, tp->snd_cwnd, - tp->reordering + 1); - sndmem *= 2 * demanded; - if (sndmem > sk->sk_sndbuf) - sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]); + tcp_sndbuf_expand(sk); tp->snd_cwnd_stamp = tcp_time_stamp; } @@ -5677,8 +5711,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, tcp_init_congestion_control(sk); tcp_mtup_init(sk); - tcp_init_buffer_space(sk); tp->copied_seq = tp->rcv_nxt; + tcp_init_buffer_space(sk); } smp_mb(); tcp_set_state(sk, TCP_ESTABLISHED); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index b14266bb91eb..5d6b1a609da8 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1410,8 +1410,8 @@ static int tcp_v4_conn_req_fastopen(struct sock *sk, inet_csk(child)->icsk_af_ops->rebuild_header(child); tcp_init_congestion_control(child); tcp_mtup_init(child); - tcp_init_buffer_space(child); tcp_init_metrics(child); + tcp_init_buffer_space(child); /* Queue the data carried in the SYN packet. We need to first * bump skb's refcnt because the caller will attempt to free it. 
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 0ca44df51ee9..4226c53daaed 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -103,6 +103,7 @@ #include <linux/seq_file.h> #include <net/net_namespace.h> #include <net/icmp.h> +#include <net/inet_hashtables.h> #include <net/route.h> #include <net/checksum.h> #include <net/xfrm.h> @@ -219,7 +220,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, unsigned short first, last; DECLARE_BITMAP(bitmap, PORTS_PER_CHAIN); - inet_get_local_port_range(&low, &high); + inet_get_local_port_range(net, &low, &high); remaining = (high - low) + 1; rand = net_random(); @@ -565,6 +566,26 @@ struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, } EXPORT_SYMBOL_GPL(udp4_lib_lookup); +static inline bool __udp_is_mcast_sock(struct net *net, struct sock *sk, + __be16 loc_port, __be32 loc_addr, + __be16 rmt_port, __be32 rmt_addr, + int dif, unsigned short hnum) +{ + struct inet_sock *inet = inet_sk(sk); + + if (!net_eq(sock_net(sk), net) || + udp_sk(sk)->udp_port_hash != hnum || + (inet->inet_daddr && inet->inet_daddr != rmt_addr) || + (inet->inet_dport != rmt_port && inet->inet_dport) || + (inet->inet_rcv_saddr && inet->inet_rcv_saddr != loc_addr) || + ipv6_only_sock(sk) || + (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)) + return false; + if (!ip_mc_sf_allow(sk, loc_addr, rmt_addr, dif)) + return false; + return true; +} + static inline struct sock *udp_v4_mcast_next(struct net *net, struct sock *sk, __be16 loc_port, __be32 loc_addr, __be16 rmt_port, __be32 rmt_addr, @@ -575,20 +596,11 @@ static inline struct sock *udp_v4_mcast_next(struct net *net, struct sock *sk, unsigned short hnum = ntohs(loc_port); sk_nulls_for_each_from(s, node) { - struct inet_sock *inet = inet_sk(s); - - if (!net_eq(sock_net(s), net) || - udp_sk(s)->udp_port_hash != hnum || - (inet->inet_daddr && inet->inet_daddr != rmt_addr) || - (inet->inet_dport != rmt_port && inet->inet_dport) || - (inet->inet_rcv_saddr && - 
inet->inet_rcv_saddr != loc_addr) || - ipv6_only_sock(s) || - (s->sk_bound_dev_if && s->sk_bound_dev_if != dif)) - continue; - if (!ip_mc_sf_allow(s, loc_addr, rmt_addr, dif)) - continue; - goto found; + if (__udp_is_mcast_sock(net, s, + loc_port, loc_addr, + rmt_port, rmt_addr, + dif, hnum)) + goto found; } s = NULL; found: @@ -855,6 +867,8 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, ipc.opt = NULL; ipc.tx_flags = 0; + ipc.ttl = 0; + ipc.tos = -1; getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; @@ -938,7 +952,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, faddr = ipc.opt->opt.faddr; connected = 0; } - tos = RT_TOS(inet->tos); + tos = get_rttos(&ipc, inet); if (sock_flag(sk, SOCK_LOCALROUTE) || (msg->msg_flags & MSG_DONTROUTE) || (ipc.opt && ipc.opt->opt.is_strictroute)) { @@ -1403,8 +1417,10 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { int rc; - if (inet_sk(sk)->inet_daddr) + if (inet_sk(sk)->inet_daddr) { sock_rps_save_rxhash(sk, skb); + sk_mark_napi_id(sk, skb); + } rc = sock_queue_rcv_skb(sk, skb); if (rc < 0) { @@ -1528,7 +1544,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) rc = 0; - ipv4_pktinfo_prepare(skb); + ipv4_pktinfo_prepare(sk, skb); bh_lock_sock(sk); if (!sock_owned_by_user(sk)) rc = __udp_queue_rcv_skb(sk, skb); @@ -1577,6 +1593,14 @@ static void flush_stack(struct sock **stack, unsigned int count, kfree_skb(skb1); } +static void udp_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + + dst_hold(dst); + sk->sk_rx_dst = dst; +} + /* * Multicasts and broadcasts go to each listener. 
* @@ -1705,16 +1729,32 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, if (udp4_csum_init(skb, uh, proto)) goto csum_error; - if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) - return __udp4_lib_mcast_deliver(net, skb, uh, - saddr, daddr, udptable); + if (skb->sk) { + int ret; + sk = skb->sk; + + if (unlikely(sk->sk_rx_dst == NULL)) + udp_sk_rx_dst_set(sk, skb); + + ret = udp_queue_rcv_skb(sk, skb); + + /* a return value > 0 means to resubmit the input, but + * it wants the return to be -protocol, or 0 + */ + if (ret > 0) + return -ret; + return 0; + } else { + if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) + return __udp4_lib_mcast_deliver(net, skb, uh, + saddr, daddr, udptable); - sk = __udp4_lib_lookup_skb(skb, uh->source, uh->dest, udptable); + sk = __udp4_lib_lookup_skb(skb, uh->source, uh->dest, udptable); + } if (sk != NULL) { int ret; - sk_mark_napi_id(sk, skb); ret = udp_queue_rcv_skb(sk, skb); sock_put(sk); @@ -1768,6 +1808,139 @@ drop: return 0; } +/* We can only early demux multicast if there is a single matching socket. + * If more than one socket is found, returns NULL + */ +static struct sock *__udp4_lib_mcast_demux_lookup(struct net *net, + __be16 loc_port, __be32 loc_addr, + __be16 rmt_port, __be32 rmt_addr, + int dif) +{ + struct sock *sk, *result; + struct hlist_nulls_node *node; + unsigned short hnum = ntohs(loc_port); + unsigned int count, slot = udp_hashfn(net, hnum, udp_table.mask); + struct udp_hslot *hslot = &udp_table.hash[slot]; + + rcu_read_lock(); +begin: + count = 0; + result = NULL; + sk_nulls_for_each_rcu(sk, node, &hslot->head) { + if (__udp_is_mcast_sock(net, sk, + loc_port, loc_addr, + rmt_port, rmt_addr, + dif, hnum)) { + result = sk; + ++count; + } + } + /* + * if the nulls value we got at the end of this lookup is + * not the expected one, we must restart lookup. + * We probably met an item that was moved to another chain.
+ */ + if (get_nulls_value(node) != slot) + goto begin; + + if (result) { + if (count != 1 || + unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) + result = NULL; + else if (unlikely(!__udp_is_mcast_sock(net, result, + loc_port, loc_addr, + rmt_port, rmt_addr, + dif, hnum))) { + sock_put(result); + result = NULL; + } + } + rcu_read_unlock(); + return result; +} + +/* For unicast we should only early demux connected sockets or we can + * break forwarding setups. The chains here can be long so only check + * if the first socket is an exact match and if not move on. + */ +static struct sock *__udp4_lib_demux_lookup(struct net *net, + __be16 loc_port, __be32 loc_addr, + __be16 rmt_port, __be32 rmt_addr, + int dif) +{ + struct sock *sk, *result; + struct hlist_nulls_node *node; + unsigned short hnum = ntohs(loc_port); + unsigned int hash2 = udp4_portaddr_hash(net, loc_addr, hnum); + unsigned int slot2 = hash2 & udp_table.mask; + struct udp_hslot *hslot2 = &udp_table.hash2[slot2]; + INET_ADDR_COOKIE(acookie, rmt_addr, loc_addr) + const __portpair ports = INET_COMBINED_PORTS(rmt_port, hnum); + + rcu_read_lock(); + result = NULL; + udp_portaddr_for_each_entry_rcu(sk, node, &hslot2->head) { + if (INET_MATCH(sk, net, acookie, + rmt_addr, loc_addr, ports, dif)) + result = sk; + /* Only check first socket in chain */ + break; + } + + if (result) { + if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) + result = NULL; + else if (unlikely(!INET_MATCH(sk, net, acookie, + rmt_addr, loc_addr, + ports, dif))) { + sock_put(result); + result = NULL; + } + } + rcu_read_unlock(); + return result; +} + +void udp_v4_early_demux(struct sk_buff *skb) +{ + const struct iphdr *iph; + const struct udphdr *uh; + struct sock *sk; + struct dst_entry *dst; + struct net *net = dev_net(skb->dev); + int dif = skb->dev->ifindex; + + /* validate the packet */ + if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct udphdr))) + return; + + /* pskb_may_pull() may reallocate the skb head; load header pointers only now */ + iph = ip_hdr(skb); + uh = udp_hdr(skb); + + if
(skb->pkt_type == PACKET_BROADCAST || + skb->pkt_type == PACKET_MULTICAST) + sk = __udp4_lib_mcast_demux_lookup(net, uh->dest, iph->daddr, + uh->source, iph->saddr, dif); + else if (skb->pkt_type == PACKET_HOST) + sk = __udp4_lib_demux_lookup(net, uh->dest, iph->daddr, + uh->source, iph->saddr, dif); + else + return; + + if (!sk) + return; + + skb->sk = sk; + skb->destructor = sock_edemux; + dst = sk->sk_rx_dst; + + if (dst) + dst = dst_check(dst, 0); + if (dst) + skb_dst_set_noref(skb, dst); +} + int udp_rcv(struct sk_buff *skb) { return __udp4_lib_rcv(skb, &udp_table, IPPROTO_UDP); diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index b5663c37f089..31b18152528f 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c @@ -16,13 +16,13 @@ #include <net/xfrm.h> /* Informational hook. The decap is still done here. */ -static struct xfrm_tunnel __rcu *rcv_notify_handlers __read_mostly; +static struct xfrm_tunnel_notifier __rcu *rcv_notify_handlers __read_mostly; static DEFINE_MUTEX(xfrm4_mode_tunnel_input_mutex); -int xfrm4_mode_tunnel_input_register(struct xfrm_tunnel *handler) +int xfrm4_mode_tunnel_input_register(struct xfrm_tunnel_notifier *handler) { - struct xfrm_tunnel __rcu **pprev; - struct xfrm_tunnel *t; + struct xfrm_tunnel_notifier __rcu **pprev; + struct xfrm_tunnel_notifier *t; int ret = -EEXIST; int priority = handler->priority; @@ -50,10 +50,10 @@ err: } EXPORT_SYMBOL_GPL(xfrm4_mode_tunnel_input_register); -int xfrm4_mode_tunnel_input_deregister(struct xfrm_tunnel *handler) +int xfrm4_mode_tunnel_input_deregister(struct xfrm_tunnel_notifier *handler) { - struct xfrm_tunnel __rcu **pprev; - struct xfrm_tunnel *t; + struct xfrm_tunnel_notifier __rcu **pprev; + struct xfrm_tunnel_notifier *t; int ret = -ENOENT; mutex_lock(&xfrm4_mode_tunnel_input_mutex); @@ -134,7 +134,7 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) static int xfrm4_mode_tunnel_input(struct xfrm_state *x, 
struct sk_buff *skb) { - struct xfrm_tunnel *handler; + struct xfrm_tunnel_notifier *handler; int err = -EINVAL; if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPIP) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 7c96100b021e..4966b124dc2e 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -1028,52 +1028,4 @@ out_unregister_tcp_proto: } module_init(inet6_init); -static void __exit inet6_exit(void) -{ - if (disable_ipv6_mod) - return; - - /* First of all disallow new sockets creation. */ - sock_unregister(PF_INET6); - /* Disallow any further netlink messages */ - rtnl_unregister_all(PF_INET6); - - udpv6_exit(); - udplitev6_exit(); - tcpv6_exit(); - - /* Cleanup code parts. */ - ipv6_packet_cleanup(); - ipv6_frag_exit(); - ipv6_exthdrs_exit(); - addrconf_cleanup(); - ip6_flowlabel_cleanup(); - ndisc_late_cleanup(); - ip6_route_cleanup(); -#ifdef CONFIG_PROC_FS - - /* Cleanup code parts. */ - if6_proc_exit(); - ipv6_misc_proc_exit(); - udplite6_proc_exit(); - raw6_proc_exit(); -#endif - ipv6_netfilter_fini(); - ipv6_stub = NULL; - igmp6_cleanup(); - ndisc_cleanup(); - ip6_mr_cleanup(); - icmpv6_cleanup(); - rawv6_exit(); - - unregister_pernet_subsys(&inet6_net_ops); - proto_unregister(&rawv6_prot); - proto_unregister(&udplitev6_prot); - proto_unregister(&udpv6_prot); - proto_unregister(&tcpv6_prot); - - rcu_barrier(); /* Wait for completion of call_rcu()'s */ -} -module_exit(inet6_exit); - MODULE_ALIAS_NETPROTO(PF_INET6); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 5bec666aba61..5550a8113a6d 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1529,25 +1529,6 @@ static void fib6_clean_tree(struct net *net, struct fib6_node *root, fib6_walk(&c.w); } -void fib6_clean_all_ro(struct net *net, int (*func)(struct rt6_info *, void *arg), - int prune, void *arg) -{ - struct fib6_table *table; - struct hlist_head *head; - unsigned int h; - - rcu_read_lock(); - for (h = 0; h < FIB6_TABLE_HASHSZ; h++) { - head = 
&net->ipv6.fib_table_hash[h]; - hlist_for_each_entry_rcu(table, head, tb6_hlist) { - read_lock_bh(&table->tb6_lock); - fib6_clean_tree(net, &table->tb6_root, - func, prune, arg); - read_unlock_bh(&table->tb6_lock); - } - } - rcu_read_unlock(); -} void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg), int prune, void *arg) { @@ -1782,3 +1763,189 @@ void fib6_gc_cleanup(void) unregister_pernet_subsys(&fib6_net_ops); kmem_cache_destroy(fib6_node_kmem); } + +#ifdef CONFIG_PROC_FS + +struct ipv6_route_iter { + struct seq_net_private p; + struct fib6_walker_t w; + loff_t skip; + struct fib6_table *tbl; + __u32 sernum; +}; + +static int ipv6_route_seq_show(struct seq_file *seq, void *v) +{ + struct rt6_info *rt = v; + struct ipv6_route_iter *iter = seq->private; + + seq_printf(seq, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen); + +#ifdef CONFIG_IPV6_SUBTREES + seq_printf(seq, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen); +#else + seq_puts(seq, "00000000000000000000000000000000 00 "); +#endif + if (rt->rt6i_flags & RTF_GATEWAY) + seq_printf(seq, "%pi6", &rt->rt6i_gateway); + else + seq_puts(seq, "00000000000000000000000000000000"); + + seq_printf(seq, " %08x %08x %08x %08x %8s\n", + rt->rt6i_metric, atomic_read(&rt->dst.__refcnt), + rt->dst.__use, rt->rt6i_flags, + rt->dst.dev ? 
rt->dst.dev->name : ""); + iter->w.leaf = NULL; + return 0; +} + +static int ipv6_route_yield(struct fib6_walker_t *w) +{ + struct ipv6_route_iter *iter = w->args; + + if (!iter->skip) + return 1; + + do { + iter->w.leaf = iter->w.leaf->dst.rt6_next; + iter->skip--; + if (!iter->skip && iter->w.leaf) + return 1; + } while (iter->w.leaf); + + return 0; +} + +static void ipv6_route_seq_setup_walk(struct ipv6_route_iter *iter) +{ + memset(&iter->w, 0, sizeof(iter->w)); + iter->w.func = ipv6_route_yield; + iter->w.root = &iter->tbl->tb6_root; + iter->w.state = FWS_INIT; + iter->w.node = iter->w.root; + iter->w.args = iter; + iter->sernum = iter->w.root->fn_sernum; + INIT_LIST_HEAD(&iter->w.lh); + fib6_walker_link(&iter->w); +} + +static struct fib6_table *ipv6_route_seq_next_table(struct fib6_table *tbl, + struct net *net) +{ + unsigned int h; + struct hlist_node *node; + + if (tbl) { + h = (tbl->tb6_id & (FIB6_TABLE_HASHSZ - 1)) + 1; + node = rcu_dereference_bh(hlist_next_rcu(&tbl->tb6_hlist)); + } else { + h = 0; + node = NULL; + } + + while (!node && h < FIB6_TABLE_HASHSZ) { + node = rcu_dereference_bh( + hlist_first_rcu(&net->ipv6.fib_table_hash[h++])); + } + return hlist_entry_safe(node, struct fib6_table, tb6_hlist); +} + +static void ipv6_route_check_sernum(struct ipv6_route_iter *iter) +{ + if (iter->sernum != iter->w.root->fn_sernum) { + iter->sernum = iter->w.root->fn_sernum; + iter->w.state = FWS_INIT; + iter->w.node = iter->w.root; + WARN_ON(iter->w.skip); + iter->w.skip = iter->w.count; + } +} + +static void *ipv6_route_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + int r; + struct rt6_info *n; + struct net *net = seq_file_net(seq); + struct ipv6_route_iter *iter = seq->private; + + if (!v) + goto iter_table; + + n = ((struct rt6_info *)v)->dst.rt6_next; + if (n) { + ++*pos; + return n; + } + +iter_table: + ipv6_route_check_sernum(iter); + read_lock(&iter->tbl->tb6_lock); + r = fib6_walk_continue(&iter->w); + read_unlock(&iter->tbl->tb6_lock); + 
if (r > 0) { + if (v) + ++*pos; + return iter->w.leaf; + } else if (r < 0) { + fib6_walker_unlink(&iter->w); + return NULL; + } + fib6_walker_unlink(&iter->w); + + iter->tbl = ipv6_route_seq_next_table(iter->tbl, net); + if (!iter->tbl) + return NULL; + + ipv6_route_seq_setup_walk(iter); + goto iter_table; +} + +static void *ipv6_route_seq_start(struct seq_file *seq, loff_t *pos) + __acquires(RCU_BH) +{ + struct net *net = seq_file_net(seq); + struct ipv6_route_iter *iter = seq->private; + + rcu_read_lock_bh(); + iter->tbl = ipv6_route_seq_next_table(NULL, net); + iter->skip = *pos; + + if (iter->tbl) { + ipv6_route_seq_setup_walk(iter); + return ipv6_route_seq_next(seq, NULL, pos); + } else { + return NULL; + } +} + +static bool ipv6_route_iter_active(struct ipv6_route_iter *iter) +{ + struct fib6_walker_t *w = &iter->w; + return w->node && !(w->state == FWS_U && w->node == w->root); +} + +static void ipv6_route_seq_stop(struct seq_file *seq, void *v) + __releases(RCU_BH) +{ + struct ipv6_route_iter *iter = seq->private; + + if (ipv6_route_iter_active(iter)) + fib6_walker_unlink(&iter->w); + + rcu_read_unlock_bh(); +} + +static const struct seq_operations ipv6_route_seq_ops = { + .start = ipv6_route_seq_start, + .next = ipv6_route_seq_next, + .stop = ipv6_route_seq_stop, + .show = ipv6_route_seq_show +}; + +int ipv6_route_open(struct inode *inode, struct file *file) +{ + return seq_open_net(inode, file, &ipv6_route_seq_ops, + sizeof(struct ipv6_route_iter)); +} + +#endif /* CONFIG_PROC_FS */ diff --git a/net/ipv6/route.c b/net/ipv6/route.c index c979dd96d82a..c3130ffc3bca 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1137,7 +1137,6 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_oif = oif; fl6.flowi6_mark = mark; - fl6.flowi6_flags = 0; fl6.daddr = iph->daddr; fl6.saddr = iph->saddr; fl6.flowlabel = ip6_flowinfo(iph); @@ -1236,7 +1235,6 @@ void ip6_redirect(struct sk_buff *skb, struct 
net *net, int oif, u32 mark) memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_oif = oif; fl6.flowi6_mark = mark; - fl6.flowi6_flags = 0; fl6.daddr = iph->daddr; fl6.saddr = iph->saddr; fl6.flowlabel = ip6_flowinfo(iph); @@ -1258,7 +1256,6 @@ void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif, memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_oif = oif; fl6.flowi6_mark = mark; - fl6.flowi6_flags = 0; fl6.daddr = msg->dest; fl6.saddr = iph->daddr; @@ -2800,56 +2797,12 @@ static int ip6_route_dev_notify(struct notifier_block *this, #ifdef CONFIG_PROC_FS -struct rt6_proc_arg -{ - char *buffer; - int offset; - int length; - int skip; - int len; -}; - -static int rt6_info_route(struct rt6_info *rt, void *p_arg) -{ - struct seq_file *m = p_arg; - - seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen); - -#ifdef CONFIG_IPV6_SUBTREES - seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen); -#else - seq_puts(m, "00000000000000000000000000000000 00 "); -#endif - if (rt->rt6i_flags & RTF_GATEWAY) { - seq_printf(m, "%pi6", &rt->rt6i_gateway); - } else { - seq_puts(m, "00000000000000000000000000000000"); - } - seq_printf(m, " %08x %08x %08x %08x %8s\n", - rt->rt6i_metric, atomic_read(&rt->dst.__refcnt), - rt->dst.__use, rt->rt6i_flags, - rt->dst.dev ? 
rt->dst.dev->name : ""); - return 0; -} - -static int ipv6_route_show(struct seq_file *m, void *v) -{ - struct net *net = (struct net *)m->private; - fib6_clean_all_ro(net, rt6_info_route, 0, m); - return 0; -} - -static int ipv6_route_open(struct inode *inode, struct file *file) -{ - return single_open_net(inode, file, ipv6_route_show); -} - static const struct file_operations ipv6_route_proc_fops = { .owner = THIS_MODULE, .open = ipv6_route_open, .read = seq_read, .llseek = seq_lseek, - .release = single_release_net, + .release = seq_release_net, }; static int rt6_stats_seq_show(struct seq_file *seq, void *v) diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index bf63ac8a49b9..d703218a653b 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -24,26 +24,21 @@ #define COOKIEBITS 24 /* Upper bits store count */ #define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1) -/* Table must be sorted. */ +/* RFC 2460, Section 8.3: + * [ipv6 tcp] MSS must be computed as the maximum packet size minus 60 [..] + * + * Due to IPV6_MIN_MTU=1280 the lowest possible MSS is 1220, which allows + * using higher values than ipv4 tcp syncookies. + * The other values are chosen based on ethernet (1500 and 9k MTU), plus + * one that accounts for common encap (PPPoe) overhead. Table must be sorted. + */ static __u16 const msstab[] = { - 64, - 512, - 536, - 1280 - 60, + 1280 - 60, /* IPV6_MIN_MTU - 60 */ 1480 - 60, 1500 - 60, - 4460 - 60, 9000 - 60, }; -/* - * This (misnamed) value is the age of syncookie which is permitted. - * Its ideal value should be dependent on TCP_TIMEOUT_INIT and - * sysctl_tcp_retries1. It's a rather complicated formula (exponential - * backoff) to compute at runtime so it's currently hardcoded here. 
- */ -#define COUNTER_TRIES 4 - static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct dst_entry *dst) @@ -86,8 +81,9 @@ static u32 cookie_hash(const struct in6_addr *saddr, const struct in6_addr *dadd static __u32 secure_tcp_syn_cookie(const struct in6_addr *saddr, const struct in6_addr *daddr, __be16 sport, __be16 dport, __u32 sseq, - __u32 count, __u32 data) + __u32 data) { + u32 count = tcp_cookie_time(); return (cookie_hash(saddr, daddr, sport, dport, 0, 0) + sseq + (count << COOKIEBITS) + ((cookie_hash(saddr, daddr, sport, dport, count, 1) + data) @@ -96,15 +92,14 @@ static __u32 secure_tcp_syn_cookie(const struct in6_addr *saddr, static __u32 check_tcp_syn_cookie(__u32 cookie, const struct in6_addr *saddr, const struct in6_addr *daddr, __be16 sport, - __be16 dport, __u32 sseq, __u32 count, - __u32 maxdiff) + __be16 dport, __u32 sseq) { - __u32 diff; + __u32 diff, count = tcp_cookie_time(); cookie -= cookie_hash(saddr, daddr, sport, dport, 0, 0) + sseq; diff = (count - (cookie >> COOKIEBITS)) & ((__u32) -1 >> COOKIEBITS); - if (diff >= maxdiff) + if (diff >= MAX_SYNCOOKIE_AGE) return (__u32)-1; return (cookie - @@ -125,8 +120,7 @@ u32 __cookie_v6_init_sequence(const struct ipv6hdr *iph, *mssp = msstab[mssind]; return secure_tcp_syn_cookie(&iph->saddr, &iph->daddr, th->source, - th->dest, ntohl(th->seq), - jiffies / (HZ * 60), mssind); + th->dest, ntohl(th->seq), mssind); } EXPORT_SYMBOL_GPL(__cookie_v6_init_sequence); @@ -146,8 +140,7 @@ int __cookie_v6_check(const struct ipv6hdr *iph, const struct tcphdr *th, { __u32 seq = ntohl(th->seq) - 1; __u32 mssind = check_tcp_syn_cookie(cookie, &iph->saddr, &iph->daddr, - th->source, th->dest, seq, - jiffies / (HZ * 60), COUNTER_TRIES); + th->source, th->dest, seq); return mssind < ARRAY_SIZE(msstab) ? 
msstab[mssind] : 0; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 5c71501fc917..dde8bad04481 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1811,7 +1811,7 @@ static void get_timewait6_sock(struct seq_file *seq, const struct in6_addr *dest, *src; __u16 destp, srcp; const struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw); - long delta = tw->tw_ttd - jiffies; + s32 delta = tw->tw_ttd - inet_tw_time_stamp(); dest = &tw6->tw_v6_daddr; src = &tw6->tw_v6_rcv_saddr; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 72b7eaaf3ca0..37532478e3ba 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -57,8 +57,6 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) { const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr; const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2); - __be32 sk1_rcv_saddr = sk_rcv_saddr(sk); - __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2); int sk_ipv6only = ipv6_only_sock(sk); int sk2_ipv6only = inet_v6_ipv6only(sk2); int addr_type = ipv6_addr_type(sk_rcv_saddr6); @@ -67,8 +65,8 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) /* if both are mapped, treat as IPv4 */ if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) return (!sk2_ipv6only && - (!sk1_rcv_saddr || !sk2_rcv_saddr || - sk1_rcv_saddr == sk2_rcv_saddr)); + (!sk->sk_rcv_saddr || !sk2->sk_rcv_saddr || + sk->sk_rcv_saddr == sk2->sk_rcv_saddr)); if (addr_type2 == IPV6_ADDR_ANY && !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED)) @@ -551,8 +549,10 @@ static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { int rc; - if (!ipv6_addr_any(&inet6_sk(sk)->daddr)) + if (!ipv6_addr_any(&inet6_sk(sk)->daddr)) { sock_rps_save_rxhash(sk, skb); + sk_mark_napi_id(sk, skb); + } rc = sock_queue_rcv_skb(sk, skb); if (rc < 0) { @@ -846,7 +846,6 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, if (sk != NULL) { int ret; - sk_mark_napi_id(sk, skb); ret = 
udpv6_queue_rcv_skb(sk, skb); sock_put(sk); diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index 1aba645882bd..3fb9dd6d02fc 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -77,13 +77,13 @@ DECLARE_EVENT_CLASS(local_sdata_addr_evt, TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY - __array(char, addr, 6) + __array(char, addr, ETH_ALEN) ), TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; - memcpy(__entry->addr, sdata->vif.addr, 6); + memcpy(__entry->addr, sdata->vif.addr, ETH_ALEN); ), TP_printk( diff --git a/net/netfilter/ipset/Kconfig b/net/netfilter/ipset/Kconfig index ba36c283d837..a2d6263b6c64 100644 --- a/net/netfilter/ipset/Kconfig +++ b/net/netfilter/ipset/Kconfig @@ -1,7 +1,7 @@ menuconfig IP_SET tristate "IP set support" depends on INET && NETFILTER - depends on NETFILTER_NETLINK + select NETFILTER_NETLINK help This option adds IP set support to the kernel. In order to define and use the sets, you need the userspace utility @@ -90,6 +90,15 @@ config IP_SET_HASH_IPPORTNET To compile it as a module, choose M here. If unsure, say N. +config IP_SET_HASH_NETPORTNET + tristate "hash:net,port,net set support" + depends on IP_SET + help + This option adds the hash:net,port,net set type support, by which + one can store two IPv4/IPv6 subnets, and a protocol/port in a set. + + To compile it as a module, choose M here. If unsure, say N. + config IP_SET_HASH_NET tristate "hash:net set support" depends on IP_SET @@ -99,6 +108,15 @@ config IP_SET_HASH_NET To compile it as a module, choose M here. If unsure, say N. +config IP_SET_HASH_NETNET + tristate "hash:net,net set support" + depends on IP_SET + help + This option adds the hash:net,net set type support, by which + one can store IPv4/IPv6 network address/prefix pairs in a set. + + To compile it as a module, choose M here. If unsure, say N. 
+ config IP_SET_HASH_NETPORT tristate "hash:net,port set support" depends on IP_SET diff --git a/net/netfilter/ipset/Makefile b/net/netfilter/ipset/Makefile index 6e965ecd5444..44b2d38476fa 100644 --- a/net/netfilter/ipset/Makefile +++ b/net/netfilter/ipset/Makefile @@ -20,6 +20,8 @@ obj-$(CONFIG_IP_SET_HASH_IPPORTNET) += ip_set_hash_ipportnet.o obj-$(CONFIG_IP_SET_HASH_NET) += ip_set_hash_net.o obj-$(CONFIG_IP_SET_HASH_NETPORT) += ip_set_hash_netport.o obj-$(CONFIG_IP_SET_HASH_NETIFACE) += ip_set_hash_netiface.o +obj-$(CONFIG_IP_SET_HASH_NETNET) += ip_set_hash_netnet.o +obj-$(CONFIG_IP_SET_HASH_NETPORTNET) += ip_set_hash_netportnet.o # list types obj-$(CONFIG_IP_SET_LIST_SET) += ip_set_list_set.o diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h index 25243379b887..a13e15be7911 100644 --- a/net/netfilter/ipset/ip_set_bitmap_gen.h +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -8,38 +8,32 @@ #ifndef __IP_SET_BITMAP_IP_GEN_H #define __IP_SET_BITMAP_IP_GEN_H -#define CONCAT(a, b) a##b -#define TOKEN(a,b) CONCAT(a, b) - -#define mtype_do_test TOKEN(MTYPE, _do_test) -#define mtype_gc_test TOKEN(MTYPE, _gc_test) -#define mtype_is_filled TOKEN(MTYPE, _is_filled) -#define mtype_do_add TOKEN(MTYPE, _do_add) -#define mtype_do_del TOKEN(MTYPE, _do_del) -#define mtype_do_list TOKEN(MTYPE, _do_list) -#define mtype_do_head TOKEN(MTYPE, _do_head) -#define mtype_adt_elem TOKEN(MTYPE, _adt_elem) -#define mtype_add_timeout TOKEN(MTYPE, _add_timeout) -#define mtype_gc_init TOKEN(MTYPE, _gc_init) -#define mtype_kadt TOKEN(MTYPE, _kadt) -#define mtype_uadt TOKEN(MTYPE, _uadt) -#define mtype_destroy TOKEN(MTYPE, _destroy) -#define mtype_flush TOKEN(MTYPE, _flush) -#define mtype_head TOKEN(MTYPE, _head) -#define mtype_same_set TOKEN(MTYPE, _same_set) -#define mtype_elem TOKEN(MTYPE, _elem) -#define mtype_test TOKEN(MTYPE, _test) -#define mtype_add TOKEN(MTYPE, _add) -#define mtype_del TOKEN(MTYPE, _del) -#define mtype_list TOKEN(MTYPE, 
_list) -#define mtype_gc TOKEN(MTYPE, _gc) +#define mtype_do_test IPSET_TOKEN(MTYPE, _do_test) +#define mtype_gc_test IPSET_TOKEN(MTYPE, _gc_test) +#define mtype_is_filled IPSET_TOKEN(MTYPE, _is_filled) +#define mtype_do_add IPSET_TOKEN(MTYPE, _do_add) +#define mtype_ext_cleanup IPSET_TOKEN(MTYPE, _ext_cleanup) +#define mtype_do_del IPSET_TOKEN(MTYPE, _do_del) +#define mtype_do_list IPSET_TOKEN(MTYPE, _do_list) +#define mtype_do_head IPSET_TOKEN(MTYPE, _do_head) +#define mtype_adt_elem IPSET_TOKEN(MTYPE, _adt_elem) +#define mtype_add_timeout IPSET_TOKEN(MTYPE, _add_timeout) +#define mtype_gc_init IPSET_TOKEN(MTYPE, _gc_init) +#define mtype_kadt IPSET_TOKEN(MTYPE, _kadt) +#define mtype_uadt IPSET_TOKEN(MTYPE, _uadt) +#define mtype_destroy IPSET_TOKEN(MTYPE, _destroy) +#define mtype_flush IPSET_TOKEN(MTYPE, _flush) +#define mtype_head IPSET_TOKEN(MTYPE, _head) +#define mtype_same_set IPSET_TOKEN(MTYPE, _same_set) +#define mtype_elem IPSET_TOKEN(MTYPE, _elem) +#define mtype_test IPSET_TOKEN(MTYPE, _test) +#define mtype_add IPSET_TOKEN(MTYPE, _add) +#define mtype_del IPSET_TOKEN(MTYPE, _del) +#define mtype_list IPSET_TOKEN(MTYPE, _list) +#define mtype_gc IPSET_TOKEN(MTYPE, _gc) #define mtype MTYPE -#define ext_timeout(e, m) \ - (unsigned long *)((e) + (m)->offset[IPSET_OFFSET_TIMEOUT]) -#define ext_counter(e, m) \ - (struct ip_set_counter *)((e) + (m)->offset[IPSET_OFFSET_COUNTER]) -#define get_ext(map, id) ((map)->extensions + (map)->dsize * (id)) +#define get_ext(set, map, id) ((map)->extensions + (set)->dsize * (id)) static void mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) @@ -49,11 +43,22 @@ mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) init_timer(&map->gc); map->gc.data = (unsigned long) set; map->gc.function = gc; - map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; + map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ; add_timer(&map->gc); } static void +mtype_ext_cleanup(struct ip_set *set) +{ 
+ struct mtype *map = set->data; + u32 id; + + for (id = 0; id < map->elements; id++) + if (test_bit(id, map->members)) + ip_set_ext_destroy(set, get_ext(set, map, id)); +} + +static void mtype_destroy(struct ip_set *set) { struct mtype *map = set->data; @@ -62,8 +67,11 @@ mtype_destroy(struct ip_set *set) del_timer_sync(&map->gc); ip_set_free(map->members); - if (map->dsize) + if (set->dsize) { + if (set->extensions & IPSET_EXT_DESTROY) + mtype_ext_cleanup(set); ip_set_free(map->extensions); + } kfree(map); set->data = NULL; @@ -74,6 +82,8 @@ mtype_flush(struct ip_set *set) { struct mtype *map = set->data; + if (set->extensions & IPSET_EXT_DESTROY) + mtype_ext_cleanup(set); memset(map->members, 0, map->memsize); } @@ -91,12 +101,9 @@ mtype_head(struct ip_set *set, struct sk_buff *skb) nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(sizeof(*map) + map->memsize + - map->dsize * map->elements)) || - (SET_WITH_TIMEOUT(set) && - nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout))) || - (SET_WITH_COUNTER(set) && - nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, - htonl(IPSET_FLAG_WITH_COUNTERS)))) + set->dsize * map->elements))) + goto nla_put_failure; + if (unlikely(ip_set_put_flags(skb, set))) goto nla_put_failure; ipset_nest_end(skb, nested); @@ -111,16 +118,16 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext, { struct mtype *map = set->data; const struct mtype_adt_elem *e = value; - void *x = get_ext(map, e->id); - int ret = mtype_do_test(e, map); + void *x = get_ext(set, map, e->id); + int ret = mtype_do_test(e, map, set->dsize); if (ret <= 0) return ret; if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(x, map))) + ip_set_timeout_expired(ext_timeout(x, set))) return 0; if (SET_WITH_COUNTER(set)) - ip_set_update_counter(ext_counter(x, map), ext, mext, flags); + ip_set_update_counter(ext_counter(x, set), ext, mext, flags); return 1; } @@ -130,26 +137,30 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext 
*ext, { struct mtype *map = set->data; const struct mtype_adt_elem *e = value; - void *x = get_ext(map, e->id); - int ret = mtype_do_add(e, map, flags); + void *x = get_ext(set, map, e->id); + int ret = mtype_do_add(e, map, flags, set->dsize); if (ret == IPSET_ADD_FAILED) { if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(x, map))) + ip_set_timeout_expired(ext_timeout(x, set))) ret = 0; else if (!(flags & IPSET_FLAG_EXIST)) return -IPSET_ERR_EXIST; + /* Element is re-added, cleanup extensions */ + ip_set_ext_destroy(set, x); } if (SET_WITH_TIMEOUT(set)) #ifdef IP_SET_BITMAP_STORED_TIMEOUT - mtype_add_timeout(ext_timeout(x, map), e, ext, map, ret); + mtype_add_timeout(ext_timeout(x, set), e, ext, set, map, ret); #else - ip_set_timeout_set(ext_timeout(x, map), ext->timeout); + ip_set_timeout_set(ext_timeout(x, set), ext->timeout); #endif if (SET_WITH_COUNTER(set)) - ip_set_init_counter(ext_counter(x, map), ext); + ip_set_init_counter(ext_counter(x, set), ext); + if (SET_WITH_COMMENT(set)) + ip_set_init_comment(ext_comment(x, set), ext); return 0; } @@ -159,16 +170,27 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, { struct mtype *map = set->data; const struct mtype_adt_elem *e = value; - const void *x = get_ext(map, e->id); + void *x = get_ext(set, map, e->id); - if (mtype_do_del(e, map) || - (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(x, map)))) + if (mtype_do_del(e, map)) + return -IPSET_ERR_EXIST; + + ip_set_ext_destroy(set, x); + if (SET_WITH_TIMEOUT(set) && + ip_set_timeout_expired(ext_timeout(x, set))) return -IPSET_ERR_EXIST; return 0; } +#ifndef IP_SET_BITMAP_STORED_TIMEOUT +static inline bool +mtype_is_filled(const struct mtype_elem *x) +{ + return true; +} +#endif + static int mtype_list(const struct ip_set *set, struct sk_buff *skb, struct netlink_callback *cb) @@ -183,13 +205,13 @@ mtype_list(const struct ip_set *set, return -EMSGSIZE; for (; cb->args[2] < map->elements; cb->args[2]++) { id = 
cb->args[2]; - x = get_ext(map, id); + x = get_ext(set, map, id); if (!test_bit(id, map->members) || (SET_WITH_TIMEOUT(set) && #ifdef IP_SET_BITMAP_STORED_TIMEOUT mtype_is_filled((const struct mtype_elem *) x) && #endif - ip_set_timeout_expired(ext_timeout(x, map)))) + ip_set_timeout_expired(ext_timeout(x, set)))) continue; nested = ipset_nest_start(skb, IPSET_ATTR_DATA); if (!nested) { @@ -199,23 +221,10 @@ mtype_list(const struct ip_set *set, } else goto nla_put_failure; } - if (mtype_do_list(skb, map, id)) + if (mtype_do_list(skb, map, id, set->dsize)) goto nla_put_failure; - if (SET_WITH_TIMEOUT(set)) { -#ifdef IP_SET_BITMAP_STORED_TIMEOUT - if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT, - htonl(ip_set_timeout_stored(map, id, - ext_timeout(x, map))))) - goto nla_put_failure; -#else - if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT, - htonl(ip_set_timeout_get( - ext_timeout(x, map))))) - goto nla_put_failure; -#endif - } - if (SET_WITH_COUNTER(set) && - ip_set_put_counter(skb, ext_counter(x, map))) + if (ip_set_put_extensions(skb, set, x, + mtype_is_filled((const struct mtype_elem *) x))) goto nla_put_failure; ipset_nest_end(skb, nested); } @@ -228,11 +237,11 @@ mtype_list(const struct ip_set *set, nla_put_failure: nla_nest_cancel(skb, nested); - ipset_nest_end(skb, adt); if (unlikely(id == first)) { cb->args[2] = 0; return -EMSGSIZE; } + ipset_nest_end(skb, adt); return 0; } @@ -241,21 +250,23 @@ mtype_gc(unsigned long ul_set) { struct ip_set *set = (struct ip_set *) ul_set; struct mtype *map = set->data; - const void *x; + void *x; u32 id; /* We run parallel with other readers (test element) * but adding/deleting new entries is locked out */ read_lock_bh(&set->lock); for (id = 0; id < map->elements; id++) - if (mtype_gc_test(id, map)) { - x = get_ext(map, id); - if (ip_set_timeout_expired(ext_timeout(x, map))) + if (mtype_gc_test(id, map, set->dsize)) { + x = get_ext(set, map, id); + if (ip_set_timeout_expired(ext_timeout(x, set))) { clear_bit(id, map->members); + 
ip_set_ext_destroy(set, x); + } } read_unlock_bh(&set->lock); - map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; + map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ; add_timer(&map->gc); } diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c index f1a8128bef01..6f1f9f494808 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ip.c +++ b/net/netfilter/ipset/ip_set_bitmap_ip.c @@ -25,12 +25,13 @@ #include <linux/netfilter/ipset/ip_set.h> #include <linux/netfilter/ipset/ip_set_bitmap.h> -#define REVISION_MIN 0 -#define REVISION_MAX 1 /* Counter support added */ +#define IPSET_TYPE_REV_MIN 0 +/* 1 Counter support added */ +#define IPSET_TYPE_REV_MAX 2 /* Comment support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); -IP_SET_MODULE_DESC("bitmap:ip", REVISION_MIN, REVISION_MAX); +IP_SET_MODULE_DESC("bitmap:ip", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_bitmap:ip"); #define MTYPE bitmap_ip @@ -44,10 +45,7 @@ struct bitmap_ip { u32 elements; /* number of max elements in the set */ u32 hosts; /* number of hosts in a subnet */ size_t memsize; /* members size */ - size_t dsize; /* extensions struct size */ - size_t offset[IPSET_OFFSET_MAX]; /* Offsets to extensions */ u8 netmask; /* subnet netmask */ - u32 timeout; /* timeout parameter */ struct timer_list gc; /* garbage collection */ }; @@ -65,20 +63,21 @@ ip_to_id(const struct bitmap_ip *m, u32 ip) /* Common functions */ static inline int -bitmap_ip_do_test(const struct bitmap_ip_adt_elem *e, struct bitmap_ip *map) +bitmap_ip_do_test(const struct bitmap_ip_adt_elem *e, + struct bitmap_ip *map, size_t dsize) { return !!test_bit(e->id, map->members); } static inline int -bitmap_ip_gc_test(u16 id, const struct bitmap_ip *map) +bitmap_ip_gc_test(u16 id, const struct bitmap_ip *map, size_t dsize) { return !!test_bit(id, map->members); } static inline int bitmap_ip_do_add(const struct bitmap_ip_adt_elem 
*e, struct bitmap_ip *map, - u32 flags) + u32 flags, size_t dsize) { return !!test_and_set_bit(e->id, map->members); } @@ -90,7 +89,8 @@ bitmap_ip_do_del(const struct bitmap_ip_adt_elem *e, struct bitmap_ip *map) } static inline int -bitmap_ip_do_list(struct sk_buff *skb, const struct bitmap_ip *map, u32 id) +bitmap_ip_do_list(struct sk_buff *skb, const struct bitmap_ip *map, u32 id, + size_t dsize) { return nla_put_ipaddr4(skb, IPSET_ATTR_IP, htonl(map->first_ip + id * map->hosts)); @@ -113,7 +113,7 @@ bitmap_ip_kadt(struct ip_set *set, const struct sk_buff *skb, struct bitmap_ip *map = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct bitmap_ip_adt_elem e = { }; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, map); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); u32 ip; ip = ntohl(ip4addr(skb, opt->flags & IPSET_DIM_ONE_SRC)); @@ -131,9 +131,9 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[], { struct bitmap_ip *map = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; - u32 ip, ip_to; + u32 ip = 0, ip_to = 0; struct bitmap_ip_adt_elem e = { }; - struct ip_set_ext ext = IP_SET_INIT_UEXT(map); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); int ret = 0; if (unlikely(!tb[IPSET_ATTR_IP] || @@ -200,7 +200,7 @@ bitmap_ip_same_set(const struct ip_set *a, const struct ip_set *b) return x->first_ip == y->first_ip && x->last_ip == y->last_ip && x->netmask == y->netmask && - x->timeout == y->timeout && + a->timeout == b->timeout && a->extensions == b->extensions; } @@ -209,25 +209,6 @@ bitmap_ip_same_set(const struct ip_set *a, const struct ip_set *b) struct bitmap_ip_elem { }; -/* Timeout variant */ - -struct bitmap_ipt_elem { - unsigned long timeout; -}; - -/* Plain variant with counter */ - -struct bitmap_ipc_elem { - struct ip_set_counter counter; -}; - -/* Timeout variant with counter */ - -struct bitmap_ipct_elem { - unsigned long timeout; - struct ip_set_counter counter; -}; - #include "ip_set_bitmap_gen.h" /* Create 
bitmap:ip type of sets */ @@ -240,8 +221,8 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map, map->members = ip_set_alloc(map->memsize); if (!map->members) return false; - if (map->dsize) { - map->extensions = ip_set_alloc(map->dsize * elements); + if (set->dsize) { + map->extensions = ip_set_alloc(set->dsize * elements); if (!map->extensions) { kfree(map->members); return false; @@ -252,7 +233,7 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map, map->elements = elements; map->hosts = hosts; map->netmask = netmask; - map->timeout = IPSET_NO_TIMEOUT; + set->timeout = IPSET_NO_TIMEOUT; set->data = map; set->family = NFPROTO_IPV4; @@ -261,10 +242,11 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map, } static int -bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags) +bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[], + u32 flags) { struct bitmap_ip *map; - u32 first_ip, last_ip, hosts, cadt_flags = 0; + u32 first_ip = 0, last_ip = 0, hosts; u64 elements; u8 netmask = 32; int ret; @@ -336,61 +318,15 @@ bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags) map->memsize = bitmap_bytes(0, elements - 1); set->variant = &bitmap_ip; - if (tb[IPSET_ATTR_CADT_FLAGS]) - cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); - if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) { - set->extensions |= IPSET_EXT_COUNTER; - if (tb[IPSET_ATTR_TIMEOUT]) { - map->dsize = sizeof(struct bitmap_ipct_elem); - map->offset[IPSET_OFFSET_TIMEOUT] = - offsetof(struct bitmap_ipct_elem, timeout); - map->offset[IPSET_OFFSET_COUNTER] = - offsetof(struct bitmap_ipct_elem, counter); - - if (!init_map_ip(set, map, first_ip, last_ip, - elements, hosts, netmask)) { - kfree(map); - return -ENOMEM; - } - - map->timeout = ip_set_timeout_uget( - tb[IPSET_ATTR_TIMEOUT]); - set->extensions |= IPSET_EXT_TIMEOUT; - - bitmap_ip_gc_init(set, bitmap_ip_gc); - } else { - map->dsize = sizeof(struct bitmap_ipc_elem); - 
map->offset[IPSET_OFFSET_COUNTER] = - offsetof(struct bitmap_ipc_elem, counter); - - if (!init_map_ip(set, map, first_ip, last_ip, - elements, hosts, netmask)) { - kfree(map); - return -ENOMEM; - } - } - } else if (tb[IPSET_ATTR_TIMEOUT]) { - map->dsize = sizeof(struct bitmap_ipt_elem); - map->offset[IPSET_OFFSET_TIMEOUT] = - offsetof(struct bitmap_ipt_elem, timeout); - - if (!init_map_ip(set, map, first_ip, last_ip, - elements, hosts, netmask)) { - kfree(map); - return -ENOMEM; - } - - map->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); - set->extensions |= IPSET_EXT_TIMEOUT; - + set->dsize = ip_set_elem_len(set, tb, 0); + if (!init_map_ip(set, map, first_ip, last_ip, + elements, hosts, netmask)) { + kfree(map); + return -ENOMEM; + } + if (tb[IPSET_ATTR_TIMEOUT]) { + set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); bitmap_ip_gc_init(set, bitmap_ip_gc); - } else { - map->dsize = 0; - if (!init_map_ip(set, map, first_ip, last_ip, - elements, hosts, netmask)) { - kfree(map); - return -ENOMEM; - } } return 0; } @@ -401,8 +337,8 @@ static struct ip_set_type bitmap_ip_type __read_mostly = { .features = IPSET_TYPE_IP, .dimension = IPSET_DIM_ONE, .family = NFPROTO_IPV4, - .revision_min = REVISION_MIN, - .revision_max = REVISION_MAX, + .revision_min = IPSET_TYPE_REV_MIN, + .revision_max = IPSET_TYPE_REV_MAX, .create = bitmap_ip_create, .create_policy = { [IPSET_ATTR_IP] = { .type = NLA_NESTED }, @@ -420,6 +356,7 @@ static struct ip_set_type bitmap_ip_type __read_mostly = { [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, }, .me = THIS_MODULE, }; diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index 3b30e0bef890..740eabededd9 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -25,12 +25,13 @@ #include 
<linux/netfilter/ipset/ip_set.h> #include <linux/netfilter/ipset/ip_set_bitmap.h> -#define REVISION_MIN 0 -#define REVISION_MAX 1 /* Counter support added */ +#define IPSET_TYPE_REV_MIN 0 +/* 1 Counter support added */ +#define IPSET_TYPE_REV_MAX 2 /* Comment support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); -IP_SET_MODULE_DESC("bitmap:ip,mac", REVISION_MIN, REVISION_MAX); +IP_SET_MODULE_DESC("bitmap:ip,mac", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_bitmap:ip,mac"); #define MTYPE bitmap_ipmac @@ -48,11 +49,8 @@ struct bitmap_ipmac { u32 first_ip; /* host byte order, included in range */ u32 last_ip; /* host byte order, included in range */ u32 elements; /* number of max elements in the set */ - u32 timeout; /* timeout value */ - struct timer_list gc; /* garbage collector */ size_t memsize; /* members size */ - size_t dsize; /* size of element */ - size_t offset[IPSET_OFFSET_MAX]; /* Offsets to extensions */ + struct timer_list gc; /* garbage collector */ }; /* ADT structure for generic function args */ @@ -82,13 +80,13 @@ get_elem(void *extensions, u16 id, size_t dsize) static inline int bitmap_ipmac_do_test(const struct bitmap_ipmac_adt_elem *e, - const struct bitmap_ipmac *map) + const struct bitmap_ipmac *map, size_t dsize) { const struct bitmap_ipmac_elem *elem; if (!test_bit(e->id, map->members)) return 0; - elem = get_elem(map->extensions, e->id, map->dsize); + elem = get_elem(map->extensions, e->id, dsize); if (elem->filled == MAC_FILLED) return e->ether == NULL || ether_addr_equal(e->ether, elem->ether); @@ -97,13 +95,13 @@ bitmap_ipmac_do_test(const struct bitmap_ipmac_adt_elem *e, } static inline int -bitmap_ipmac_gc_test(u16 id, const struct bitmap_ipmac *map) +bitmap_ipmac_gc_test(u16 id, const struct bitmap_ipmac *map, size_t dsize) { const struct bitmap_ipmac_elem *elem; if (!test_bit(id, map->members)) return 0; - elem = get_elem(map->extensions, id, map->dsize); + elem = 
get_elem(map->extensions, id, dsize); /* Timer not started for the incomplete elements */ return elem->filled == MAC_FILLED; } @@ -117,13 +115,13 @@ bitmap_ipmac_is_filled(const struct bitmap_ipmac_elem *elem) static inline int bitmap_ipmac_add_timeout(unsigned long *timeout, const struct bitmap_ipmac_adt_elem *e, - const struct ip_set_ext *ext, + const struct ip_set_ext *ext, struct ip_set *set, struct bitmap_ipmac *map, int mode) { u32 t = ext->timeout; if (mode == IPSET_ADD_START_STORED_TIMEOUT) { - if (t == map->timeout) + if (t == set->timeout) /* Timeout was not specified, get stored one */ t = *timeout; ip_set_timeout_set(timeout, t); @@ -142,11 +140,11 @@ bitmap_ipmac_add_timeout(unsigned long *timeout, static inline int bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e, - struct bitmap_ipmac *map, u32 flags) + struct bitmap_ipmac *map, u32 flags, size_t dsize) { struct bitmap_ipmac_elem *elem; - elem = get_elem(map->extensions, e->id, map->dsize); + elem = get_elem(map->extensions, e->id, dsize); if (test_and_set_bit(e->id, map->members)) { if (elem->filled == MAC_FILLED) { if (e->ether && (flags & IPSET_FLAG_EXIST)) @@ -178,22 +176,12 @@ bitmap_ipmac_do_del(const struct bitmap_ipmac_adt_elem *e, return !test_and_clear_bit(e->id, map->members); } -static inline unsigned long -ip_set_timeout_stored(struct bitmap_ipmac *map, u32 id, unsigned long *timeout) -{ - const struct bitmap_ipmac_elem *elem = - get_elem(map->extensions, id, map->dsize); - - return elem->filled == MAC_FILLED ? 
ip_set_timeout_get(timeout) : - *timeout; -} - static inline int bitmap_ipmac_do_list(struct sk_buff *skb, const struct bitmap_ipmac *map, - u32 id) + u32 id, size_t dsize) { const struct bitmap_ipmac_elem *elem = - get_elem(map->extensions, id, map->dsize); + get_elem(map->extensions, id, dsize); return nla_put_ipaddr4(skb, IPSET_ATTR_IP, htonl(map->first_ip + id)) || @@ -216,7 +204,7 @@ bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb, struct bitmap_ipmac *map = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct bitmap_ipmac_adt_elem e = {}; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, map); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); u32 ip; /* MAC can be src only */ @@ -245,8 +233,8 @@ bitmap_ipmac_uadt(struct ip_set *set, struct nlattr *tb[], const struct bitmap_ipmac *map = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct bitmap_ipmac_adt_elem e = {}; - struct ip_set_ext ext = IP_SET_INIT_UEXT(map); - u32 ip; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); + u32 ip = 0; int ret = 0; if (unlikely(!tb[IPSET_ATTR_IP] || @@ -285,43 +273,12 @@ bitmap_ipmac_same_set(const struct ip_set *a, const struct ip_set *b) return x->first_ip == y->first_ip && x->last_ip == y->last_ip && - x->timeout == y->timeout && + a->timeout == b->timeout && a->extensions == b->extensions; } /* Plain variant */ -/* Timeout variant */ - -struct bitmap_ipmact_elem { - struct { - unsigned char ether[ETH_ALEN]; - unsigned char filled; - } __attribute__ ((aligned)); - unsigned long timeout; -}; - -/* Plain variant with counter */ - -struct bitmap_ipmacc_elem { - struct { - unsigned char ether[ETH_ALEN]; - unsigned char filled; - } __attribute__ ((aligned)); - struct ip_set_counter counter; -}; - -/* Timeout variant with counter */ - -struct bitmap_ipmacct_elem { - struct { - unsigned char ether[ETH_ALEN]; - unsigned char filled; - } __attribute__ ((aligned)); - unsigned long timeout; - struct ip_set_counter counter; -}; - #include 
"ip_set_bitmap_gen.h" /* Create bitmap:ip,mac type of sets */ @@ -330,11 +287,11 @@ static bool init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map, u32 first_ip, u32 last_ip, u32 elements) { - map->members = ip_set_alloc((last_ip - first_ip + 1) * map->dsize); + map->members = ip_set_alloc(map->memsize); if (!map->members) return false; - if (map->dsize) { - map->extensions = ip_set_alloc(map->dsize * elements); + if (set->dsize) { + map->extensions = ip_set_alloc(set->dsize * elements); if (!map->extensions) { kfree(map->members); return false; @@ -343,7 +300,7 @@ init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map, map->first_ip = first_ip; map->last_ip = last_ip; map->elements = elements; - map->timeout = IPSET_NO_TIMEOUT; + set->timeout = IPSET_NO_TIMEOUT; set->data = map; set->family = NFPROTO_IPV4; @@ -352,10 +309,10 @@ init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map, } static int -bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[], +bitmap_ipmac_create(struct net *net, struct ip_set *set, struct nlattr *tb[], u32 flags) { - u32 first_ip, last_ip, cadt_flags = 0; + u32 first_ip = 0, last_ip = 0; u64 elements; struct bitmap_ipmac *map; int ret; @@ -399,57 +356,15 @@ bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[], map->memsize = bitmap_bytes(0, elements - 1); set->variant = &bitmap_ipmac; - if (tb[IPSET_ATTR_CADT_FLAGS]) - cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); - if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) { - set->extensions |= IPSET_EXT_COUNTER; - if (tb[IPSET_ATTR_TIMEOUT]) { - map->dsize = sizeof(struct bitmap_ipmacct_elem); - map->offset[IPSET_OFFSET_TIMEOUT] = - offsetof(struct bitmap_ipmacct_elem, timeout); - map->offset[IPSET_OFFSET_COUNTER] = - offsetof(struct bitmap_ipmacct_elem, counter); - - if (!init_map_ipmac(set, map, first_ip, last_ip, - elements)) { - kfree(map); - return -ENOMEM; - } - map->timeout = ip_set_timeout_uget( - tb[IPSET_ATTR_TIMEOUT]); - set->extensions |= 
IPSET_EXT_TIMEOUT; - bitmap_ipmac_gc_init(set, bitmap_ipmac_gc); - } else { - map->dsize = sizeof(struct bitmap_ipmacc_elem); - map->offset[IPSET_OFFSET_COUNTER] = - offsetof(struct bitmap_ipmacc_elem, counter); - - if (!init_map_ipmac(set, map, first_ip, last_ip, - elements)) { - kfree(map); - return -ENOMEM; - } - } - } else if (tb[IPSET_ATTR_TIMEOUT]) { - map->dsize = sizeof(struct bitmap_ipmact_elem); - map->offset[IPSET_OFFSET_TIMEOUT] = - offsetof(struct bitmap_ipmact_elem, timeout); - - if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) { - kfree(map); - return -ENOMEM; - } - map->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); - set->extensions |= IPSET_EXT_TIMEOUT; + set->dsize = ip_set_elem_len(set, tb, + sizeof(struct bitmap_ipmac_elem)); + if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) { + kfree(map); + return -ENOMEM; + } + if (tb[IPSET_ATTR_TIMEOUT]) { + set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); bitmap_ipmac_gc_init(set, bitmap_ipmac_gc); - } else { - map->dsize = sizeof(struct bitmap_ipmac_elem); - - if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) { - kfree(map); - return -ENOMEM; - } - set->variant = &bitmap_ipmac; } return 0; } @@ -460,8 +375,8 @@ static struct ip_set_type bitmap_ipmac_type = { .features = IPSET_TYPE_IP | IPSET_TYPE_MAC, .dimension = IPSET_DIM_TWO, .family = NFPROTO_IPV4, - .revision_min = REVISION_MIN, - .revision_max = REVISION_MAX, + .revision_min = IPSET_TYPE_REV_MIN, + .revision_max = IPSET_TYPE_REV_MAX, .create = bitmap_ipmac_create, .create_policy = { [IPSET_ATTR_IP] = { .type = NLA_NESTED }, @@ -478,6 +393,7 @@ static struct ip_set_type bitmap_ipmac_type = { [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, }, .me = THIS_MODULE, }; diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c index 
8207d1fda528..e7603c5b53d7 100644 --- a/net/netfilter/ipset/ip_set_bitmap_port.c +++ b/net/netfilter/ipset/ip_set_bitmap_port.c @@ -20,12 +20,13 @@ #include <linux/netfilter/ipset/ip_set_bitmap.h> #include <linux/netfilter/ipset/ip_set_getport.h> -#define REVISION_MIN 0 -#define REVISION_MAX 1 /* Counter support added */ +#define IPSET_TYPE_REV_MIN 0 +/* 1 Counter support added */ +#define IPSET_TYPE_REV_MAX 2 /* Comment support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); -IP_SET_MODULE_DESC("bitmap:port", REVISION_MIN, REVISION_MAX); +IP_SET_MODULE_DESC("bitmap:port", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_bitmap:port"); #define MTYPE bitmap_port @@ -38,9 +39,6 @@ struct bitmap_port { u16 last_port; /* host byte order, included in range */ u32 elements; /* number of max elements in the set */ size_t memsize; /* members size */ - size_t dsize; /* extensions struct size */ - size_t offset[IPSET_OFFSET_MAX]; /* Offsets to extensions */ - u32 timeout; /* timeout parameter */ struct timer_list gc; /* garbage collection */ }; @@ -59,20 +57,20 @@ port_to_id(const struct bitmap_port *m, u16 port) static inline int bitmap_port_do_test(const struct bitmap_port_adt_elem *e, - const struct bitmap_port *map) + const struct bitmap_port *map, size_t dsize) { return !!test_bit(e->id, map->members); } static inline int -bitmap_port_gc_test(u16 id, const struct bitmap_port *map) +bitmap_port_gc_test(u16 id, const struct bitmap_port *map, size_t dsize) { return !!test_bit(id, map->members); } static inline int bitmap_port_do_add(const struct bitmap_port_adt_elem *e, - struct bitmap_port *map, u32 flags) + struct bitmap_port *map, u32 flags, size_t dsize) { return !!test_and_set_bit(e->id, map->members); } @@ -85,7 +83,8 @@ bitmap_port_do_del(const struct bitmap_port_adt_elem *e, } static inline int -bitmap_port_do_list(struct sk_buff *skb, const struct bitmap_port *map, u32 id) +bitmap_port_do_list(struct 
sk_buff *skb, const struct bitmap_port *map, u32 id, + size_t dsize) { return nla_put_net16(skb, IPSET_ATTR_PORT, htons(map->first_port + id)); @@ -106,7 +105,7 @@ bitmap_port_kadt(struct ip_set *set, const struct sk_buff *skb, struct bitmap_port *map = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct bitmap_port_adt_elem e = {}; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, map); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); __be16 __port; u16 port = 0; @@ -131,7 +130,7 @@ bitmap_port_uadt(struct ip_set *set, struct nlattr *tb[], struct bitmap_port *map = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct bitmap_port_adt_elem e = {}; - struct ip_set_ext ext = IP_SET_INIT_UEXT(map); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 port; /* wraparound */ u16 port_to; int ret = 0; @@ -191,7 +190,7 @@ bitmap_port_same_set(const struct ip_set *a, const struct ip_set *b) return x->first_port == y->first_port && x->last_port == y->last_port && - x->timeout == y->timeout && + a->timeout == b->timeout && a->extensions == b->extensions; } @@ -200,25 +199,6 @@ bitmap_port_same_set(const struct ip_set *a, const struct ip_set *b) struct bitmap_port_elem { }; -/* Timeout variant */ - -struct bitmap_portt_elem { - unsigned long timeout; -}; - -/* Plain variant with counter */ - -struct bitmap_portc_elem { - struct ip_set_counter counter; -}; - -/* Timeout variant with counter */ - -struct bitmap_portct_elem { - unsigned long timeout; - struct ip_set_counter counter; -}; - #include "ip_set_bitmap_gen.h" /* Create bitmap:ip type of sets */ @@ -230,8 +210,8 @@ init_map_port(struct ip_set *set, struct bitmap_port *map, map->members = ip_set_alloc(map->memsize); if (!map->members) return false; - if (map->dsize) { - map->extensions = ip_set_alloc(map->dsize * map->elements); + if (set->dsize) { + map->extensions = ip_set_alloc(set->dsize * map->elements); if (!map->extensions) { kfree(map->members); return false; @@ -239,7 +219,7 @@ 
init_map_port(struct ip_set *set, struct bitmap_port *map, } map->first_port = first_port; map->last_port = last_port; - map->timeout = IPSET_NO_TIMEOUT; + set->timeout = IPSET_NO_TIMEOUT; set->data = map; set->family = NFPROTO_UNSPEC; @@ -248,11 +228,11 @@ init_map_port(struct ip_set *set, struct bitmap_port *map, } static int -bitmap_port_create(struct ip_set *set, struct nlattr *tb[], u32 flags) +bitmap_port_create(struct net *net, struct ip_set *set, struct nlattr *tb[], + u32 flags) { struct bitmap_port *map; u16 first_port, last_port; - u32 cadt_flags = 0; if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || !ip_set_attr_netorder(tb, IPSET_ATTR_PORT_TO) || @@ -276,53 +256,14 @@ bitmap_port_create(struct ip_set *set, struct nlattr *tb[], u32 flags) map->elements = last_port - first_port + 1; map->memsize = map->elements * sizeof(unsigned long); set->variant = &bitmap_port; - if (tb[IPSET_ATTR_CADT_FLAGS]) - cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); - if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) { - set->extensions |= IPSET_EXT_COUNTER; - if (tb[IPSET_ATTR_TIMEOUT]) { - map->dsize = sizeof(struct bitmap_portct_elem); - map->offset[IPSET_OFFSET_TIMEOUT] = - offsetof(struct bitmap_portct_elem, timeout); - map->offset[IPSET_OFFSET_COUNTER] = - offsetof(struct bitmap_portct_elem, counter); - if (!init_map_port(set, map, first_port, last_port)) { - kfree(map); - return -ENOMEM; - } - - map->timeout = - ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); - set->extensions |= IPSET_EXT_TIMEOUT; - bitmap_port_gc_init(set, bitmap_port_gc); - } else { - map->dsize = sizeof(struct bitmap_portc_elem); - map->offset[IPSET_OFFSET_COUNTER] = - offsetof(struct bitmap_portc_elem, counter); - if (!init_map_port(set, map, first_port, last_port)) { - kfree(map); - return -ENOMEM; - } - } - } else if (tb[IPSET_ATTR_TIMEOUT]) { - map->dsize = sizeof(struct bitmap_portt_elem); - map->offset[IPSET_OFFSET_TIMEOUT] = - offsetof(struct bitmap_portt_elem, timeout); - if 
(!init_map_port(set, map, first_port, last_port)) { - kfree(map); - return -ENOMEM; - } - - map->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); - set->extensions |= IPSET_EXT_TIMEOUT; + set->dsize = ip_set_elem_len(set, tb, 0); + if (!init_map_port(set, map, first_port, last_port)) { + kfree(map); + return -ENOMEM; + } + if (tb[IPSET_ATTR_TIMEOUT]) { + set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); bitmap_port_gc_init(set, bitmap_port_gc); - } else { - map->dsize = 0; - if (!init_map_port(set, map, first_port, last_port)) { - kfree(map); - return -ENOMEM; - } - } return 0; } @@ -333,8 +274,8 @@ static struct ip_set_type bitmap_port_type = { .features = IPSET_TYPE_PORT, .dimension = IPSET_DIM_ONE, .family = NFPROTO_UNSPEC, - .revision_min = REVISION_MIN, - .revision_max = REVISION_MAX, + .revision_min = IPSET_TYPE_REV_MIN, + .revision_max = IPSET_TYPE_REV_MAX, .create = bitmap_port_create, .create_policy = { [IPSET_ATTR_PORT] = { .type = NLA_U16 }, @@ -349,6 +290,7 @@ static struct ip_set_type bitmap_port_type = { [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, }, .me = THIS_MODULE, }; diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index f2e30fb31e78..dc9284bdd2dd 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -17,6 +17,8 @@ #include <linux/spinlock.h> #include <linux/rculist.h> #include <net/netlink.h> +#include <net/net_namespace.h> +#include <net/netns/generic.h> #include <linux/netfilter.h> #include <linux/netfilter/x_tables.h> @@ -27,8 +29,17 @@ static LIST_HEAD(ip_set_type_list); /* all registered set types */ static DEFINE_MUTEX(ip_set_type_mutex); /* protects ip_set_type_list */ static DEFINE_RWLOCK(ip_set_ref_lock); /* protects the set refs */ -static struct ip_set * __rcu *ip_set_list; /* all individual sets */ -static 
ip_set_id_t ip_set_max = CONFIG_IP_SET_MAX; /* max number of sets */ +struct ip_set_net { + struct ip_set * __rcu *ip_set_list; /* all individual sets */ + ip_set_id_t ip_set_max; /* max number of sets */ + int is_deleted; /* deleted by ip_set_net_exit */ +}; +static int ip_set_net_id __read_mostly; + +static inline struct ip_set_net *ip_set_pernet(struct net *net) +{ + return net_generic(net, ip_set_net_id); +} #define IP_SET_INC 64 #define STREQ(a, b) (strncmp(a, b, IPSET_MAXNAMELEN) == 0) @@ -45,8 +56,8 @@ MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET); /* When the nfnl mutex is held: */ #define nfnl_dereference(p) \ rcu_dereference_protected(p, 1) -#define nfnl_set(id) \ - nfnl_dereference(ip_set_list)[id] +#define nfnl_set(inst, id) \ + nfnl_dereference((inst)->ip_set_list)[id] /* * The set types are implemented in modules and registered set types @@ -315,6 +326,60 @@ ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr) } EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6); +typedef void (*destroyer)(void *); +/* ipset data extension types, in size order */ + +const struct ip_set_ext_type ip_set_extensions[] = { + [IPSET_EXT_ID_COUNTER] = { + .type = IPSET_EXT_COUNTER, + .flag = IPSET_FLAG_WITH_COUNTERS, + .len = sizeof(struct ip_set_counter), + .align = __alignof__(struct ip_set_counter), + }, + [IPSET_EXT_ID_TIMEOUT] = { + .type = IPSET_EXT_TIMEOUT, + .len = sizeof(unsigned long), + .align = __alignof__(unsigned long), + }, + [IPSET_EXT_ID_COMMENT] = { + .type = IPSET_EXT_COMMENT | IPSET_EXT_DESTROY, + .flag = IPSET_FLAG_WITH_COMMENT, + .len = sizeof(struct ip_set_comment), + .align = __alignof__(struct ip_set_comment), + .destroy = (destroyer) ip_set_comment_free, + }, +}; +EXPORT_SYMBOL_GPL(ip_set_extensions); + +static inline bool +add_extension(enum ip_set_ext_id id, u32 flags, struct nlattr *tb[]) +{ + return ip_set_extensions[id].flag ? 
+ (flags & ip_set_extensions[id].flag) : + !!tb[IPSET_ATTR_TIMEOUT]; +} + +size_t +ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], size_t len) +{ + enum ip_set_ext_id id; + size_t offset = 0; + u32 cadt_flags = 0; + + if (tb[IPSET_ATTR_CADT_FLAGS]) + cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + for (id = 0; id < IPSET_EXT_ID_MAX; id++) { + if (!add_extension(id, cadt_flags, tb)) + continue; + offset += ALIGN(len + offset, ip_set_extensions[id].align); + set->offset[id] = offset; + set->extensions |= ip_set_extensions[id].type; + offset += ip_set_extensions[id].len; + } + return len + offset; +} +EXPORT_SYMBOL_GPL(ip_set_elem_len); + int ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[], struct ip_set_ext *ext) @@ -334,6 +399,12 @@ ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[], ext->packets = be64_to_cpu(nla_get_be64( tb[IPSET_ATTR_PACKETS])); } + if (tb[IPSET_ATTR_COMMENT]) { + if (!(set->extensions & IPSET_EXT_COMMENT)) + return -IPSET_ERR_COMMENT; + ext->comment = ip_set_comment_uget(tb[IPSET_ATTR_COMMENT]); + } + return 0; } EXPORT_SYMBOL_GPL(ip_set_get_extensions); @@ -374,13 +445,14 @@ __ip_set_put(struct ip_set *set) */ static inline struct ip_set * -ip_set_rcu_get(ip_set_id_t index) +ip_set_rcu_get(struct net *net, ip_set_id_t index) { struct ip_set *set; + struct ip_set_net *inst = ip_set_pernet(net); rcu_read_lock(); /* ip_set_list itself needs to be protected */ - set = rcu_dereference(ip_set_list)[index]; + set = rcu_dereference(inst->ip_set_list)[index]; rcu_read_unlock(); return set; @@ -390,7 +462,8 @@ int ip_set_test(ip_set_id_t index, const struct sk_buff *skb, const struct xt_action_param *par, struct ip_set_adt_opt *opt) { - struct ip_set *set = ip_set_rcu_get(index); + struct ip_set *set = ip_set_rcu_get( + dev_net(par->in ? 
par->in : par->out), index); int ret = 0; BUG_ON(set == NULL); @@ -428,7 +501,8 @@ int ip_set_add(ip_set_id_t index, const struct sk_buff *skb, const struct xt_action_param *par, struct ip_set_adt_opt *opt) { - struct ip_set *set = ip_set_rcu_get(index); + struct ip_set *set = ip_set_rcu_get( + dev_net(par->in ? par->in : par->out), index); int ret; BUG_ON(set == NULL); @@ -450,7 +524,8 @@ int ip_set_del(ip_set_id_t index, const struct sk_buff *skb, const struct xt_action_param *par, struct ip_set_adt_opt *opt) { - struct ip_set *set = ip_set_rcu_get(index); + struct ip_set *set = ip_set_rcu_get( + dev_net(par->in ? par->in : par->out), index); int ret = 0; BUG_ON(set == NULL); @@ -474,14 +549,15 @@ EXPORT_SYMBOL_GPL(ip_set_del); * */ ip_set_id_t -ip_set_get_byname(const char *name, struct ip_set **set) +ip_set_get_byname(struct net *net, const char *name, struct ip_set **set) { ip_set_id_t i, index = IPSET_INVALID_ID; struct ip_set *s; + struct ip_set_net *inst = ip_set_pernet(net); rcu_read_lock(); - for (i = 0; i < ip_set_max; i++) { - s = rcu_dereference(ip_set_list)[i]; + for (i = 0; i < inst->ip_set_max; i++) { + s = rcu_dereference(inst->ip_set_list)[i]; if (s != NULL && STREQ(s->name, name)) { __ip_set_get(s); index = i; @@ -501,17 +577,26 @@ EXPORT_SYMBOL_GPL(ip_set_get_byname); * to be valid, after calling this function. 
* */ -void -ip_set_put_byindex(ip_set_id_t index) + +static inline void +__ip_set_put_byindex(struct ip_set_net *inst, ip_set_id_t index) { struct ip_set *set; rcu_read_lock(); - set = rcu_dereference(ip_set_list)[index]; + set = rcu_dereference(inst->ip_set_list)[index]; if (set != NULL) __ip_set_put(set); rcu_read_unlock(); } + +void +ip_set_put_byindex(struct net *net, ip_set_id_t index) +{ + struct ip_set_net *inst = ip_set_pernet(net); + + __ip_set_put_byindex(inst, index); +} EXPORT_SYMBOL_GPL(ip_set_put_byindex); /* @@ -522,9 +607,9 @@ EXPORT_SYMBOL_GPL(ip_set_put_byindex); * */ const char * -ip_set_name_byindex(ip_set_id_t index) +ip_set_name_byindex(struct net *net, ip_set_id_t index) { - const struct ip_set *set = ip_set_rcu_get(index); + const struct ip_set *set = ip_set_rcu_get(net, index); BUG_ON(set == NULL); BUG_ON(set->ref == 0); @@ -546,14 +631,15 @@ EXPORT_SYMBOL_GPL(ip_set_name_byindex); * The nfnl mutex is used in the function. */ ip_set_id_t -ip_set_nfnl_get(const char *name) +ip_set_nfnl_get(struct net *net, const char *name) { ip_set_id_t i, index = IPSET_INVALID_ID; struct ip_set *s; + struct ip_set_net *inst = ip_set_pernet(net); nfnl_lock(NFNL_SUBSYS_IPSET); - for (i = 0; i < ip_set_max; i++) { - s = nfnl_set(i); + for (i = 0; i < inst->ip_set_max; i++) { + s = nfnl_set(inst, i); if (s != NULL && STREQ(s->name, name)) { __ip_set_get(s); index = i; @@ -573,15 +659,16 @@ EXPORT_SYMBOL_GPL(ip_set_nfnl_get); * The nfnl mutex is used in the function. */ ip_set_id_t -ip_set_nfnl_get_byindex(ip_set_id_t index) +ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index) { struct ip_set *set; + struct ip_set_net *inst = ip_set_pernet(net); - if (index > ip_set_max) + if (index > inst->ip_set_max) return IPSET_INVALID_ID; nfnl_lock(NFNL_SUBSYS_IPSET); - set = nfnl_set(index); + set = nfnl_set(inst, index); if (set) __ip_set_get(set); else @@ -600,13 +687,17 @@ EXPORT_SYMBOL_GPL(ip_set_nfnl_get_byindex); * The nfnl mutex is used in the function. 
*/ void -ip_set_nfnl_put(ip_set_id_t index) +ip_set_nfnl_put(struct net *net, ip_set_id_t index) { struct ip_set *set; + struct ip_set_net *inst = ip_set_pernet(net); + nfnl_lock(NFNL_SUBSYS_IPSET); - set = nfnl_set(index); - if (set != NULL) - __ip_set_put(set); + if (!inst->is_deleted) { /* already deleted from ip_set_net_exit() */ + set = nfnl_set(inst, index); + if (set != NULL) + __ip_set_put(set); + } nfnl_unlock(NFNL_SUBSYS_IPSET); } EXPORT_SYMBOL_GPL(ip_set_nfnl_put); @@ -664,14 +755,14 @@ static const struct nla_policy ip_set_create_policy[IPSET_ATTR_CMD_MAX + 1] = { }; static struct ip_set * -find_set_and_id(const char *name, ip_set_id_t *id) +find_set_and_id(struct ip_set_net *inst, const char *name, ip_set_id_t *id) { struct ip_set *set = NULL; ip_set_id_t i; *id = IPSET_INVALID_ID; - for (i = 0; i < ip_set_max; i++) { - set = nfnl_set(i); + for (i = 0; i < inst->ip_set_max; i++) { + set = nfnl_set(inst, i); if (set != NULL && STREQ(set->name, name)) { *id = i; break; @@ -681,22 +772,23 @@ find_set_and_id(const char *name, ip_set_id_t *id) } static inline struct ip_set * -find_set(const char *name) +find_set(struct ip_set_net *inst, const char *name) { ip_set_id_t id; - return find_set_and_id(name, &id); + return find_set_and_id(inst, name, &id); } static int -find_free_id(const char *name, ip_set_id_t *index, struct ip_set **set) +find_free_id(struct ip_set_net *inst, const char *name, ip_set_id_t *index, + struct ip_set **set) { struct ip_set *s; ip_set_id_t i; *index = IPSET_INVALID_ID; - for (i = 0; i < ip_set_max; i++) { - s = nfnl_set(i); + for (i = 0; i < inst->ip_set_max; i++) { + s = nfnl_set(inst, i); if (s == NULL) { if (*index == IPSET_INVALID_ID) *index = i; @@ -725,6 +817,8 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[]) { + struct net *net = sock_net(ctnl); + struct ip_set_net *inst = ip_set_pernet(net); struct ip_set *set, *clash = NULL; ip_set_id_t index = 
IPSET_INVALID_ID; struct nlattr *tb[IPSET_ATTR_CREATE_MAX+1] = {}; @@ -783,7 +877,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, goto put_out; } - ret = set->type->create(set, tb, flags); + ret = set->type->create(net, set, tb, flags); if (ret != 0) goto put_out; @@ -794,7 +888,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, * by the nfnl mutex. Find the first free index in ip_set_list * and check clashing. */ - ret = find_free_id(set->name, &index, &clash); + ret = find_free_id(inst, set->name, &index, &clash); if (ret == -EEXIST) { /* If this is the same set and requested, ignore error */ if ((flags & IPSET_FLAG_EXIST) && @@ -807,9 +901,9 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, goto cleanup; } else if (ret == -IPSET_ERR_MAX_SETS) { struct ip_set **list, **tmp; - ip_set_id_t i = ip_set_max + IP_SET_INC; + ip_set_id_t i = inst->ip_set_max + IP_SET_INC; - if (i < ip_set_max || i == IPSET_INVALID_ID) + if (i < inst->ip_set_max || i == IPSET_INVALID_ID) /* Wraparound */ goto cleanup; @@ -817,14 +911,14 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, if (!list) goto cleanup; /* nfnl mutex is held, both lists are valid */ - tmp = nfnl_dereference(ip_set_list); - memcpy(list, tmp, sizeof(struct ip_set *) * ip_set_max); - rcu_assign_pointer(ip_set_list, list); + tmp = nfnl_dereference(inst->ip_set_list); + memcpy(list, tmp, sizeof(struct ip_set *) * inst->ip_set_max); + rcu_assign_pointer(inst->ip_set_list, list); /* Make sure all current packets have passed through */ synchronize_net(); /* Use new list */ - index = ip_set_max; - ip_set_max = i; + index = inst->ip_set_max; + inst->ip_set_max = i; kfree(tmp); ret = 0; } else if (ret) @@ -834,7 +928,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, * Finally! Add our shiny new set to the list, and be done. 
*/ pr_debug("create: '%s' created with index %u!\n", set->name, index); - nfnl_set(index) = set; + nfnl_set(inst, index) = set; return ret; @@ -857,12 +951,12 @@ ip_set_setname_policy[IPSET_ATTR_CMD_MAX + 1] = { }; static void -ip_set_destroy_set(ip_set_id_t index) +ip_set_destroy_set(struct ip_set_net *inst, ip_set_id_t index) { - struct ip_set *set = nfnl_set(index); + struct ip_set *set = nfnl_set(inst, index); pr_debug("set: %s\n", set->name); - nfnl_set(index) = NULL; + nfnl_set(inst, index) = NULL; /* Must call it without holding any lock */ set->variant->destroy(set); @@ -875,6 +969,7 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[]) { + struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); struct ip_set *s; ip_set_id_t i; int ret = 0; @@ -894,21 +989,22 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb, */ read_lock_bh(&ip_set_ref_lock); if (!attr[IPSET_ATTR_SETNAME]) { - for (i = 0; i < ip_set_max; i++) { - s = nfnl_set(i); + for (i = 0; i < inst->ip_set_max; i++) { + s = nfnl_set(inst, i); if (s != NULL && s->ref) { ret = -IPSET_ERR_BUSY; goto out; } } read_unlock_bh(&ip_set_ref_lock); - for (i = 0; i < ip_set_max; i++) { - s = nfnl_set(i); + for (i = 0; i < inst->ip_set_max; i++) { + s = nfnl_set(inst, i); if (s != NULL) - ip_set_destroy_set(i); + ip_set_destroy_set(inst, i); } } else { - s = find_set_and_id(nla_data(attr[IPSET_ATTR_SETNAME]), &i); + s = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]), + &i); if (s == NULL) { ret = -ENOENT; goto out; @@ -918,7 +1014,7 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb, } read_unlock_bh(&ip_set_ref_lock); - ip_set_destroy_set(i); + ip_set_destroy_set(inst, i); } return 0; out: @@ -943,6 +1039,7 @@ ip_set_flush(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[]) { + struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); struct ip_set *s; ip_set_id_t i; @@ 
-950,13 +1047,13 @@ ip_set_flush(struct sock *ctnl, struct sk_buff *skb, return -IPSET_ERR_PROTOCOL; if (!attr[IPSET_ATTR_SETNAME]) { - for (i = 0; i < ip_set_max; i++) { - s = nfnl_set(i); + for (i = 0; i < inst->ip_set_max; i++) { + s = nfnl_set(inst, i); if (s != NULL) ip_set_flush_set(s); } } else { - s = find_set(nla_data(attr[IPSET_ATTR_SETNAME])); + s = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); if (s == NULL) return -ENOENT; @@ -982,6 +1079,7 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[]) { + struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); struct ip_set *set, *s; const char *name2; ip_set_id_t i; @@ -992,7 +1090,7 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb, attr[IPSET_ATTR_SETNAME2] == NULL)) return -IPSET_ERR_PROTOCOL; - set = find_set(nla_data(attr[IPSET_ATTR_SETNAME])); + set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); if (set == NULL) return -ENOENT; @@ -1003,8 +1101,8 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb, } name2 = nla_data(attr[IPSET_ATTR_SETNAME2]); - for (i = 0; i < ip_set_max; i++) { - s = nfnl_set(i); + for (i = 0; i < inst->ip_set_max; i++) { + s = nfnl_set(inst, i); if (s != NULL && STREQ(s->name, name2)) { ret = -IPSET_ERR_EXIST_SETNAME2; goto out; @@ -1031,6 +1129,7 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[]) { + struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); struct ip_set *from, *to; ip_set_id_t from_id, to_id; char from_name[IPSET_MAXNAMELEN]; @@ -1040,11 +1139,13 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb, attr[IPSET_ATTR_SETNAME2] == NULL)) return -IPSET_ERR_PROTOCOL; - from = find_set_and_id(nla_data(attr[IPSET_ATTR_SETNAME]), &from_id); + from = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]), + &from_id); if (from == NULL) return -ENOENT; - to = find_set_and_id(nla_data(attr[IPSET_ATTR_SETNAME2]), 
&to_id); + to = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME2]), + &to_id); if (to == NULL) return -IPSET_ERR_EXIST_SETNAME2; @@ -1061,8 +1162,8 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb, write_lock_bh(&ip_set_ref_lock); swap(from->ref, to->ref); - nfnl_set(from_id) = to; - nfnl_set(to_id) = from; + nfnl_set(inst, from_id) = to; + nfnl_set(inst, to_id) = from; write_unlock_bh(&ip_set_ref_lock); return 0; @@ -1081,9 +1182,10 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb, static int ip_set_dump_done(struct netlink_callback *cb) { + struct ip_set_net *inst = (struct ip_set_net *)cb->data; if (cb->args[2]) { - pr_debug("release set %s\n", nfnl_set(cb->args[1])->name); - ip_set_put_byindex((ip_set_id_t) cb->args[1]); + pr_debug("release set %s\n", nfnl_set(inst, cb->args[1])->name); + __ip_set_put_byindex(inst, (ip_set_id_t) cb->args[1]); } return 0; } @@ -1109,6 +1211,7 @@ dump_init(struct netlink_callback *cb) struct nlattr *attr = (void *)nlh + min_len; u32 dump_type; ip_set_id_t index; + struct ip_set_net *inst = (struct ip_set_net *)cb->data; /* Second pass, so parser can't fail */ nla_parse(cda, IPSET_ATTR_CMD_MAX, @@ -1122,7 +1225,7 @@ dump_init(struct netlink_callback *cb) if (cda[IPSET_ATTR_SETNAME]) { struct ip_set *set; - set = find_set_and_id(nla_data(cda[IPSET_ATTR_SETNAME]), + set = find_set_and_id(inst, nla_data(cda[IPSET_ATTR_SETNAME]), &index); if (set == NULL) return -ENOENT; @@ -1150,6 +1253,7 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb) unsigned int flags = NETLINK_CB(cb->skb).portid ? 
NLM_F_MULTI : 0; u32 dump_type, dump_flags; int ret = 0; + struct ip_set_net *inst = (struct ip_set_net *)cb->data; if (!cb->args[0]) { ret = dump_init(cb); @@ -1163,18 +1267,18 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb) } } - if (cb->args[1] >= ip_set_max) + if (cb->args[1] >= inst->ip_set_max) goto out; dump_type = DUMP_TYPE(cb->args[0]); dump_flags = DUMP_FLAGS(cb->args[0]); - max = dump_type == DUMP_ONE ? cb->args[1] + 1 : ip_set_max; + max = dump_type == DUMP_ONE ? cb->args[1] + 1 : inst->ip_set_max; dump_last: pr_debug("args[0]: %u %u args[1]: %ld\n", dump_type, dump_flags, cb->args[1]); for (; cb->args[1] < max; cb->args[1]++) { index = (ip_set_id_t) cb->args[1]; - set = nfnl_set(index); + set = nfnl_set(inst, index); if (set == NULL) { if (dump_type == DUMP_ONE) { ret = -ENOENT; @@ -1252,8 +1356,8 @@ next_set: release_refcount: /* If there was an error or set is done, release set */ if (ret || !cb->args[2]) { - pr_debug("release set %s\n", nfnl_set(index)->name); - ip_set_put_byindex(index); + pr_debug("release set %s\n", nfnl_set(inst, index)->name); + __ip_set_put_byindex(inst, index); cb->args[2] = 0; } out: @@ -1271,6 +1375,8 @@ ip_set_dump(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[]) { + struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); + if (unlikely(protocol_failed(attr))) return -IPSET_ERR_PROTOCOL; @@ -1278,6 +1384,7 @@ ip_set_dump(struct sock *ctnl, struct sk_buff *skb, struct netlink_dump_control c = { .dump = ip_set_dump_start, .done = ip_set_dump_done, + .data = (void *)inst }; return netlink_dump_start(ctnl, skb, nlh, &c); } @@ -1356,6 +1463,7 @@ ip_set_uadd(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[]) { + struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); struct ip_set *set; struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {}; const struct nlattr *nla; @@ -1374,7 +1482,7 @@ ip_set_uadd(struct 
sock *ctnl, struct sk_buff *skb, attr[IPSET_ATTR_LINENO] == NULL)))) return -IPSET_ERR_PROTOCOL; - set = find_set(nla_data(attr[IPSET_ATTR_SETNAME])); + set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); if (set == NULL) return -ENOENT; @@ -1410,6 +1518,7 @@ ip_set_udel(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[]) { + struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); struct ip_set *set; struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {}; const struct nlattr *nla; @@ -1428,7 +1537,7 @@ ip_set_udel(struct sock *ctnl, struct sk_buff *skb, attr[IPSET_ATTR_LINENO] == NULL)))) return -IPSET_ERR_PROTOCOL; - set = find_set(nla_data(attr[IPSET_ATTR_SETNAME])); + set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); if (set == NULL) return -ENOENT; @@ -1464,6 +1573,7 @@ ip_set_utest(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[]) { + struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); struct ip_set *set; struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {}; int ret = 0; @@ -1474,7 +1584,7 @@ ip_set_utest(struct sock *ctnl, struct sk_buff *skb, !flag_nested(attr[IPSET_ATTR_DATA]))) return -IPSET_ERR_PROTOCOL; - set = find_set(nla_data(attr[IPSET_ATTR_SETNAME])); + set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); if (set == NULL) return -ENOENT; @@ -1499,6 +1609,7 @@ ip_set_header(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const attr[]) { + struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); const struct ip_set *set; struct sk_buff *skb2; struct nlmsghdr *nlh2; @@ -1508,7 +1619,7 @@ ip_set_header(struct sock *ctnl, struct sk_buff *skb, attr[IPSET_ATTR_SETNAME] == NULL)) return -IPSET_ERR_PROTOCOL; - set = find_set(nla_data(attr[IPSET_ATTR_SETNAME])); + set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); if (set == NULL) return -ENOENT; @@ -1733,8 +1844,10 @@ ip_set_sockfn_get(struct sock 
*sk, int optval, void __user *user, int *len) unsigned int *op; void *data; int copylen = *len, ret = 0; + struct net *net = sock_net(sk); + struct ip_set_net *inst = ip_set_pernet(net); - if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; if (optval != SO_IP_SET) return -EBADF; @@ -1783,22 +1896,39 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) } req_get->set.name[IPSET_MAXNAMELEN - 1] = '\0'; nfnl_lock(NFNL_SUBSYS_IPSET); - find_set_and_id(req_get->set.name, &id); + find_set_and_id(inst, req_get->set.name, &id); req_get->set.index = id; nfnl_unlock(NFNL_SUBSYS_IPSET); goto copy; } + case IP_SET_OP_GET_FNAME: { + struct ip_set_req_get_set_family *req_get = data; + ip_set_id_t id; + + if (*len != sizeof(struct ip_set_req_get_set_family)) { + ret = -EINVAL; + goto done; + } + req_get->set.name[IPSET_MAXNAMELEN - 1] = '\0'; + nfnl_lock(NFNL_SUBSYS_IPSET); + find_set_and_id(inst, req_get->set.name, &id); + req_get->set.index = id; + if (id != IPSET_INVALID_ID) + req_get->family = nfnl_set(inst, id)->family; + nfnl_unlock(NFNL_SUBSYS_IPSET); + goto copy; + } case IP_SET_OP_GET_BYINDEX: { struct ip_set_req_get_set *req_get = data; struct ip_set *set; if (*len != sizeof(struct ip_set_req_get_set) || - req_get->set.index >= ip_set_max) { + req_get->set.index >= inst->ip_set_max) { ret = -EINVAL; goto done; } nfnl_lock(NFNL_SUBSYS_IPSET); - set = nfnl_set(req_get->set.index); + set = nfnl_set(inst, req_get->set.index); strncpy(req_get->set.name, set ? 
set->name : "", IPSET_MAXNAMELEN); nfnl_unlock(NFNL_SUBSYS_IPSET); @@ -1827,49 +1957,82 @@ static struct nf_sockopt_ops so_set __read_mostly = { .owner = THIS_MODULE, }; -static int __init -ip_set_init(void) +static int __net_init +ip_set_net_init(struct net *net) { + struct ip_set_net *inst = ip_set_pernet(net); + struct ip_set **list; - int ret; - if (max_sets) - ip_set_max = max_sets; - if (ip_set_max >= IPSET_INVALID_ID) - ip_set_max = IPSET_INVALID_ID - 1; + inst->ip_set_max = max_sets ? max_sets : CONFIG_IP_SET_MAX; + if (inst->ip_set_max >= IPSET_INVALID_ID) + inst->ip_set_max = IPSET_INVALID_ID - 1; - list = kzalloc(sizeof(struct ip_set *) * ip_set_max, GFP_KERNEL); + list = kzalloc(sizeof(struct ip_set *) * inst->ip_set_max, GFP_KERNEL); if (!list) return -ENOMEM; + inst->is_deleted = 0; + rcu_assign_pointer(inst->ip_set_list, list); + pr_notice("ip_set: protocol %u\n", IPSET_PROTOCOL); + return 0; +} + +static void __net_exit +ip_set_net_exit(struct net *net) +{ + struct ip_set_net *inst = ip_set_pernet(net); + + struct ip_set *set = NULL; + ip_set_id_t i; + + inst->is_deleted = 1; /* flag for ip_set_nfnl_put */ + + for (i = 0; i < inst->ip_set_max; i++) { + set = nfnl_set(inst, i); + if (set != NULL) + ip_set_destroy_set(inst, i); + } + kfree(rcu_dereference_protected(inst->ip_set_list, 1)); +} + +static struct pernet_operations ip_set_net_ops = { + .init = ip_set_net_init, + .exit = ip_set_net_exit, + .id = &ip_set_net_id, + .size = sizeof(struct ip_set_net) +}; + - rcu_assign_pointer(ip_set_list, list); - ret = nfnetlink_subsys_register(&ip_set_netlink_subsys); +static int __init +ip_set_init(void) +{ + int ret = nfnetlink_subsys_register(&ip_set_netlink_subsys); if (ret != 0) { pr_err("ip_set: cannot register with nfnetlink.\n"); - kfree(list); return ret; } ret = nf_register_sockopt(&so_set); if (ret != 0) { pr_err("SO_SET registry failed: %d\n", ret); nfnetlink_subsys_unregister(&ip_set_netlink_subsys); - kfree(list); return ret; } - - 
pr_notice("ip_set: protocol %u\n", IPSET_PROTOCOL); + ret = register_pernet_subsys(&ip_set_net_ops); + if (ret) { + pr_err("ip_set: cannot register pernet_subsys.\n"); + nf_unregister_sockopt(&so_set); + nfnetlink_subsys_unregister(&ip_set_netlink_subsys); + return ret; + } return 0; } static void __exit ip_set_fini(void) { - struct ip_set **list = rcu_dereference_protected(ip_set_list, 1); - - /* There can't be any existing set */ + unregister_pernet_subsys(&ip_set_net_ops); nf_unregister_sockopt(&so_set); nfnetlink_subsys_unregister(&ip_set_netlink_subsys); - kfree(list); pr_debug("these are the famous last words\n"); } diff --git a/net/netfilter/ipset/ip_set_getport.c b/net/netfilter/ipset/ip_set_getport.c index dac156f819ac..29fb01ddff93 100644 --- a/net/netfilter/ipset/ip_set_getport.c +++ b/net/netfilter/ipset/ip_set_getport.c @@ -102,9 +102,25 @@ ip_set_get_ip4_port(const struct sk_buff *skb, bool src, int protocol = iph->protocol; /* See comments at tcp_match in ip_tables.c */ - if (protocol <= 0 || (ntohs(iph->frag_off) & IP_OFFSET)) + if (protocol <= 0) return false; + if (ntohs(iph->frag_off) & IP_OFFSET) + switch (protocol) { + case IPPROTO_TCP: + case IPPROTO_SCTP: + case IPPROTO_UDP: + case IPPROTO_UDPLITE: + case IPPROTO_ICMP: + /* Port info not available for fragment offset > 0 */ + return false; + default: + /* Other protocols doesn't have ports, + so we can match fragments */ + *proto = protocol; + return true; + } + return get_port(skb, protocol, protooff, src, port, proto); } EXPORT_SYMBOL_GPL(ip_set_get_ip4_port); diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 707bc520d629..6a80dbd30df7 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -15,8 +15,7 @@ #define rcu_dereference_bh(p) rcu_dereference(p) #endif -#define CONCAT(a, b) a##b -#define TOKEN(a, b) CONCAT(a, b) +#define rcu_dereference_bh_nfnl(p) rcu_dereference_bh_check(p, 1) /* Hashing which 
uses arrays to resolve clashing. The hash table is resized * (doubled) when searching becomes too long. @@ -78,10 +77,14 @@ struct htable { #define hbucket(h, i) (&((h)->bucket[i])) +#ifndef IPSET_NET_COUNT +#define IPSET_NET_COUNT 1 +#endif + /* Book-keeping of the prefixes added to the set */ struct net_prefixes { - u8 cidr; /* the different cidr values in the set */ - u32 nets; /* number of elements per cidr */ + u32 nets[IPSET_NET_COUNT]; /* number of elements per cidr */ + u8 cidr[IPSET_NET_COUNT]; /* the different cidr values in the set */ }; /* Compute the hash table size */ @@ -114,23 +117,6 @@ htable_bits(u32 hashsize) return bits; } -/* Destroy the hashtable part of the set */ -static void -ahash_destroy(struct htable *t) -{ - struct hbucket *n; - u32 i; - - for (i = 0; i < jhash_size(t->htable_bits); i++) { - n = hbucket(t, i); - if (n->size) - /* FIXME: use slab cache */ - kfree(n->value); - } - - ip_set_free(t); -} - static int hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize) { @@ -156,30 +142,30 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize) } #ifdef IP_SET_HASH_WITH_NETS +#if IPSET_NET_COUNT > 1 +#define __CIDR(cidr, i) (cidr[i]) +#else +#define __CIDR(cidr, i) (cidr) +#endif #ifdef IP_SET_HASH_WITH_NETS_PACKED /* When cidr is packed with nomatch, cidr - 1 is stored in the entry */ -#define CIDR(cidr) (cidr + 1) +#define CIDR(cidr, i) (__CIDR(cidr, i) + 1) #else -#define CIDR(cidr) (cidr) +#define CIDR(cidr, i) (__CIDR(cidr, i)) #endif #define SET_HOST_MASK(family) (family == AF_INET ? 
32 : 128) #ifdef IP_SET_HASH_WITH_MULTI -#define NETS_LENGTH(family) (SET_HOST_MASK(family) + 1) +#define NLEN(family) (SET_HOST_MASK(family) + 1) #else -#define NETS_LENGTH(family) SET_HOST_MASK(family) +#define NLEN(family) SET_HOST_MASK(family) #endif #else -#define NETS_LENGTH(family) 0 +#define NLEN(family) 0 #endif /* IP_SET_HASH_WITH_NETS */ -#define ext_timeout(e, h) \ -(unsigned long *)(((void *)(e)) + (h)->offset[IPSET_OFFSET_TIMEOUT]) -#define ext_counter(e, h) \ -(struct ip_set_counter *)(((void *)(e)) + (h)->offset[IPSET_OFFSET_COUNTER]) - #endif /* _IP_SET_HASH_GEN_H */ /* Family dependent templates */ @@ -194,6 +180,8 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize) #undef mtype_data_next #undef mtype_elem +#undef mtype_ahash_destroy +#undef mtype_ext_cleanup #undef mtype_add_cidr #undef mtype_del_cidr #undef mtype_ahash_memsize @@ -220,41 +208,44 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize) #undef HKEY -#define mtype_data_equal TOKEN(MTYPE, _data_equal) +#define mtype_data_equal IPSET_TOKEN(MTYPE, _data_equal) #ifdef IP_SET_HASH_WITH_NETS -#define mtype_do_data_match TOKEN(MTYPE, _do_data_match) +#define mtype_do_data_match IPSET_TOKEN(MTYPE, _do_data_match) #else #define mtype_do_data_match(d) 1 #endif -#define mtype_data_set_flags TOKEN(MTYPE, _data_set_flags) -#define mtype_data_reset_flags TOKEN(MTYPE, _data_reset_flags) -#define mtype_data_netmask TOKEN(MTYPE, _data_netmask) -#define mtype_data_list TOKEN(MTYPE, _data_list) -#define mtype_data_next TOKEN(MTYPE, _data_next) -#define mtype_elem TOKEN(MTYPE, _elem) -#define mtype_add_cidr TOKEN(MTYPE, _add_cidr) -#define mtype_del_cidr TOKEN(MTYPE, _del_cidr) -#define mtype_ahash_memsize TOKEN(MTYPE, _ahash_memsize) -#define mtype_flush TOKEN(MTYPE, _flush) -#define mtype_destroy TOKEN(MTYPE, _destroy) -#define mtype_gc_init TOKEN(MTYPE, _gc_init) -#define mtype_same_set TOKEN(MTYPE, _same_set) -#define mtype_kadt TOKEN(MTYPE, _kadt) -#define mtype_uadt 
TOKEN(MTYPE, _uadt) +#define mtype_data_set_flags IPSET_TOKEN(MTYPE, _data_set_flags) +#define mtype_data_reset_elem IPSET_TOKEN(MTYPE, _data_reset_elem) +#define mtype_data_reset_flags IPSET_TOKEN(MTYPE, _data_reset_flags) +#define mtype_data_netmask IPSET_TOKEN(MTYPE, _data_netmask) +#define mtype_data_list IPSET_TOKEN(MTYPE, _data_list) +#define mtype_data_next IPSET_TOKEN(MTYPE, _data_next) +#define mtype_elem IPSET_TOKEN(MTYPE, _elem) +#define mtype_ahash_destroy IPSET_TOKEN(MTYPE, _ahash_destroy) +#define mtype_ext_cleanup IPSET_TOKEN(MTYPE, _ext_cleanup) +#define mtype_add_cidr IPSET_TOKEN(MTYPE, _add_cidr) +#define mtype_del_cidr IPSET_TOKEN(MTYPE, _del_cidr) +#define mtype_ahash_memsize IPSET_TOKEN(MTYPE, _ahash_memsize) +#define mtype_flush IPSET_TOKEN(MTYPE, _flush) +#define mtype_destroy IPSET_TOKEN(MTYPE, _destroy) +#define mtype_gc_init IPSET_TOKEN(MTYPE, _gc_init) +#define mtype_same_set IPSET_TOKEN(MTYPE, _same_set) +#define mtype_kadt IPSET_TOKEN(MTYPE, _kadt) +#define mtype_uadt IPSET_TOKEN(MTYPE, _uadt) #define mtype MTYPE -#define mtype_elem TOKEN(MTYPE, _elem) -#define mtype_add TOKEN(MTYPE, _add) -#define mtype_del TOKEN(MTYPE, _del) -#define mtype_test_cidrs TOKEN(MTYPE, _test_cidrs) -#define mtype_test TOKEN(MTYPE, _test) -#define mtype_expire TOKEN(MTYPE, _expire) -#define mtype_resize TOKEN(MTYPE, _resize) -#define mtype_head TOKEN(MTYPE, _head) -#define mtype_list TOKEN(MTYPE, _list) -#define mtype_gc TOKEN(MTYPE, _gc) -#define mtype_variant TOKEN(MTYPE, _variant) -#define mtype_data_match TOKEN(MTYPE, _data_match) +#define mtype_elem IPSET_TOKEN(MTYPE, _elem) +#define mtype_add IPSET_TOKEN(MTYPE, _add) +#define mtype_del IPSET_TOKEN(MTYPE, _del) +#define mtype_test_cidrs IPSET_TOKEN(MTYPE, _test_cidrs) +#define mtype_test IPSET_TOKEN(MTYPE, _test) +#define mtype_expire IPSET_TOKEN(MTYPE, _expire) +#define mtype_resize IPSET_TOKEN(MTYPE, _resize) +#define mtype_head IPSET_TOKEN(MTYPE, _head) +#define mtype_list IPSET_TOKEN(MTYPE, _list) 
+#define mtype_gc IPSET_TOKEN(MTYPE, _gc) +#define mtype_variant IPSET_TOKEN(MTYPE, _variant) +#define mtype_data_match IPSET_TOKEN(MTYPE, _data_match) #ifndef HKEY_DATALEN #define HKEY_DATALEN sizeof(struct mtype_elem) @@ -269,13 +260,10 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize) /* The generic hash structure */ struct htype { - struct htable *table; /* the hash table */ + struct htable __rcu *table; /* the hash table */ u32 maxelem; /* max elements in the hash */ u32 elements; /* current element (vs timeout) */ u32 initval; /* random jhash init value */ - u32 timeout; /* timeout value, if enabled */ - size_t dsize; /* data struct size */ - size_t offset[IPSET_OFFSET_MAX]; /* Offsets to extensions */ struct timer_list gc; /* garbage collection when timeout enabled */ struct mtype_elem next; /* temporary storage for uadd */ #ifdef IP_SET_HASH_WITH_MULTI @@ -297,49 +285,49 @@ struct htype { /* Network cidr size book keeping when the hash stores different * sized networks */ static void -mtype_add_cidr(struct htype *h, u8 cidr, u8 nets_length) +mtype_add_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n) { int i, j; /* Add in increasing prefix order, so larger cidr first */ - for (i = 0, j = -1; i < nets_length && h->nets[i].nets; i++) { + for (i = 0, j = -1; i < nets_length && h->nets[i].nets[n]; i++) { if (j != -1) continue; - else if (h->nets[i].cidr < cidr) + else if (h->nets[i].cidr[n] < cidr) j = i; - else if (h->nets[i].cidr == cidr) { - h->nets[i].nets++; + else if (h->nets[i].cidr[n] == cidr) { + h->nets[i].nets[n]++; return; } } if (j != -1) { for (; i > j; i--) { - h->nets[i].cidr = h->nets[i - 1].cidr; - h->nets[i].nets = h->nets[i - 1].nets; + h->nets[i].cidr[n] = h->nets[i - 1].cidr[n]; + h->nets[i].nets[n] = h->nets[i - 1].nets[n]; } } - h->nets[i].cidr = cidr; - h->nets[i].nets = 1; + h->nets[i].cidr[n] = cidr; + h->nets[i].nets[n] = 1; } static void -mtype_del_cidr(struct htype *h, u8 cidr, u8 nets_length) 
+mtype_del_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n) { u8 i, j, net_end = nets_length - 1; for (i = 0; i < nets_length; i++) { - if (h->nets[i].cidr != cidr) + if (h->nets[i].cidr[n] != cidr) continue; - if (h->nets[i].nets > 1 || i == net_end || - h->nets[i + 1].nets == 0) { - h->nets[i].nets--; + if (h->nets[i].nets[n] > 1 || i == net_end || + h->nets[i + 1].nets[n] == 0) { + h->nets[i].nets[n]--; return; } - for (j = i; j < net_end && h->nets[j].nets; j++) { - h->nets[j].cidr = h->nets[j + 1].cidr; - h->nets[j].nets = h->nets[j + 1].nets; + for (j = i; j < net_end && h->nets[j].nets[n]; j++) { + h->nets[j].cidr[n] = h->nets[j + 1].cidr[n]; + h->nets[j].nets[n] = h->nets[j + 1].nets[n]; } - h->nets[j].nets = 0; + h->nets[j].nets[n] = 0; return; } } @@ -347,10 +335,10 @@ mtype_del_cidr(struct htype *h, u8 cidr, u8 nets_length) /* Calculate the actual memory size of the set data */ static size_t -mtype_ahash_memsize(const struct htype *h, u8 nets_length) +mtype_ahash_memsize(const struct htype *h, const struct htable *t, + u8 nets_length, size_t dsize) { u32 i; - struct htable *t = h->table; size_t memsize = sizeof(*h) + sizeof(*t) #ifdef IP_SET_HASH_WITH_NETS @@ -359,35 +347,70 @@ mtype_ahash_memsize(const struct htype *h, u8 nets_length) + jhash_size(t->htable_bits) * sizeof(struct hbucket); for (i = 0; i < jhash_size(t->htable_bits); i++) - memsize += t->bucket[i].size * h->dsize; + memsize += t->bucket[i].size * dsize; return memsize; } +/* Get the ith element from the array block n */ +#define ahash_data(n, i, dsize) \ + ((struct mtype_elem *)((n)->value + ((i) * (dsize)))) + +static void +mtype_ext_cleanup(struct ip_set *set, struct hbucket *n) +{ + int i; + + for (i = 0; i < n->pos; i++) + ip_set_ext_destroy(set, ahash_data(n, i, set->dsize)); +} + /* Flush a hash type of set: destroy all elements */ static void mtype_flush(struct ip_set *set) { struct htype *h = set->data; - struct htable *t = h->table; + struct htable *t; struct hbucket *n; u32 
i; + t = rcu_dereference_bh_nfnl(h->table); for (i = 0; i < jhash_size(t->htable_bits); i++) { n = hbucket(t, i); if (n->size) { + if (set->extensions & IPSET_EXT_DESTROY) + mtype_ext_cleanup(set, n); n->size = n->pos = 0; /* FIXME: use slab cache */ kfree(n->value); } } #ifdef IP_SET_HASH_WITH_NETS - memset(h->nets, 0, sizeof(struct net_prefixes) - * NETS_LENGTH(set->family)); + memset(h->nets, 0, sizeof(struct net_prefixes) * NLEN(set->family)); #endif h->elements = 0; } +/* Destroy the hashtable part of the set */ +static void +mtype_ahash_destroy(struct ip_set *set, struct htable *t, bool ext_destroy) +{ + struct hbucket *n; + u32 i; + + for (i = 0; i < jhash_size(t->htable_bits); i++) { + n = hbucket(t, i); + if (n->size) { + if (set->extensions & IPSET_EXT_DESTROY && ext_destroy) + mtype_ext_cleanup(set, n); + /* FIXME: use slab cache */ + kfree(n->value); + } + } + + ip_set_free(t); +} + /* Destroy a hash type of set */ static void mtype_destroy(struct ip_set *set) @@ -397,7 +420,7 @@ mtype_destroy(struct ip_set *set) if (set->extensions & IPSET_EXT_TIMEOUT) del_timer_sync(&h->gc); - ahash_destroy(h->table); + mtype_ahash_destroy(set, rcu_dereference_bh_nfnl(h->table), true); #ifdef IP_SET_HASH_WITH_RBTREE rbtree_destroy(&h->rbtree); #endif @@ -414,10 +437,10 @@ mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) init_timer(&h->gc); h->gc.data = (unsigned long) set; h->gc.function = gc; - h->gc.expires = jiffies + IPSET_GC_PERIOD(h->timeout) * HZ; + h->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ; add_timer(&h->gc); pr_debug("gc initialized, run in every %u\n", - IPSET_GC_PERIOD(h->timeout)); + IPSET_GC_PERIOD(set->timeout)); } static bool @@ -428,37 +451,40 @@ mtype_same_set(const struct ip_set *a, const struct ip_set *b) /* Resizing changes htable_bits, so we ignore it */ return x->maxelem == y->maxelem && - x->timeout == y->timeout && + a->timeout == b->timeout && #ifdef IP_SET_HASH_WITH_NETMASK x->netmask == y->netmask && 
#endif a->extensions == b->extensions; } -/* Get the ith element from the array block n */ -#define ahash_data(n, i, dsize) \ - ((struct mtype_elem *)((n)->value + ((i) * (dsize)))) - /* Delete expired elements from the hashtable */ static void -mtype_expire(struct htype *h, u8 nets_length, size_t dsize) +mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize) { - struct htable *t = h->table; + struct htable *t; struct hbucket *n; struct mtype_elem *data; u32 i; int j; +#ifdef IP_SET_HASH_WITH_NETS + u8 k; +#endif + rcu_read_lock_bh(); + t = rcu_dereference_bh(h->table); for (i = 0; i < jhash_size(t->htable_bits); i++) { n = hbucket(t, i); for (j = 0; j < n->pos; j++) { data = ahash_data(n, j, dsize); - if (ip_set_timeout_expired(ext_timeout(data, h))) { + if (ip_set_timeout_expired(ext_timeout(data, set))) { pr_debug("expired %u/%u\n", i, j); #ifdef IP_SET_HASH_WITH_NETS - mtype_del_cidr(h, CIDR(data->cidr), - nets_length); + for (k = 0; k < IPSET_NET_COUNT; k++) + mtype_del_cidr(h, CIDR(data->cidr, k), + nets_length, k); #endif + ip_set_ext_destroy(set, data); if (j != n->pos - 1) /* Not last one */ memcpy(data, @@ -481,6 +507,7 @@ mtype_expire(struct htype *h, u8 nets_length, size_t dsize) n->value = tmp; } } + rcu_read_unlock_bh(); } static void @@ -491,10 +518,10 @@ mtype_gc(unsigned long ul_set) pr_debug("called\n"); write_lock_bh(&set->lock); - mtype_expire(h, NETS_LENGTH(set->family), h->dsize); + mtype_expire(set, h, NLEN(set->family), set->dsize); write_unlock_bh(&set->lock); - h->gc.expires = jiffies + IPSET_GC_PERIOD(h->timeout) * HZ; + h->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ; add_timer(&h->gc); } @@ -505,7 +532,7 @@ static int mtype_resize(struct ip_set *set, bool retried) { struct htype *h = set->data; - struct htable *t, *orig = h->table; + struct htable *t, *orig = rcu_dereference_bh_nfnl(h->table); u8 htable_bits = orig->htable_bits; #ifdef IP_SET_HASH_WITH_NETS u8 flags; @@ -520,8 +547,7 @@ 
mtype_resize(struct ip_set *set, bool retried) if (SET_WITH_TIMEOUT(set) && !retried) { i = h->elements; write_lock_bh(&set->lock); - mtype_expire(set->data, NETS_LENGTH(set->family), - h->dsize); + mtype_expire(set, set->data, NLEN(set->family), set->dsize); write_unlock_bh(&set->lock); if (h->elements < i) return 0; @@ -548,25 +574,25 @@ retry: for (i = 0; i < jhash_size(orig->htable_bits); i++) { n = hbucket(orig, i); for (j = 0; j < n->pos; j++) { - data = ahash_data(n, j, h->dsize); + data = ahash_data(n, j, set->dsize); #ifdef IP_SET_HASH_WITH_NETS flags = 0; mtype_data_reset_flags(data, &flags); #endif m = hbucket(t, HKEY(data, h->initval, htable_bits)); - ret = hbucket_elem_add(m, AHASH_MAX(h), h->dsize); + ret = hbucket_elem_add(m, AHASH_MAX(h), set->dsize); if (ret < 0) { #ifdef IP_SET_HASH_WITH_NETS mtype_data_reset_flags(data, &flags); #endif read_unlock_bh(&set->lock); - ahash_destroy(t); + mtype_ahash_destroy(set, t, false); if (ret == -EAGAIN) goto retry; return ret; } - d = ahash_data(m, m->pos++, h->dsize); - memcpy(d, data, h->dsize); + d = ahash_data(m, m->pos++, set->dsize); + memcpy(d, data, set->dsize); #ifdef IP_SET_HASH_WITH_NETS mtype_data_reset_flags(d, &flags); #endif @@ -581,7 +607,7 @@ retry: pr_debug("set %s resized from %u (%p) to %u (%p)\n", set->name, orig->htable_bits, orig, t->htable_bits, t); - ahash_destroy(orig); + mtype_ahash_destroy(set, orig, false); return 0; } @@ -604,7 +630,7 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, if (SET_WITH_TIMEOUT(set) && h->elements >= h->maxelem) /* FIXME: when set is full, we slow down here */ - mtype_expire(h, NETS_LENGTH(set->family), h->dsize); + mtype_expire(set, h, NLEN(set->family), set->dsize); if (h->elements >= h->maxelem) { if (net_ratelimit()) @@ -618,11 +644,11 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, key = HKEY(value, h->initval, t->htable_bits); n = hbucket(t, key); for (i = 0; i < n->pos; i++) { - data = 
ahash_data(n, i, h->dsize); + data = ahash_data(n, i, set->dsize); if (mtype_data_equal(data, d, &multi)) { if (flag_exist || (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(data, h)))) { + ip_set_timeout_expired(ext_timeout(data, set)))) { /* Just the extensions could be overwritten */ j = i; goto reuse_slot; @@ -633,30 +659,37 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, } /* Reuse first timed out entry */ if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(data, h)) && + ip_set_timeout_expired(ext_timeout(data, set)) && j != AHASH_MAX(h) + 1) j = i; } reuse_slot: if (j != AHASH_MAX(h) + 1) { /* Fill out reused slot */ - data = ahash_data(n, j, h->dsize); + data = ahash_data(n, j, set->dsize); #ifdef IP_SET_HASH_WITH_NETS - mtype_del_cidr(h, CIDR(data->cidr), NETS_LENGTH(set->family)); - mtype_add_cidr(h, CIDR(d->cidr), NETS_LENGTH(set->family)); + for (i = 0; i < IPSET_NET_COUNT; i++) { + mtype_del_cidr(h, CIDR(data->cidr, i), + NLEN(set->family), i); + mtype_add_cidr(h, CIDR(d->cidr, i), + NLEN(set->family), i); + } #endif + ip_set_ext_destroy(set, data); } else { /* Use/create a new slot */ TUNE_AHASH_MAX(h, multi); - ret = hbucket_elem_add(n, AHASH_MAX(h), h->dsize); + ret = hbucket_elem_add(n, AHASH_MAX(h), set->dsize); if (ret != 0) { if (ret == -EAGAIN) mtype_data_next(&h->next, d); goto out; } - data = ahash_data(n, n->pos++, h->dsize); + data = ahash_data(n, n->pos++, set->dsize); #ifdef IP_SET_HASH_WITH_NETS - mtype_add_cidr(h, CIDR(d->cidr), NETS_LENGTH(set->family)); + for (i = 0; i < IPSET_NET_COUNT; i++) + mtype_add_cidr(h, CIDR(d->cidr, i), NLEN(set->family), + i); #endif h->elements++; } @@ -665,9 +698,11 @@ reuse_slot: mtype_data_set_flags(data, flags); #endif if (SET_WITH_TIMEOUT(set)) - ip_set_timeout_set(ext_timeout(data, h), ext->timeout); + ip_set_timeout_set(ext_timeout(data, set), ext->timeout); if (SET_WITH_COUNTER(set)) - ip_set_init_counter(ext_counter(data, h), ext); + 
ip_set_init_counter(ext_counter(data, set), ext); + if (SET_WITH_COMMENT(set)) + ip_set_init_comment(ext_comment(data, set), ext); out: rcu_read_unlock_bh(); @@ -682,47 +717,60 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, struct ip_set_ext *mext, u32 flags) { struct htype *h = set->data; - struct htable *t = h->table; + struct htable *t; const struct mtype_elem *d = value; struct mtype_elem *data; struct hbucket *n; - int i; + int i, ret = -IPSET_ERR_EXIST; +#ifdef IP_SET_HASH_WITH_NETS + u8 j; +#endif u32 key, multi = 0; + rcu_read_lock_bh(); + t = rcu_dereference_bh(h->table); key = HKEY(value, h->initval, t->htable_bits); n = hbucket(t, key); for (i = 0; i < n->pos; i++) { - data = ahash_data(n, i, h->dsize); + data = ahash_data(n, i, set->dsize); if (!mtype_data_equal(data, d, &multi)) continue; if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(data, h))) - return -IPSET_ERR_EXIST; + ip_set_timeout_expired(ext_timeout(data, set))) + goto out; if (i != n->pos - 1) /* Not last one */ - memcpy(data, ahash_data(n, n->pos - 1, h->dsize), - h->dsize); + memcpy(data, ahash_data(n, n->pos - 1, set->dsize), + set->dsize); n->pos--; h->elements--; #ifdef IP_SET_HASH_WITH_NETS - mtype_del_cidr(h, CIDR(d->cidr), NETS_LENGTH(set->family)); + for (j = 0; j < IPSET_NET_COUNT; j++) + mtype_del_cidr(h, CIDR(d->cidr, j), NLEN(set->family), + j); #endif + ip_set_ext_destroy(set, data); if (n->pos + AHASH_INIT_SIZE < n->size) { void *tmp = kzalloc((n->size - AHASH_INIT_SIZE) - * h->dsize, + * set->dsize, GFP_ATOMIC); - if (!tmp) - return 0; + if (!tmp) { + ret = 0; + goto out; + } n->size -= AHASH_INIT_SIZE; - memcpy(tmp, n->value, n->size * h->dsize); + memcpy(tmp, n->value, n->size * set->dsize); kfree(n->value); n->value = tmp; } - return 0; + ret = 0; + goto out; } - return -IPSET_ERR_EXIST; +out: + rcu_read_unlock_bh(); + return ret; } static inline int @@ -730,8 +778,7 @@ mtype_data_match(struct mtype_elem *data, const struct 
ip_set_ext *ext, struct ip_set_ext *mext, struct ip_set *set, u32 flags) { if (SET_WITH_COUNTER(set)) - ip_set_update_counter(ext_counter(data, - (struct htype *)(set->data)), + ip_set_update_counter(ext_counter(data, set), ext, mext, flags); return mtype_do_data_match(data); } @@ -745,25 +792,38 @@ mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d, struct ip_set_ext *mext, u32 flags) { struct htype *h = set->data; - struct htable *t = h->table; + struct htable *t = rcu_dereference_bh(h->table); struct hbucket *n; struct mtype_elem *data; +#if IPSET_NET_COUNT == 2 + struct mtype_elem orig = *d; + int i, j = 0, k; +#else int i, j = 0; +#endif u32 key, multi = 0; - u8 nets_length = NETS_LENGTH(set->family); + u8 nets_length = NLEN(set->family); pr_debug("test by nets\n"); - for (; j < nets_length && h->nets[j].nets && !multi; j++) { - mtype_data_netmask(d, h->nets[j].cidr); + for (; j < nets_length && h->nets[j].nets[0] && !multi; j++) { +#if IPSET_NET_COUNT == 2 + mtype_data_reset_elem(d, &orig); + mtype_data_netmask(d, h->nets[j].cidr[0], false); + for (k = 0; k < nets_length && h->nets[k].nets[1] && !multi; + k++) { + mtype_data_netmask(d, h->nets[k].cidr[1], true); +#else + mtype_data_netmask(d, h->nets[j].cidr[0]); +#endif key = HKEY(d, h->initval, t->htable_bits); n = hbucket(t, key); for (i = 0; i < n->pos; i++) { - data = ahash_data(n, i, h->dsize); + data = ahash_data(n, i, set->dsize); if (!mtype_data_equal(data, d, &multi)) continue; if (SET_WITH_TIMEOUT(set)) { if (!ip_set_timeout_expired( - ext_timeout(data, h))) + ext_timeout(data, set))) return mtype_data_match(data, ext, mext, set, flags); @@ -774,6 +834,9 @@ mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d, return mtype_data_match(data, ext, mext, set, flags); } +#if IPSET_NET_COUNT == 2 + } +#endif } return 0; } @@ -785,30 +848,41 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext, struct ip_set_ext *mext, u32 flags) { struct htype *h = set->data; - struct 
htable *t = h->table; + struct htable *t; struct mtype_elem *d = value; struct hbucket *n; struct mtype_elem *data; - int i; + int i, ret = 0; u32 key, multi = 0; + rcu_read_lock_bh(); + t = rcu_dereference_bh(h->table); #ifdef IP_SET_HASH_WITH_NETS /* If we test an IP address and not a network address, * try all possible network sizes */ - if (CIDR(d->cidr) == SET_HOST_MASK(set->family)) - return mtype_test_cidrs(set, d, ext, mext, flags); + for (i = 0; i < IPSET_NET_COUNT; i++) + if (CIDR(d->cidr, i) != SET_HOST_MASK(set->family)) + break; + if (i == IPSET_NET_COUNT) { + ret = mtype_test_cidrs(set, d, ext, mext, flags); + goto out; + } #endif key = HKEY(d, h->initval, t->htable_bits); n = hbucket(t, key); for (i = 0; i < n->pos; i++) { - data = ahash_data(n, i, h->dsize); + data = ahash_data(n, i, set->dsize); if (mtype_data_equal(data, d, &multi) && !(SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(data, h)))) - return mtype_data_match(data, ext, mext, set, flags); + ip_set_timeout_expired(ext_timeout(data, set)))) { + ret = mtype_data_match(data, ext, mext, set, flags); + goto out; + } } - return 0; +out: + rcu_read_unlock_bh(); + return ret; } /* Reply a HEADER request: fill out the header part of the set */ @@ -816,18 +890,18 @@ static int mtype_head(struct ip_set *set, struct sk_buff *skb) { const struct htype *h = set->data; + const struct htable *t; struct nlattr *nested; size_t memsize; - read_lock_bh(&set->lock); - memsize = mtype_ahash_memsize(h, NETS_LENGTH(set->family)); - read_unlock_bh(&set->lock); + t = rcu_dereference_bh_nfnl(h->table); + memsize = mtype_ahash_memsize(h, t, NLEN(set->family), set->dsize); nested = ipset_nest_start(skb, IPSET_ATTR_DATA); if (!nested) goto nla_put_failure; if (nla_put_net32(skb, IPSET_ATTR_HASHSIZE, - htonl(jhash_size(h->table->htable_bits))) || + htonl(jhash_size(t->htable_bits))) || nla_put_net32(skb, IPSET_ATTR_MAXELEM, htonl(h->maxelem))) goto nla_put_failure; #ifdef IP_SET_HASH_WITH_NETMASK @@ 
-836,12 +910,9 @@ mtype_head(struct ip_set *set, struct sk_buff *skb) goto nla_put_failure; #endif if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) || - nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)) || - ((set->extensions & IPSET_EXT_TIMEOUT) && - nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(h->timeout))) || - ((set->extensions & IPSET_EXT_COUNTER) && - nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, - htonl(IPSET_FLAG_WITH_COUNTERS)))) + nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize))) + goto nla_put_failure; + if (unlikely(ip_set_put_flags(skb, set))) goto nla_put_failure; ipset_nest_end(skb, nested); @@ -856,7 +927,7 @@ mtype_list(const struct ip_set *set, struct sk_buff *skb, struct netlink_callback *cb) { const struct htype *h = set->data; - const struct htable *t = h->table; + const struct htable *t = rcu_dereference_bh_nfnl(h->table); struct nlattr *atd, *nested; const struct hbucket *n; const struct mtype_elem *e; @@ -874,9 +945,9 @@ mtype_list(const struct ip_set *set, n = hbucket(t, cb->args[2]); pr_debug("cb->args[2]: %lu, t %p n %p\n", cb->args[2], t, n); for (i = 0; i < n->pos; i++) { - e = ahash_data(n, i, h->dsize); + e = ahash_data(n, i, set->dsize); if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(e, h))) + ip_set_timeout_expired(ext_timeout(e, set))) continue; pr_debug("list hash %lu hbucket %p i %u, data %p\n", cb->args[2], n, i, e); @@ -890,13 +961,7 @@ mtype_list(const struct ip_set *set, } if (mtype_data_list(skb, e)) goto nla_put_failure; - if (SET_WITH_TIMEOUT(set) && - nla_put_net32(skb, IPSET_ATTR_TIMEOUT, - htonl(ip_set_timeout_get( - ext_timeout(e, h))))) - goto nla_put_failure; - if (SET_WITH_COUNTER(set) && - ip_set_put_counter(skb, ext_counter(e, h))) + if (ip_set_put_extensions(skb, set, e, true)) goto nla_put_failure; ipset_nest_end(skb, nested); } @@ -909,24 +974,24 @@ mtype_list(const struct ip_set *set, nla_put_failure: nlmsg_trim(skb, incomplete); - ipset_nest_end(skb, atd); if 
(unlikely(first == cb->args[2])) { pr_warning("Can't list set %s: one bucket does not fit into " "a message. Please report it!\n", set->name); cb->args[2] = 0; return -EMSGSIZE; } + ipset_nest_end(skb, atd); return 0; } static int -TOKEN(MTYPE, _kadt)(struct ip_set *set, const struct sk_buff *skb, - const struct xt_action_param *par, - enum ipset_adt adt, struct ip_set_adt_opt *opt); +IPSET_TOKEN(MTYPE, _kadt)(struct ip_set *set, const struct sk_buff *skb, + const struct xt_action_param *par, + enum ipset_adt adt, struct ip_set_adt_opt *opt); static int -TOKEN(MTYPE, _uadt)(struct ip_set *set, struct nlattr *tb[], - enum ipset_adt adt, u32 *lineno, u32 flags, bool retried); +IPSET_TOKEN(MTYPE, _uadt)(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried); static const struct ip_set_type_variant mtype_variant = { .kadt = mtype_kadt, @@ -946,16 +1011,17 @@ static const struct ip_set_type_variant mtype_variant = { #ifdef IP_SET_EMIT_CREATE static int -TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags) +IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set, + struct nlattr *tb[], u32 flags) { u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM; - u32 cadt_flags = 0; u8 hbits; #ifdef IP_SET_HASH_WITH_NETMASK u8 netmask; #endif size_t hsize; struct HTYPE *h; + struct htable *t; if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6)) return -IPSET_ERR_INVALID_FAMILY; @@ -1005,7 +1071,7 @@ TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags) h->netmask = netmask; #endif get_random_bytes(&h->initval, sizeof(h->initval)); - h->timeout = IPSET_NO_TIMEOUT; + set->timeout = IPSET_NO_TIMEOUT; hbits = htable_bits(hashsize); hsize = htable_size(hbits); @@ -1013,91 +1079,37 @@ TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags) kfree(h); return -ENOMEM; } - h->table = ip_set_alloc(hsize); - if (!h->table) { + t = 
ip_set_alloc(hsize); + if (!t) { kfree(h); return -ENOMEM; } - h->table->htable_bits = hbits; + t->htable_bits = hbits; + rcu_assign_pointer(h->table, t); set->data = h; - if (set->family == NFPROTO_IPV4) - set->variant = &TOKEN(HTYPE, 4_variant); - else - set->variant = &TOKEN(HTYPE, 6_variant); - - if (tb[IPSET_ATTR_CADT_FLAGS]) - cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); - if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) { - set->extensions |= IPSET_EXT_COUNTER; - if (tb[IPSET_ATTR_TIMEOUT]) { - h->timeout = - ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); - set->extensions |= IPSET_EXT_TIMEOUT; - if (set->family == NFPROTO_IPV4) { - h->dsize = - sizeof(struct TOKEN(HTYPE, 4ct_elem)); - h->offset[IPSET_OFFSET_TIMEOUT] = - offsetof(struct TOKEN(HTYPE, 4ct_elem), - timeout); - h->offset[IPSET_OFFSET_COUNTER] = - offsetof(struct TOKEN(HTYPE, 4ct_elem), - counter); - TOKEN(HTYPE, 4_gc_init)(set, - TOKEN(HTYPE, 4_gc)); - } else { - h->dsize = - sizeof(struct TOKEN(HTYPE, 6ct_elem)); - h->offset[IPSET_OFFSET_TIMEOUT] = - offsetof(struct TOKEN(HTYPE, 6ct_elem), - timeout); - h->offset[IPSET_OFFSET_COUNTER] = - offsetof(struct TOKEN(HTYPE, 6ct_elem), - counter); - TOKEN(HTYPE, 6_gc_init)(set, - TOKEN(HTYPE, 6_gc)); - } - } else { - if (set->family == NFPROTO_IPV4) { - h->dsize = - sizeof(struct TOKEN(HTYPE, 4c_elem)); - h->offset[IPSET_OFFSET_COUNTER] = - offsetof(struct TOKEN(HTYPE, 4c_elem), - counter); - } else { - h->dsize = - sizeof(struct TOKEN(HTYPE, 6c_elem)); - h->offset[IPSET_OFFSET_COUNTER] = - offsetof(struct TOKEN(HTYPE, 6c_elem), - counter); - } - } - } else if (tb[IPSET_ATTR_TIMEOUT]) { - h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); - set->extensions |= IPSET_EXT_TIMEOUT; - if (set->family == NFPROTO_IPV4) { - h->dsize = sizeof(struct TOKEN(HTYPE, 4t_elem)); - h->offset[IPSET_OFFSET_TIMEOUT] = - offsetof(struct TOKEN(HTYPE, 4t_elem), - timeout); - TOKEN(HTYPE, 4_gc_init)(set, TOKEN(HTYPE, 4_gc)); - } else { - h->dsize = sizeof(struct 
TOKEN(HTYPE, 6t_elem)); - h->offset[IPSET_OFFSET_TIMEOUT] = - offsetof(struct TOKEN(HTYPE, 6t_elem), - timeout); - TOKEN(HTYPE, 6_gc_init)(set, TOKEN(HTYPE, 6_gc)); - } + if (set->family == NFPROTO_IPV4) { + set->variant = &IPSET_TOKEN(HTYPE, 4_variant); + set->dsize = ip_set_elem_len(set, tb, + sizeof(struct IPSET_TOKEN(HTYPE, 4_elem))); } else { + set->variant = &IPSET_TOKEN(HTYPE, 6_variant); + set->dsize = ip_set_elem_len(set, tb, + sizeof(struct IPSET_TOKEN(HTYPE, 6_elem))); + } + if (tb[IPSET_ATTR_TIMEOUT]) { + set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); if (set->family == NFPROTO_IPV4) - h->dsize = sizeof(struct TOKEN(HTYPE, 4_elem)); + IPSET_TOKEN(HTYPE, 4_gc_init)(set, + IPSET_TOKEN(HTYPE, 4_gc)); else - h->dsize = sizeof(struct TOKEN(HTYPE, 6_elem)); + IPSET_TOKEN(HTYPE, 6_gc_init)(set, + IPSET_TOKEN(HTYPE, 6_gc)); } pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n", - set->name, jhash_size(h->table->htable_bits), - h->table->htable_bits, h->maxelem, set->data, h->table); + set->name, jhash_size(t->htable_bits), + t->htable_bits, h->maxelem, set->data, t); return 0; } diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c index c74e6e14cd93..e65fc2423d56 100644 --- a/net/netfilter/ipset/ip_set_hash_ip.c +++ b/net/netfilter/ipset/ip_set_hash_ip.c @@ -23,19 +23,20 @@ #include <linux/netfilter/ipset/ip_set.h> #include <linux/netfilter/ipset/ip_set_hash.h> -#define REVISION_MIN 0 -#define REVISION_MAX 1 /* Counters support */ +#define IPSET_TYPE_REV_MIN 0 +/* 1 Counters support */ +#define IPSET_TYPE_REV_MAX 2 /* Comments support */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); -IP_SET_MODULE_DESC("hash:ip", REVISION_MIN, REVISION_MAX); +IP_SET_MODULE_DESC("hash:ip", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_hash:ip"); /* Type specific function prefix */ #define HTYPE hash_ip #define IP_SET_HASH_WITH_NETMASK -/* IPv4 variants */ +/* IPv4 
variant */ /* Member elements */ struct hash_ip4_elem { @@ -43,22 +44,6 @@ struct hash_ip4_elem { __be32 ip; }; -struct hash_ip4t_elem { - __be32 ip; - unsigned long timeout; -}; - -struct hash_ip4c_elem { - __be32 ip; - struct ip_set_counter counter; -}; - -struct hash_ip4ct_elem { - __be32 ip; - struct ip_set_counter counter; - unsigned long timeout; -}; - /* Common functions */ static inline bool @@ -99,7 +84,7 @@ hash_ip4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct hash_ip *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ip4_elem e = {}; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); __be32 ip; ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &ip); @@ -118,8 +103,8 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], const struct hash_ip *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ip4_elem e = {}; - struct ip_set_ext ext = IP_SET_INIT_UEXT(h); - u32 ip, ip_to, hosts; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); + u32 ip = 0, ip_to = 0, hosts; int ret = 0; if (unlikely(!tb[IPSET_ATTR_IP] || @@ -178,29 +163,13 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], return ret; } -/* IPv6 variants */ +/* IPv6 variant */ /* Member elements */ struct hash_ip6_elem { union nf_inet_addr ip; }; -struct hash_ip6t_elem { - union nf_inet_addr ip; - unsigned long timeout; -}; - -struct hash_ip6c_elem { - union nf_inet_addr ip; - struct ip_set_counter counter; -}; - -struct hash_ip6ct_elem { - union nf_inet_addr ip; - struct ip_set_counter counter; - unsigned long timeout; -}; - /* Common functions */ static inline bool @@ -253,7 +222,7 @@ hash_ip6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct hash_ip *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ip6_elem e = {}; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); 
ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6); hash_ip6_netmask(&e.ip, h->netmask); @@ -270,7 +239,7 @@ hash_ip6_uadt(struct ip_set *set, struct nlattr *tb[], const struct hash_ip *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ip6_elem e = {}; - struct ip_set_ext ext = IP_SET_INIT_UEXT(h); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); int ret; if (unlikely(!tb[IPSET_ATTR_IP] || @@ -304,8 +273,8 @@ static struct ip_set_type hash_ip_type __read_mostly = { .features = IPSET_TYPE_IP, .dimension = IPSET_DIM_ONE, .family = NFPROTO_UNSPEC, - .revision_min = REVISION_MIN, - .revision_max = REVISION_MAX, + .revision_min = IPSET_TYPE_REV_MIN, + .revision_max = IPSET_TYPE_REV_MAX, .create = hash_ip_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, @@ -324,6 +293,7 @@ static struct ip_set_type hash_ip_type __read_mostly = { [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, }, .me = THIS_MODULE, }; diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c index 7a2d2bd98d04..525a595dd1fe 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -24,19 +24,20 @@ #include <linux/netfilter/ipset/ip_set_getport.h> #include <linux/netfilter/ipset/ip_set_hash.h> -#define REVISION_MIN 0 -/* 1 SCTP and UDPLITE support added */ -#define REVISION_MAX 2 /* Counters support added */ +#define IPSET_TYPE_REV_MIN 0 +/* 1 SCTP and UDPLITE support added */ +/* 2 Counters support added */ +#define IPSET_TYPE_REV_MAX 3 /* Comments support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); -IP_SET_MODULE_DESC("hash:ip,port", REVISION_MIN, REVISION_MAX); +IP_SET_MODULE_DESC("hash:ip,port", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_hash:ip,port"); /* Type 
specific function prefix */ #define HTYPE hash_ipport -/* IPv4 variants */ +/* IPv4 variant */ /* Member elements */ struct hash_ipport4_elem { @@ -46,31 +47,6 @@ struct hash_ipport4_elem { u8 padding; }; -struct hash_ipport4t_elem { - __be32 ip; - __be16 port; - u8 proto; - u8 padding; - unsigned long timeout; -}; - -struct hash_ipport4c_elem { - __be32 ip; - __be16 port; - u8 proto; - u8 padding; - struct ip_set_counter counter; -}; - -struct hash_ipport4ct_elem { - __be32 ip; - __be16 port; - u8 proto; - u8 padding; - struct ip_set_counter counter; - unsigned long timeout; -}; - /* Common functions */ static inline bool @@ -116,10 +92,9 @@ hash_ipport4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { - const struct hash_ipport *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipport4_elem e = { }; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (!ip_set_get_ip4_port(skb, opt->flags & IPSET_DIM_TWO_SRC, &e.port, &e.proto)) @@ -136,8 +111,8 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], const struct hash_ipport *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipport4_elem e = { }; - struct ip_set_ext ext = IP_SET_INIT_UEXT(h); - u32 ip, ip_to, p = 0, port, port_to; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); + u32 ip, ip_to = 0, p = 0, port, port_to; bool with_ports = false; int ret; @@ -222,7 +197,7 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], return ret; } -/* IPv6 variants */ +/* IPv6 variant */ struct hash_ipport6_elem { union nf_inet_addr ip; @@ -231,31 +206,6 @@ struct hash_ipport6_elem { u8 padding; }; -struct hash_ipport6t_elem { - union nf_inet_addr ip; - __be16 port; - u8 proto; - u8 padding; - unsigned long timeout; -}; - -struct hash_ipport6c_elem { - union nf_inet_addr ip; - __be16 port; - u8 proto; - u8 padding; - 
struct ip_set_counter counter; -}; - -struct hash_ipport6ct_elem { - union nf_inet_addr ip; - __be16 port; - u8 proto; - u8 padding; - struct ip_set_counter counter; - unsigned long timeout; -}; - /* Common functions */ static inline bool @@ -306,10 +256,9 @@ hash_ipport6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { - const struct hash_ipport *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipport6_elem e = { }; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (!ip_set_get_ip6_port(skb, opt->flags & IPSET_DIM_TWO_SRC, &e.port, &e.proto)) @@ -326,7 +275,7 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[], const struct hash_ipport *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipport6_elem e = { }; - struct ip_set_ext ext = IP_SET_INIT_UEXT(h); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 port, port_to; bool with_ports = false; int ret; @@ -396,8 +345,8 @@ static struct ip_set_type hash_ipport_type __read_mostly = { .features = IPSET_TYPE_IP | IPSET_TYPE_PORT, .dimension = IPSET_DIM_TWO, .family = NFPROTO_UNSPEC, - .revision_min = REVISION_MIN, - .revision_max = REVISION_MAX, + .revision_min = IPSET_TYPE_REV_MIN, + .revision_max = IPSET_TYPE_REV_MAX, .create = hash_ipport_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, @@ -419,6 +368,7 @@ static struct ip_set_type hash_ipport_type __read_mostly = { [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, }, .me = THIS_MODULE, }; diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index 34e8a1acce42..f5636631466e 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ 
b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -24,19 +24,20 @@ #include <linux/netfilter/ipset/ip_set_getport.h> #include <linux/netfilter/ipset/ip_set_hash.h> -#define REVISION_MIN 0 -/* 1 SCTP and UDPLITE support added */ -#define REVISION_MAX 2 /* Counters support added */ +#define IPSET_TYPE_REV_MIN 0 +/* 1 SCTP and UDPLITE support added */ +/* 2 Counters support added */ +#define IPSET_TYPE_REV_MAX 3 /* Comments support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); -IP_SET_MODULE_DESC("hash:ip,port,ip", REVISION_MIN, REVISION_MAX); +IP_SET_MODULE_DESC("hash:ip,port,ip", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_hash:ip,port,ip"); /* Type specific function prefix */ #define HTYPE hash_ipportip -/* IPv4 variants */ +/* IPv4 variant */ /* Member elements */ struct hash_ipportip4_elem { @@ -47,34 +48,6 @@ struct hash_ipportip4_elem { u8 padding; }; -struct hash_ipportip4t_elem { - __be32 ip; - __be32 ip2; - __be16 port; - u8 proto; - u8 padding; - unsigned long timeout; -}; - -struct hash_ipportip4c_elem { - __be32 ip; - __be32 ip2; - __be16 port; - u8 proto; - u8 padding; - struct ip_set_counter counter; -}; - -struct hash_ipportip4ct_elem { - __be32 ip; - __be32 ip2; - __be16 port; - u8 proto; - u8 padding; - struct ip_set_counter counter; - unsigned long timeout; -}; - static inline bool hash_ipportip4_data_equal(const struct hash_ipportip4_elem *ip1, const struct hash_ipportip4_elem *ip2, @@ -120,10 +93,9 @@ hash_ipportip4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { - const struct hash_ipportip *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportip4_elem e = { }; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (!ip_set_get_ip4_port(skb, opt->flags & IPSET_DIM_TWO_SRC, &e.port, &e.proto)) @@ -141,8 +113,8 
@@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], const struct hash_ipportip *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportip4_elem e = { }; - struct ip_set_ext ext = IP_SET_INIT_UEXT(h); - u32 ip, ip_to, p = 0, port, port_to; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); + u32 ip, ip_to = 0, p = 0, port, port_to; bool with_ports = false; int ret; @@ -231,7 +203,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], return ret; } -/* IPv6 variants */ +/* IPv6 variant */ struct hash_ipportip6_elem { union nf_inet_addr ip; @@ -241,34 +213,6 @@ struct hash_ipportip6_elem { u8 padding; }; -struct hash_ipportip6t_elem { - union nf_inet_addr ip; - union nf_inet_addr ip2; - __be16 port; - u8 proto; - u8 padding; - unsigned long timeout; -}; - -struct hash_ipportip6c_elem { - union nf_inet_addr ip; - union nf_inet_addr ip2; - __be16 port; - u8 proto; - u8 padding; - struct ip_set_counter counter; -}; - -struct hash_ipportip6ct_elem { - union nf_inet_addr ip; - union nf_inet_addr ip2; - __be16 port; - u8 proto; - u8 padding; - struct ip_set_counter counter; - unsigned long timeout; -}; - /* Common functions */ static inline bool @@ -319,10 +263,9 @@ hash_ipportip6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { - const struct hash_ipportip *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportip6_elem e = { }; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (!ip_set_get_ip6_port(skb, opt->flags & IPSET_DIM_TWO_SRC, &e.port, &e.proto)) @@ -340,7 +283,7 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[], const struct hash_ipportip *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportip6_elem e = { }; - struct ip_set_ext ext = IP_SET_INIT_UEXT(h); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 
port, port_to; bool with_ports = false; int ret; @@ -414,8 +357,8 @@ static struct ip_set_type hash_ipportip_type __read_mostly = { .features = IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_IP2, .dimension = IPSET_DIM_THREE, .family = NFPROTO_UNSPEC, - .revision_min = REVISION_MIN, - .revision_max = REVISION_MAX, + .revision_min = IPSET_TYPE_REV_MIN, + .revision_max = IPSET_TYPE_REV_MAX, .create = hash_ipportip_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, @@ -437,6 +380,7 @@ static struct ip_set_type hash_ipportip_type __read_mostly = { [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, }, .me = THIS_MODULE, }; diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index f15f3e28b9c3..5d87fe8a41ff 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -24,15 +24,16 @@ #include <linux/netfilter/ipset/ip_set_getport.h> #include <linux/netfilter/ipset/ip_set_hash.h> -#define REVISION_MIN 0 -/* 1 SCTP and UDPLITE support added */ -/* 2 Range as input support for IPv4 added */ -/* 3 nomatch flag support added */ -#define REVISION_MAX 4 /* Counters support added */ +#define IPSET_TYPE_REV_MIN 0 +/* 1 SCTP and UDPLITE support added */ +/* 2 Range as input support for IPv4 added */ +/* 3 nomatch flag support added */ +/* 4 Counters support added */ +#define IPSET_TYPE_REV_MAX 5 /* Comments support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); -IP_SET_MODULE_DESC("hash:ip,port,net", REVISION_MIN, REVISION_MAX); +IP_SET_MODULE_DESC("hash:ip,port,net", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_hash:ip,port,net"); /* Type specific function prefix */ @@ -46,7 +47,7 @@ MODULE_ALIAS("ip_set_hash:ip,port,net"); #define IP_SET_HASH_WITH_PROTO #define 
IP_SET_HASH_WITH_NETS -/* IPv4 variants */ +/* IPv4 variant */ /* Member elements */ struct hash_ipportnet4_elem { @@ -58,37 +59,6 @@ struct hash_ipportnet4_elem { u8 proto; }; -struct hash_ipportnet4t_elem { - __be32 ip; - __be32 ip2; - __be16 port; - u8 cidr:7; - u8 nomatch:1; - u8 proto; - unsigned long timeout; -}; - -struct hash_ipportnet4c_elem { - __be32 ip; - __be32 ip2; - __be16 port; - u8 cidr:7; - u8 nomatch:1; - u8 proto; - struct ip_set_counter counter; -}; - -struct hash_ipportnet4ct_elem { - __be32 ip; - __be32 ip2; - __be16 port; - u8 cidr:7; - u8 nomatch:1; - u8 proto; - struct ip_set_counter counter; - unsigned long timeout; -}; - /* Common functions */ static inline bool @@ -170,9 +140,9 @@ hash_ipportnet4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct hash_ipportnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportnet4_elem e = { - .cidr = h->nets[0].cidr ? h->nets[0].cidr - 1 : HOST_MASK - 1 + .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1, }; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (adt == IPSET_TEST) e.cidr = HOST_MASK - 1; @@ -195,9 +165,9 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], const struct hash_ipportnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportnet4_elem e = { .cidr = HOST_MASK - 1 }; - struct ip_set_ext ext = IP_SET_INIT_UEXT(h); - u32 ip, ip_to, p = 0, port, port_to; - u32 ip2_from, ip2_to, ip2_last, ip2; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); + u32 ip = 0, ip_to = 0, p = 0, port, port_to; + u32 ip2_from = 0, ip2_to = 0, ip2_last, ip2; bool with_ports = false; u8 cidr; int ret; @@ -272,7 +242,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], if (ip > ip_to) swap(ip, ip_to); } else if (tb[IPSET_ATTR_CIDR]) { - u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); + cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); if (!cidr || cidr > 
32) return -IPSET_ERR_INVALID_CIDR; @@ -306,9 +276,9 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], : port; for (; p <= port_to; p++) { e.port = htons(p); - ip2 = retried - && ip == ntohl(h->next.ip) - && p == ntohs(h->next.port) + ip2 = retried && + ip == ntohl(h->next.ip) && + p == ntohs(h->next.port) ? ntohl(h->next.ip2) : ip2_from; while (!after(ip2, ip2_to)) { e.ip2 = htonl(ip2); @@ -328,7 +298,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], return ret; } -/* IPv6 variants */ +/* IPv6 variant */ struct hash_ipportnet6_elem { union nf_inet_addr ip; @@ -339,37 +309,6 @@ struct hash_ipportnet6_elem { u8 proto; }; -struct hash_ipportnet6t_elem { - union nf_inet_addr ip; - union nf_inet_addr ip2; - __be16 port; - u8 cidr:7; - u8 nomatch:1; - u8 proto; - unsigned long timeout; -}; - -struct hash_ipportnet6c_elem { - union nf_inet_addr ip; - union nf_inet_addr ip2; - __be16 port; - u8 cidr:7; - u8 nomatch:1; - u8 proto; - struct ip_set_counter counter; -}; - -struct hash_ipportnet6ct_elem { - union nf_inet_addr ip; - union nf_inet_addr ip2; - __be16 port; - u8 cidr:7; - u8 nomatch:1; - u8 proto; - struct ip_set_counter counter; - unsigned long timeout; -}; - /* Common functions */ static inline bool @@ -454,9 +393,9 @@ hash_ipportnet6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct hash_ipportnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportnet6_elem e = { - .cidr = h->nets[0].cidr ? 
h->nets[0].cidr - 1 : HOST_MASK - 1 + .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1, }; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (adt == IPSET_TEST) e.cidr = HOST_MASK - 1; @@ -479,7 +418,7 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[], const struct hash_ipportnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportnet6_elem e = { .cidr = HOST_MASK - 1 }; - struct ip_set_ext ext = IP_SET_INIT_UEXT(h); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 port, port_to; bool with_ports = false; u8 cidr; @@ -574,8 +513,8 @@ static struct ip_set_type hash_ipportnet_type __read_mostly = { IPSET_TYPE_NOMATCH, .dimension = IPSET_DIM_THREE, .family = NFPROTO_UNSPEC, - .revision_min = REVISION_MIN, - .revision_max = REVISION_MAX, + .revision_min = IPSET_TYPE_REV_MIN, + .revision_max = IPSET_TYPE_REV_MAX, .create = hash_ipportnet_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, @@ -600,6 +539,7 @@ static struct ip_set_type hash_ipportnet_type __read_mostly = { [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, }, .me = THIS_MODULE, }; diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index 223e9f546d0f..8295cf4f9fdc 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -22,21 +22,22 @@ #include <linux/netfilter/ipset/ip_set.h> #include <linux/netfilter/ipset/ip_set_hash.h> -#define REVISION_MIN 0 -/* 1 Range as input support for IPv4 added */ -/* 2 nomatch flag support added */ -#define REVISION_MAX 3 /* Counters support added */ +#define IPSET_TYPE_REV_MIN 0 +/* 1 Range as input support for IPv4 added */ +/* 2 nomatch flag support added */ +/* 3 Counters support added */ +#define IPSET_TYPE_REV_MAX 4 /* 
Comments support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); -IP_SET_MODULE_DESC("hash:net", REVISION_MIN, REVISION_MAX); +IP_SET_MODULE_DESC("hash:net", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_hash:net"); /* Type specific function prefix */ #define HTYPE hash_net #define IP_SET_HASH_WITH_NETS -/* IPv4 variants */ +/* IPv4 variant */ /* Member elements */ struct hash_net4_elem { @@ -46,31 +47,6 @@ struct hash_net4_elem { u8 cidr; }; -struct hash_net4t_elem { - __be32 ip; - u16 padding0; - u8 nomatch; - u8 cidr; - unsigned long timeout; -}; - -struct hash_net4c_elem { - __be32 ip; - u16 padding0; - u8 nomatch; - u8 cidr; - struct ip_set_counter counter; -}; - -struct hash_net4ct_elem { - __be32 ip; - u16 padding0; - u8 nomatch; - u8 cidr; - struct ip_set_counter counter; - unsigned long timeout; -}; - /* Common functions */ static inline bool @@ -143,9 +119,9 @@ hash_net4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct hash_net *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_net4_elem e = { - .cidr = h->nets[0].cidr ? 
h->nets[0].cidr : HOST_MASK + .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), }; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (e.cidr == 0) return -EINVAL; @@ -165,8 +141,8 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], const struct hash_net *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_net4_elem e = { .cidr = HOST_MASK }; - struct ip_set_ext ext = IP_SET_INIT_UEXT(h); - u32 ip = 0, ip_to, last; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); + u32 ip = 0, ip_to = 0, last; int ret; if (unlikely(!tb[IPSET_ATTR_IP] || @@ -228,7 +204,7 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], return ret; } -/* IPv6 variants */ +/* IPv6 variant */ struct hash_net6_elem { union nf_inet_addr ip; @@ -237,31 +213,6 @@ struct hash_net6_elem { u8 cidr; }; -struct hash_net6t_elem { - union nf_inet_addr ip; - u16 padding0; - u8 nomatch; - u8 cidr; - unsigned long timeout; -}; - -struct hash_net6c_elem { - union nf_inet_addr ip; - u16 padding0; - u8 nomatch; - u8 cidr; - struct ip_set_counter counter; -}; - -struct hash_net6ct_elem { - union nf_inet_addr ip; - u16 padding0; - u8 nomatch; - u8 cidr; - struct ip_set_counter counter; - unsigned long timeout; -}; - /* Common functions */ static inline bool @@ -338,9 +289,9 @@ hash_net6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct hash_net *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_net6_elem e = { - .cidr = h->nets[0].cidr ? 
h->nets[0].cidr : HOST_MASK + .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), }; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (e.cidr == 0) return -EINVAL; @@ -357,10 +308,9 @@ static int hash_net6_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_net *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_net6_elem e = { .cidr = HOST_MASK }; - struct ip_set_ext ext = IP_SET_INIT_UEXT(h); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); int ret; if (unlikely(!tb[IPSET_ATTR_IP] || @@ -406,8 +356,8 @@ static struct ip_set_type hash_net_type __read_mostly = { .features = IPSET_TYPE_IP | IPSET_TYPE_NOMATCH, .dimension = IPSET_DIM_ONE, .family = NFPROTO_UNSPEC, - .revision_min = REVISION_MIN, - .revision_max = REVISION_MAX, + .revision_min = IPSET_TYPE_REV_MIN, + .revision_max = IPSET_TYPE_REV_MAX, .create = hash_net_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, @@ -425,6 +375,7 @@ static struct ip_set_type hash_net_type __read_mostly = { [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, }, .me = THIS_MODULE, }; diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index 7d798d5d5cd3..3f64a66bf5d9 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -23,14 +23,15 @@ #include <linux/netfilter/ipset/ip_set.h> #include <linux/netfilter/ipset/ip_set_hash.h> -#define REVISION_MIN 0 -/* 1 nomatch flag support added */ -/* 2 /0 support added */ -#define REVISION_MAX 3 /* Counters support added */ +#define IPSET_TYPE_REV_MIN 0 +/* 1 nomatch flag support added */ +/* 2 /0 support added */ +/* 3 Counters support added */ +#define IPSET_TYPE_REV_MAX 
4 /* Comments support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); -IP_SET_MODULE_DESC("hash:net,iface", REVISION_MIN, REVISION_MAX); +IP_SET_MODULE_DESC("hash:net,iface", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_hash:net,iface"); /* Interface name rbtree */ @@ -134,7 +135,7 @@ iface_add(struct rb_root *root, const char **iface) #define STREQ(a, b) (strcmp(a, b) == 0) -/* IPv4 variants */ +/* IPv4 variant */ struct hash_netiface4_elem_hashed { __be32 ip; @@ -144,7 +145,7 @@ struct hash_netiface4_elem_hashed { u8 elem; }; -/* Member elements without timeout */ +/* Member elements */ struct hash_netiface4_elem { __be32 ip; u8 physdev; @@ -154,37 +155,6 @@ struct hash_netiface4_elem { const char *iface; }; -struct hash_netiface4t_elem { - __be32 ip; - u8 physdev; - u8 cidr; - u8 nomatch; - u8 elem; - const char *iface; - unsigned long timeout; -}; - -struct hash_netiface4c_elem { - __be32 ip; - u8 physdev; - u8 cidr; - u8 nomatch; - u8 elem; - const char *iface; - struct ip_set_counter counter; -}; - -struct hash_netiface4ct_elem { - __be32 ip; - u8 physdev; - u8 cidr; - u8 nomatch; - u8 elem; - const char *iface; - struct ip_set_counter counter; - unsigned long timeout; -}; - /* Common functions */ static inline bool @@ -265,10 +235,10 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb, struct hash_netiface *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netiface4_elem e = { - .cidr = h->nets[0].cidr ? 
h->nets[0].cidr : HOST_MASK, + .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), .elem = 1, }; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); int ret; if (e.cidr == 0) @@ -319,8 +289,8 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], struct hash_netiface *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netiface4_elem e = { .cidr = HOST_MASK, .elem = 1 }; - struct ip_set_ext ext = IP_SET_INIT_UEXT(h); - u32 ip = 0, ip_to, last; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); + u32 ip = 0, ip_to = 0, last; char iface[IFNAMSIZ]; int ret; @@ -399,7 +369,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], return ret; } -/* IPv6 variants */ +/* IPv6 variant */ struct hash_netiface6_elem_hashed { union nf_inet_addr ip; @@ -418,37 +388,6 @@ struct hash_netiface6_elem { const char *iface; }; -struct hash_netiface6t_elem { - union nf_inet_addr ip; - u8 physdev; - u8 cidr; - u8 nomatch; - u8 elem; - const char *iface; - unsigned long timeout; -}; - -struct hash_netiface6c_elem { - union nf_inet_addr ip; - u8 physdev; - u8 cidr; - u8 nomatch; - u8 elem; - const char *iface; - struct ip_set_counter counter; -}; - -struct hash_netiface6ct_elem { - union nf_inet_addr ip; - u8 physdev; - u8 cidr; - u8 nomatch; - u8 elem; - const char *iface; - struct ip_set_counter counter; - unsigned long timeout; -}; - /* Common functions */ static inline bool @@ -534,10 +473,10 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb, struct hash_netiface *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netiface6_elem e = { - .cidr = h->nets[0].cidr ? 
h->nets[0].cidr : HOST_MASK, + .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), .elem = 1, }; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); int ret; if (e.cidr == 0) @@ -584,7 +523,7 @@ hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[], struct hash_netiface *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netiface6_elem e = { .cidr = HOST_MASK, .elem = 1 }; - struct ip_set_ext ext = IP_SET_INIT_UEXT(h); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); char iface[IFNAMSIZ]; int ret; @@ -645,8 +584,8 @@ static struct ip_set_type hash_netiface_type __read_mostly = { IPSET_TYPE_NOMATCH, .dimension = IPSET_DIM_TWO, .family = NFPROTO_UNSPEC, - .revision_min = REVISION_MIN, - .revision_max = REVISION_MAX, + .revision_min = IPSET_TYPE_REV_MIN, + .revision_max = IPSET_TYPE_REV_MAX, .create = hash_netiface_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, @@ -668,6 +607,7 @@ static struct ip_set_type hash_netiface_type __read_mostly = { [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, }, .me = THIS_MODULE, }; diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c new file mode 100644 index 000000000000..426032706ca9 --- /dev/null +++ b/net/netfilter/ipset/ip_set_hash_netnet.c @@ -0,0 +1,483 @@ +/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> + * Copyright (C) 2013 Oliver Smith <oliver@8.c.9.b.0.7.4.0.1.0.0.2.ip6.arpa> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +/* Kernel module implementing an IP set type: the hash:net,net type */ + +#include <linux/jhash.h> +#include <linux/module.h> +#include <linux/ip.h> +#include <linux/skbuff.h> +#include <linux/errno.h> +#include <linux/random.h> +#include <net/ip.h> +#include <net/ipv6.h> +#include <net/netlink.h> + +#include <linux/netfilter.h> +#include <linux/netfilter/ipset/pfxlen.h> +#include <linux/netfilter/ipset/ip_set.h> +#include <linux/netfilter/ipset/ip_set_hash.h> + +#define IPSET_TYPE_REV_MIN 0 +#define IPSET_TYPE_REV_MAX 0 + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Oliver Smith <oliver@8.c.9.b.0.7.4.0.1.0.0.2.ip6.arpa>"); +IP_SET_MODULE_DESC("hash:net,net", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); +MODULE_ALIAS("ip_set_hash:net,net"); + +/* Type specific function prefix */ +#define HTYPE hash_netnet +#define IP_SET_HASH_WITH_NETS +#define IPSET_NET_COUNT 2 + +/* IPv4 variants */ + +/* Member elements */ +struct hash_netnet4_elem { + union { + __be32 ip[2]; + __be64 ipcmp; /* both addresses as one 64-bit value */ + }; + u8 nomatch; + union { + u8 cidr[2]; + u16 ccmp; /* both prefix lengths as one 16-bit value */ + }; +}; + +/* Common functions */ + +/* Elements are equal when both addresses (compared in one go through the + * ipcmp overlay) and both prefix lengths (through the ccmp overlay) are + * the same in ip1 and ip2. + */ +static inline bool +hash_netnet4_data_equal(const struct hash_netnet4_elem *ip1, + const struct hash_netnet4_elem *ip2, + u32 *multi) +{ + return ip1->ipcmp == ip2->ipcmp && + ip1->ccmp == ip2->ccmp; +} + +static inline int +hash_netnet4_do_data_match(const struct hash_netnet4_elem *elem) +{ + return elem->nomatch ?
-ENOTEMPTY : 1; +} +/* Set nomatch from the IPSET_FLAG_NOMATCH bit of flags shifted right by 16. */ +static inline void +hash_netnet4_data_set_flags(struct hash_netnet4_elem *elem, u32 flags) +{ + elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH; +} +/* Exchange *flags with the element's nomatch value. */ +static inline void +hash_netnet4_data_reset_flags(struct hash_netnet4_elem *elem, u8 *flags) +{ + swap(*flags, elem->nomatch); +} +/* Restore only the second address of elem from orig. */ +static inline void +hash_netnet4_data_reset_elem(struct hash_netnet4_elem *elem, + struct hash_netnet4_elem *orig) +{ + elem->ip[1] = orig->ip[1]; +} +/* Mask one address down to cidr bits and record cidr as its prefix length: + * the second address when inner is true, the first one otherwise. + */ +static inline void +hash_netnet4_data_netmask(struct hash_netnet4_elem *elem, u8 cidr, bool inner) +{ + if (inner) { + elem->ip[1] &= ip_set_netmask(cidr); + elem->cidr[1] = cidr; + } else { + elem->ip[0] &= ip_set_netmask(cidr); + elem->cidr[0] = cidr; + } +} +/* Add both addresses, both prefix lengths and, when nomatch is set, the + * IPSET_FLAG_NOMATCH flag to skb. Returns 0 on success, 1 if a put fails. + */ +static bool +hash_netnet4_data_list(struct sk_buff *skb, + const struct hash_netnet4_elem *data) +{ + u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0; + + if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, data->ip[0]) || + nla_put_ipaddr4(skb, IPSET_ATTR_IP2, data->ip[1]) || + nla_put_u8(skb, IPSET_ATTR_CIDR, data->cidr[0]) || + nla_put_u8(skb, IPSET_ATTR_CIDR2, data->cidr[1]) || + (flags && + nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return 1; +} +/* Copy both addresses of d into next through the ipcmp member. */ +static inline void +hash_netnet4_data_next(struct hash_netnet4_elem *next, + const struct hash_netnet4_elem *d) +{ + next->ipcmp = d->ipcmp; +} + +#define MTYPE hash_netnet4 +#define PF 4 +#define HOST_MASK 32 +#include "ip_set_hash_gen.h" +/* Build an element from the two addresses of the packet in skb and pass it + * to the set's handler for adt. + */ +static int +hash_netnet4_kadt(struct ip_set *set, const struct sk_buff *skb, + const struct xt_action_param *par, + enum ipset_adt adt, struct ip_set_adt_opt *opt) +{ + const struct hash_netnet *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_netnet4_elem e = { + .cidr[0] = h->nets[0].cidr[0] ? h->nets[0].cidr[0] : HOST_MASK, + .cidr[1] = h->nets[0].cidr[1] ?
h->nets[0].cidr[1] : HOST_MASK, + }; + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); + + if (adt == IPSET_TEST) + e.ccmp = (HOST_MASK << (sizeof(e.cidr[0]) * 8)) | HOST_MASK; + + ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip[0]); + ip4addrptr(skb, opt->flags & IPSET_DIM_TWO_SRC, &e.ip[1]); + e.ip[0] &= ip_set_netmask(e.cidr[0]); + e.ip[1] &= ip_set_netmask(e.cidr[1]); + + return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); +} + +/* Userspace add/del/test. A side without a range end stands for the network + * given by its address and prefix length; ranges are split into CIDR blocks + * and every combination of blocks is passed to the set's handler. + */ +static int +hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) +{ + const struct hash_netnet *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_netnet4_elem e = { .cidr[0] = HOST_MASK, + .cidr[1] = HOST_MASK }; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); + u32 ip = 0, ip_to = 0, last; + u32 ip2 = 0, ip2_from = 0, ip2_to = 0, last2; + u8 cidr, cidr2; + int ret; + + if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES))) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + + /* Check each call on its own so the real error code is returned, + * not the boolean 1 that chaining them with || would produce. + */ + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip); + if (ret) + return ret; + + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2], &ip2_from); + if (ret) + return ret; + + ret = ip_set_get_extensions(set, tb, &ext); + if (ret) + return ret; + + if (tb[IPSET_ATTR_CIDR]) { + cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); + if (!cidr || cidr > HOST_MASK) + return -IPSET_ERR_INVALID_CIDR; + e.cidr[0] = cidr; + } + + if (tb[IPSET_ATTR_CIDR2]) { + cidr2 = nla_get_u8(tb[IPSET_ATTR_CIDR2]); + if (!cidr2 || cidr2 > HOST_MASK) + return -IPSET_ERR_INVALID_CIDR; + e.cidr[1] = cidr2; + } + + if (tb[IPSET_ATTR_CADT_FLAGS]) { + u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_NOMATCH) + flags |= (IPSET_FLAG_NOMATCH << 16); +
} + + /* Single-element path only when no range end is given at all */ + if (adt == IPSET_TEST || !(tb[IPSET_ATTR_IP_TO] || + tb[IPSET_ATTR_IP2_TO])) { + e.ip[0] = htonl(ip & ip_set_hostmask(e.cidr[0])); + e.ip[1] = htonl(ip2_from & ip_set_hostmask(e.cidr[1])); + ret = adtfn(set, &e, &ext, &ext, flags); + return ip_set_enomatch(ret, flags, adt, set) ? -ret : + ip_set_eexist(ret, flags) ? 0 : ret; + } + + ip_to = ip; + if (tb[IPSET_ATTR_IP_TO]) { + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to); + if (ret) + return ret; + if (ip_to < ip) + swap(ip, ip_to); + if (ip + UINT_MAX == ip_to) + return -IPSET_ERR_HASH_RANGE; + } else { + /* No range end: cover exactly the network ip/cidr[0] */ + ip &= ip_set_hostmask(e.cidr[0]); + ip_to = ip | ~ip_set_hostmask(e.cidr[0]); + } + + ip2_to = ip2_from; + if (tb[IPSET_ATTR_IP2_TO]) { + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2_TO], &ip2_to); + if (ret) + return ret; + if (ip2_to < ip2_from) + swap(ip2_from, ip2_to); + if (ip2_from + UINT_MAX == ip2_to) + return -IPSET_ERR_HASH_RANGE; + } else { + /* No range end: cover exactly the network ip2/cidr[1] */ + ip2_from &= ip_set_hostmask(e.cidr[1]); + ip2_to = ip2_from | ~ip_set_hostmask(e.cidr[1]); + } + + /* On retry resume from the first address saved in h->next */ + if (retried) + ip = ntohl(h->next.ip[0]); + + while (!after(ip, ip_to)) { + e.ip[0] = htonl(ip); + last = ip_set_range_to_cidr(ip, ip_to, &cidr); + e.cidr[0] = cidr; + ip2 = (retried && + ip == ntohl(h->next.ip[0])) ? ntohl(h->next.ip[1]) + : ip2_from; + while (!after(ip2, ip2_to)) { + e.ip[1] = htonl(ip2); + last2 = ip_set_range_to_cidr(ip2, ip2_to, &cidr2); + e.cidr[1] = cidr2; + ret = adtfn(set, &e, &ext, &ext, flags); + if (ret && !ip_set_eexist(ret, flags)) + return ret; + else + ret = 0; + ip2 = last2 + 1; + } + ip = last + 1; + } + return ret; +} + +/* IPv6 variants */ + +struct hash_netnet6_elem { + union nf_inet_addr ip[2]; + u8 nomatch; + union { + u8 cidr[2]; + u16 ccmp; + }; +}; + +/* Common functions */ + +static inline bool +hash_netnet6_data_equal(const struct hash_netnet6_elem *ip1, + const struct hash_netnet6_elem *ip2, + u32 *multi) +{ + return ipv6_addr_equal(&ip1->ip[0].in6, &ip2->ip[0].in6) && + ipv6_addr_equal(&ip1->ip[1].in6, &ip2->ip[1].in6) && + ip1->ccmp == ip2->ccmp; +} + +static inline int +hash_netnet6_do_data_match(const struct hash_netnet6_elem *elem) +{ + return elem->nomatch ?
-ENOTEMPTY : 1; +} +/* Set nomatch from the IPSET_FLAG_NOMATCH bit of flags shifted right by 16. */ +static inline void +hash_netnet6_data_set_flags(struct hash_netnet6_elem *elem, u32 flags) +{ + elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH; +} +/* Exchange *flags with the element's nomatch value. */ +static inline void +hash_netnet6_data_reset_flags(struct hash_netnet6_elem *elem, u8 *flags) +{ + swap(*flags, elem->nomatch); +} +/* Restore only the second address of elem from orig. */ +static inline void +hash_netnet6_data_reset_elem(struct hash_netnet6_elem *elem, + struct hash_netnet6_elem *orig) +{ + elem->ip[1] = orig->ip[1]; +} +/* Apply ip6_netmask() with cidr to one address and record cidr as its prefix + * length: the second address when inner is true, the first one otherwise. + */ +static inline void +hash_netnet6_data_netmask(struct hash_netnet6_elem *elem, u8 cidr, bool inner) +{ + if (inner) { + ip6_netmask(&elem->ip[1], cidr); + elem->cidr[1] = cidr; + } else { + ip6_netmask(&elem->ip[0], cidr); + elem->cidr[0] = cidr; + } +} +/* Add both addresses, both prefix lengths and, when nomatch is set, the + * IPSET_FLAG_NOMATCH flag to skb. Returns 0 on success, 1 if a put fails. + */ +static bool +hash_netnet6_data_list(struct sk_buff *skb, + const struct hash_netnet6_elem *data) +{ + u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0; + + if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &data->ip[0].in6) || + nla_put_ipaddr6(skb, IPSET_ATTR_IP2, &data->ip[1].in6) || + nla_put_u8(skb, IPSET_ATTR_CIDR, data->cidr[0]) || + nla_put_u8(skb, IPSET_ATTR_CIDR2, data->cidr[1]) || + (flags && + nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return 1; +} +/* Intentionally empty. NOTE(review): next is typed as the IPv4 element here; + * presumably the retry slot shared by both families is declared with that + * type in ip_set_hash_gen.h - confirm before "correcting" it to the IPv6 one. + */ +static inline void +hash_netnet6_data_next(struct hash_netnet4_elem *next, + const struct hash_netnet6_elem *d) +{ +} + +#undef MTYPE +#undef PF +#undef HOST_MASK + +#define MTYPE hash_netnet6 +#define PF 6 +#define HOST_MASK 128 +#define IP_SET_EMIT_CREATE +#include "ip_set_hash_gen.h" +/* Build an element from the two addresses of the packet in skb and pass it + * to the set's handler for adt. + */ +static int +hash_netnet6_kadt(struct ip_set *set, const struct sk_buff *skb, + const struct xt_action_param *par, + enum ipset_adt adt, struct ip_set_adt_opt *opt) +{ + const struct hash_netnet *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_netnet6_elem e = { + .cidr[0] = h->nets[0].cidr[0] ? h->nets[0].cidr[0] : HOST_MASK, + .cidr[1] = h->nets[0].cidr[1] ?
h->nets[0].cidr[1] : HOST_MASK + }; + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); + + if (adt == IPSET_TEST) + e.ccmp = (HOST_MASK << (sizeof(u8)*8)) | HOST_MASK; + + ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip[0].in6); + ip6addrptr(skb, opt->flags & IPSET_DIM_TWO_SRC, &e.ip[1].in6); + ip6_netmask(&e.ip[0], e.cidr[0]); + ip6_netmask(&e.ip[1], e.cidr[1]); + + return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); +} + +/* Userspace add/del/test for one network pair; address ranges are rejected + * with -IPSET_ERR_HASH_RANGE_UNSUPPORTED. + */ +static int +hash_netnet6_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) +{ + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_netnet6_elem e = { .cidr[0] = HOST_MASK, + .cidr[1] = HOST_MASK }; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); + int ret; + + if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES))) + return -IPSET_ERR_PROTOCOL; + if (unlikely(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_IP2_TO])) + return -IPSET_ERR_HASH_RANGE_UNSUPPORTED; + + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + + /* Check each call on its own so the real error code is returned, + * not the boolean 1 that chaining them with || would produce. + */ + ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip[0]); + if (ret) + return ret; + + ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP2], &e.ip[1]); + if (ret) + return ret; + + ret = ip_set_get_extensions(set, tb, &ext); + if (ret) + return ret; + + if (tb[IPSET_ATTR_CIDR]) + e.cidr[0] = nla_get_u8(tb[IPSET_ATTR_CIDR]); + + if (tb[IPSET_ATTR_CIDR2]) + e.cidr[1] = nla_get_u8(tb[IPSET_ATTR_CIDR2]); + + if (!e.cidr[0] || e.cidr[0] > HOST_MASK || !e.cidr[1] || + e.cidr[1] > HOST_MASK) + return -IPSET_ERR_INVALID_CIDR; + + ip6_netmask(&e.ip[0], e.cidr[0]); + ip6_netmask(&e.ip[1], e.cidr[1]); + + if (tb[IPSET_ATTR_CADT_FLAGS]) { + u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_NOMATCH) + flags |= (IPSET_FLAG_NOMATCH << 16); + } + + ret = adtfn(set, &e, &ext,
&ext, flags); + + return ip_set_enomatch(ret, flags, adt, set) ? -ret : + ip_set_eexist(ret, flags) ? 0 : ret; +} + +static struct ip_set_type hash_netnet_type __read_mostly = { + .name = "hash:net,net", + .protocol = IPSET_PROTOCOL, + .features = IPSET_TYPE_IP | IPSET_TYPE_IP2 | IPSET_TYPE_NOMATCH, + .dimension = IPSET_DIM_TWO, + .family = NFPROTO_UNSPEC, + .revision_min = IPSET_TYPE_REV_MIN, + .revision_max = IPSET_TYPE_REV_MAX, + .create = hash_netnet_create, + .create_policy = { + [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, + [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 }, + [IPSET_ATTR_PROBES] = { .type = NLA_U8 }, + [IPSET_ATTR_RESIZE] = { .type = NLA_U8 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, + }, + .adt_policy = { + [IPSET_ATTR_IP] = { .type = NLA_NESTED }, + [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED }, + [IPSET_ATTR_IP2] = { .type = NLA_NESTED }, + [IPSET_ATTR_IP2_TO] = { .type = NLA_NESTED }, + [IPSET_ATTR_CIDR] = { .type = NLA_U8 }, + [IPSET_ATTR_CIDR2] = { .type = NLA_U8 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, + [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, + [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, + }, + .me = THIS_MODULE, +}; + +static int __init +hash_netnet_init(void) +{ + return ip_set_type_register(&hash_netnet_type); +} + +static void __exit +hash_netnet_fini(void) +{ + ip_set_type_unregister(&hash_netnet_type); +} + +module_init(hash_netnet_init); +module_exit(hash_netnet_fini); diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index 09d6690bee6f..7097fb0141bf 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -23,15 +23,16 @@ #include <linux/netfilter/ipset/ip_set_getport.h> #include <linux/netfilter/ipset/ip_set_hash.h> -#define REVISION_MIN 0 -/* 1 SCTP and UDPLITE support 
added */ -/* 2 Range as input support for IPv4 added */ -/* 3 nomatch flag support added */ -#define REVISION_MAX 4 /* Counters support added */ +#define IPSET_TYPE_REV_MIN 0 +/* 1 SCTP and UDPLITE support added */ +/* 2 Range as input support for IPv4 added */ +/* 3 nomatch flag support added */ +/* 4 Counters support added */ +#define IPSET_TYPE_REV_MAX 5 /* Comments support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); -IP_SET_MODULE_DESC("hash:net,port", REVISION_MIN, REVISION_MAX); +IP_SET_MODULE_DESC("hash:net,port", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_hash:net,port"); /* Type specific function prefix */ @@ -45,7 +46,7 @@ MODULE_ALIAS("ip_set_hash:net,port"); */ #define IP_SET_HASH_WITH_NETS_PACKED -/* IPv4 variants */ +/* IPv4 variant */ /* Member elements */ struct hash_netport4_elem { @@ -56,34 +57,6 @@ struct hash_netport4_elem { u8 nomatch:1; }; -struct hash_netport4t_elem { - __be32 ip; - __be16 port; - u8 proto; - u8 cidr:7; - u8 nomatch:1; - unsigned long timeout; -}; - -struct hash_netport4c_elem { - __be32 ip; - __be16 port; - u8 proto; - u8 cidr:7; - u8 nomatch:1; - struct ip_set_counter counter; -}; - -struct hash_netport4ct_elem { - __be32 ip; - __be16 port; - u8 proto; - u8 cidr:7; - u8 nomatch:1; - struct ip_set_counter counter; - unsigned long timeout; -}; - /* Common functions */ static inline bool @@ -162,9 +135,9 @@ hash_netport4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct hash_netport *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netport4_elem e = { - .cidr = h->nets[0].cidr ? 
h->nets[0].cidr - 1 : HOST_MASK - 1 + .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1, }; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (adt == IPSET_TEST) e.cidr = HOST_MASK - 1; @@ -186,8 +159,8 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], const struct hash_netport *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netport4_elem e = { .cidr = HOST_MASK - 1 }; - struct ip_set_ext ext = IP_SET_INIT_UEXT(h); - u32 port, port_to, p = 0, ip = 0, ip_to, last; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); + u32 port, port_to, p = 0, ip = 0, ip_to = 0, last; bool with_ports = false; u8 cidr; int ret; @@ -287,7 +260,7 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], return ret; } -/* IPv6 variants */ +/* IPv6 variant */ struct hash_netport6_elem { union nf_inet_addr ip; @@ -297,34 +270,6 @@ struct hash_netport6_elem { u8 nomatch:1; }; -struct hash_netport6t_elem { - union nf_inet_addr ip; - __be16 port; - u8 proto; - u8 cidr:7; - u8 nomatch:1; - unsigned long timeout; -}; - -struct hash_netport6c_elem { - union nf_inet_addr ip; - __be16 port; - u8 proto; - u8 cidr:7; - u8 nomatch:1; - struct ip_set_counter counter; -}; - -struct hash_netport6ct_elem { - union nf_inet_addr ip; - __be16 port; - u8 proto; - u8 cidr:7; - u8 nomatch:1; - struct ip_set_counter counter; - unsigned long timeout; -}; - /* Common functions */ static inline bool @@ -407,9 +352,9 @@ hash_netport6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct hash_netport *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netport6_elem e = { - .cidr = h->nets[0].cidr ? 
h->nets[0].cidr - 1 : HOST_MASK - 1, + .cidr = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK) - 1, }; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (adt == IPSET_TEST) e.cidr = HOST_MASK - 1; @@ -431,7 +376,7 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[], const struct hash_netport *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netport6_elem e = { .cidr = HOST_MASK - 1 }; - struct ip_set_ext ext = IP_SET_INIT_UEXT(h); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 port, port_to; bool with_ports = false; u8 cidr; @@ -518,8 +463,8 @@ static struct ip_set_type hash_netport_type __read_mostly = { .features = IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_NOMATCH, .dimension = IPSET_DIM_TWO, .family = NFPROTO_UNSPEC, - .revision_min = REVISION_MIN, - .revision_max = REVISION_MAX, + .revision_min = IPSET_TYPE_REV_MIN, + .revision_max = IPSET_TYPE_REV_MAX, .create = hash_netport_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, @@ -542,6 +487,7 @@ static struct ip_set_type hash_netport_type __read_mostly = { [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, }, .me = THIS_MODULE, }; diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c new file mode 100644 index 000000000000..363fab933d48 --- /dev/null +++ b/net/netfilter/ipset/ip_set_hash_netportnet.c @@ -0,0 +1,588 @@ +/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +/* Kernel module implementing an IP set type: the hash:ip,port,net type */ + +#include <linux/jhash.h> +#include <linux/module.h> +#include <linux/ip.h> +#include <linux/skbuff.h> +#include <linux/errno.h> +#include <linux/random.h> +#include <net/ip.h> +#include <net/ipv6.h> +#include <net/netlink.h> +#include <net/tcp.h> + +#include <linux/netfilter.h> +#include <linux/netfilter/ipset/pfxlen.h> +#include <linux/netfilter/ipset/ip_set.h> +#include <linux/netfilter/ipset/ip_set_getport.h> +#include <linux/netfilter/ipset/ip_set_hash.h> + +#define IPSET_TYPE_REV_MIN 0 +#define IPSET_TYPE_REV_MAX 0 /* Comments support added */ + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Oliver Smith <oliver@8.c.9.b.0.7.4.0.1.0.0.2.ip6.arpa>"); +IP_SET_MODULE_DESC("hash:net,port,net", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); +MODULE_ALIAS("ip_set_hash:net,port,net"); + +/* Type specific function prefix */ +#define HTYPE hash_netportnet +#define IP_SET_HASH_WITH_PROTO +#define IP_SET_HASH_WITH_NETS +#define IPSET_NET_COUNT 2 + +/* IPv4 variant */ + +/* Member elements */ +struct hash_netportnet4_elem { + union { + __be32 ip[2]; + __be64 ipcmp; + }; + __be16 port; + union { + u8 cidr[2]; + u16 ccmp; + }; + u8 nomatch:1; + u8 proto; +}; + +/* Common functions */ + +static inline bool +hash_netportnet4_data_equal(const struct hash_netportnet4_elem *ip1, + const struct hash_netportnet4_elem *ip2, + u32 *multi) +{ + return ip1->ipcmp == ip2->ipcmp && + ip1->ccmp == ip2->ccmp && + ip1->port == ip2->port && + ip1->proto == ip2->proto; +} + +static inline int +hash_netportnet4_do_data_match(const struct hash_netportnet4_elem *elem) +{ + return elem->nomatch ? 
-ENOTEMPTY : 1; +} + +static inline void +hash_netportnet4_data_set_flags(struct hash_netportnet4_elem *elem, u32 flags) +{ + elem->nomatch = !!((flags >> 16) & IPSET_FLAG_NOMATCH); +} + +static inline void +hash_netportnet4_data_reset_flags(struct hash_netportnet4_elem *elem, u8 *flags) +{ + swap(*flags, elem->nomatch); +} + +static inline void +hash_netportnet4_data_reset_elem(struct hash_netportnet4_elem *elem, + struct hash_netportnet4_elem *orig) +{ + elem->ip[1] = orig->ip[1]; +} + +static inline void +hash_netportnet4_data_netmask(struct hash_netportnet4_elem *elem, + u8 cidr, bool inner) +{ + if (inner) { + elem->ip[1] &= ip_set_netmask(cidr); + elem->cidr[1] = cidr; + } else { + elem->ip[0] &= ip_set_netmask(cidr); + elem->cidr[0] = cidr; + } +} + +static bool +hash_netportnet4_data_list(struct sk_buff *skb, + const struct hash_netportnet4_elem *data) +{ + u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0; + + if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, data->ip[0]) || + nla_put_ipaddr4(skb, IPSET_ATTR_IP2, data->ip[1]) || + nla_put_net16(skb, IPSET_ATTR_PORT, data->port) || + nla_put_u8(skb, IPSET_ATTR_CIDR, data->cidr[0]) || + nla_put_u8(skb, IPSET_ATTR_CIDR2, data->cidr[1]) || + nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto) || + (flags && + nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return 1; +} + +static inline void +hash_netportnet4_data_next(struct hash_netportnet4_elem *next, + const struct hash_netportnet4_elem *d) +{ + next->ipcmp = d->ipcmp; + next->port = d->port; +} + +#define MTYPE hash_netportnet4 +#define PF 4 +#define HOST_MASK 32 +#include "ip_set_hash_gen.h" + +static int +hash_netportnet4_kadt(struct ip_set *set, const struct sk_buff *skb, + const struct xt_action_param *par, + enum ipset_adt adt, struct ip_set_adt_opt *opt) +{ + const struct hash_netportnet *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_netportnet4_elem e = { + .cidr[0] = 
IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), + .cidr[1] = IP_SET_INIT_CIDR(h->nets[0].cidr[1], HOST_MASK), + }; + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); + + if (adt == IPSET_TEST) + e.ccmp = (HOST_MASK << (sizeof(e.cidr[0]) * 8)) | HOST_MASK; + + if (!ip_set_get_ip4_port(skb, opt->flags & IPSET_DIM_TWO_SRC, + &e.port, &e.proto)) + return -EINVAL; + + ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip[0]); + ip4addrptr(skb, opt->flags & IPSET_DIM_THREE_SRC, &e.ip[1]); + e.ip[0] &= ip_set_netmask(e.cidr[0]); + e.ip[1] &= ip_set_netmask(e.cidr[1]); + + return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); +} + +static int +hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) +{ + const struct hash_netportnet *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_netportnet4_elem e = { .cidr[0] = HOST_MASK, + .cidr[1] = HOST_MASK }; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); + u32 ip = 0, ip_to = 0, ip_last, p = 0, port, port_to; + u32 ip2_from = 0, ip2_to = 0, ip2_last, ip2; + bool with_ports = false; + u8 cidr, cidr2; + int ret; + + if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || + !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES))) + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) || + ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2], &ip2_from) || + ip_set_get_extensions(set, tb, &ext); + if (ret) + return ret; + + if (tb[IPSET_ATTR_CIDR]) { + cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); + if (!cidr || cidr > HOST_MASK) + return -IPSET_ERR_INVALID_CIDR; + e.cidr[0] = cidr; 
+ } + + if (tb[IPSET_ATTR_CIDR2]) { + cidr = nla_get_u8(tb[IPSET_ATTR_CIDR2]); + if (!cidr || cidr > HOST_MASK) + return -IPSET_ERR_INVALID_CIDR; + e.cidr[1] = cidr; + } + + if (tb[IPSET_ATTR_PORT]) + e.port = nla_get_be16(tb[IPSET_ATTR_PORT]); + else + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_PROTO]) { + e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); + with_ports = ip_set_proto_with_ports(e.proto); + + if (e.proto == 0) + return -IPSET_ERR_INVALID_PROTO; + } else + return -IPSET_ERR_MISSING_PROTO; + + if (!(with_ports || e.proto == IPPROTO_ICMP)) + e.port = 0; + + if (tb[IPSET_ATTR_CADT_FLAGS]) { + u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_NOMATCH) + flags |= (IPSET_FLAG_NOMATCH << 16); + } + + with_ports = with_ports && tb[IPSET_ATTR_PORT_TO]; + if (adt == IPSET_TEST || + !(tb[IPSET_ATTR_IP_TO] || with_ports || tb[IPSET_ATTR_IP2_TO])) { + e.ip[0] = htonl(ip & ip_set_hostmask(e.cidr[0])); + e.ip[1] = htonl(ip2_from & ip_set_hostmask(e.cidr[1])); + ret = adtfn(set, &e, &ext, &ext, flags); + return ip_set_enomatch(ret, flags, adt, set) ? -ret : + ip_set_eexist(ret, flags) ? 
0 : ret; + } + + ip_to = ip; + if (tb[IPSET_ATTR_IP_TO]) { + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to); + if (ret) + return ret; + if (ip > ip_to) + swap(ip, ip_to); + if (unlikely(ip + UINT_MAX == ip_to)) + return -IPSET_ERR_HASH_RANGE; + } + + port_to = port = ntohs(e.port); + if (tb[IPSET_ATTR_PORT_TO]) { + port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); + if (port > port_to) + swap(port, port_to); + } + + ip2_to = ip2_from; + if (tb[IPSET_ATTR_IP2_TO]) { + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2_TO], &ip2_to); + if (ret) + return ret; + if (ip2_from > ip2_to) + swap(ip2_from, ip2_to); + if (unlikely(ip2_from + UINT_MAX == ip2_to)) + return -IPSET_ERR_HASH_RANGE; + } + + if (retried) + ip = ntohl(h->next.ip[0]); + + while (!after(ip, ip_to)) { + e.ip[0] = htonl(ip); + ip_last = ip_set_range_to_cidr(ip, ip_to, &cidr); + e.cidr[0] = cidr; + p = retried && ip == ntohl(h->next.ip[0]) ? ntohs(h->next.port) + : port; + for (; p <= port_to; p++) { + e.port = htons(p); + ip2 = (retried && ip == ntohl(h->next.ip[0]) && + p == ntohs(h->next.port)) ? 
ntohl(h->next.ip[1]) + : ip2_from; + while (!after(ip2, ip2_to)) { + e.ip[1] = htonl(ip2); + ip2_last = ip_set_range_to_cidr(ip2, ip2_to, + &cidr2); + e.cidr[1] = cidr2; + ret = adtfn(set, &e, &ext, &ext, flags); + if (ret && !ip_set_eexist(ret, flags)) + return ret; + else + ret = 0; + ip2 = ip2_last + 1; + } + } + ip = ip_last + 1; + } + return ret; +} + +/* IPv6 variant */ + +struct hash_netportnet6_elem { + union nf_inet_addr ip[2]; + __be16 port; + union { + u8 cidr[2]; + u16 ccmp; + }; + u8 nomatch:1; + u8 proto; +}; + +/* Common functions */ + +static inline bool +hash_netportnet6_data_equal(const struct hash_netportnet6_elem *ip1, + const struct hash_netportnet6_elem *ip2, + u32 *multi) +{ + return ipv6_addr_equal(&ip1->ip[0].in6, &ip2->ip[0].in6) && + ipv6_addr_equal(&ip1->ip[1].in6, &ip2->ip[1].in6) && + ip1->ccmp == ip2->ccmp && + ip1->port == ip2->port && + ip1->proto == ip2->proto; +} + +static inline int +hash_netportnet6_do_data_match(const struct hash_netportnet6_elem *elem) +{ + return elem->nomatch ? -ENOTEMPTY : 1; +} + +static inline void +hash_netportnet6_data_set_flags(struct hash_netportnet6_elem *elem, u32 flags) +{ + elem->nomatch = !!((flags >> 16) & IPSET_FLAG_NOMATCH); +} + +static inline void +hash_netportnet6_data_reset_flags(struct hash_netportnet6_elem *elem, u8 *flags) +{ + swap(*flags, elem->nomatch); +} + +static inline void +hash_netportnet6_data_reset_elem(struct hash_netportnet6_elem *elem, + struct hash_netportnet6_elem *orig) +{ + elem->ip[1] = orig->ip[1]; +} + +static inline void +hash_netportnet6_data_netmask(struct hash_netportnet6_elem *elem, + u8 cidr, bool inner) +{ + if (inner) { + ip6_netmask(&elem->ip[1], cidr); + elem->cidr[1] = cidr; + } else { + ip6_netmask(&elem->ip[0], cidr); + elem->cidr[0] = cidr; + } +} + +static bool +hash_netportnet6_data_list(struct sk_buff *skb, + const struct hash_netportnet6_elem *data) +{ + u32 flags = data->nomatch ? 
IPSET_FLAG_NOMATCH : 0; + + if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &data->ip[0].in6) || + nla_put_ipaddr6(skb, IPSET_ATTR_IP2, &data->ip[1].in6) || + nla_put_net16(skb, IPSET_ATTR_PORT, data->port) || + nla_put_u8(skb, IPSET_ATTR_CIDR, data->cidr[0]) || + nla_put_u8(skb, IPSET_ATTR_CIDR2, data->cidr[1]) || + nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto) || + (flags && + nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return 1; +} + +static inline void +hash_netportnet6_data_next(struct hash_netportnet4_elem *next, + const struct hash_netportnet6_elem *d) +{ + next->port = d->port; +} + +#undef MTYPE +#undef PF +#undef HOST_MASK + +#define MTYPE hash_netportnet6 +#define PF 6 +#define HOST_MASK 128 +#define IP_SET_EMIT_CREATE +#include "ip_set_hash_gen.h" + +static int +hash_netportnet6_kadt(struct ip_set *set, const struct sk_buff *skb, + const struct xt_action_param *par, + enum ipset_adt adt, struct ip_set_adt_opt *opt) +{ + const struct hash_netportnet *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_netportnet6_elem e = { + .cidr[0] = IP_SET_INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), + .cidr[1] = IP_SET_INIT_CIDR(h->nets[0].cidr[1], HOST_MASK), + }; + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); + + if (adt == IPSET_TEST) + e.ccmp = (HOST_MASK << (sizeof(u8) * 8)) | HOST_MASK; + + if (!ip_set_get_ip6_port(skb, opt->flags & IPSET_DIM_TWO_SRC, + &e.port, &e.proto)) + return -EINVAL; + + ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip[0].in6); + ip6addrptr(skb, opt->flags & IPSET_DIM_THREE_SRC, &e.ip[1].in6); + ip6_netmask(&e.ip[0], e.cidr[0]); + ip6_netmask(&e.ip[1], e.cidr[1]); + + return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); +} + +static int +hash_netportnet6_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) +{ + const struct hash_netportnet *h = set->data; + ipset_adtfn adtfn = 
set->variant->adt[adt]; + struct hash_netportnet6_elem e = { .cidr[0] = HOST_MASK, + .cidr[1] = HOST_MASK }; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); + u32 port, port_to; + bool with_ports = false; + int ret; + + if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || + !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || + !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES))) + return -IPSET_ERR_PROTOCOL; + if (unlikely(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_IP2_TO])) + return -IPSET_ERR_HASH_RANGE_UNSUPPORTED; + + if (tb[IPSET_ATTR_LINENO]) + *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); + + ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip[0]) || + ip_set_get_ipaddr6(tb[IPSET_ATTR_IP2], &e.ip[1]) || + ip_set_get_extensions(set, tb, &ext); + if (ret) + return ret; + + if (tb[IPSET_ATTR_CIDR]) + e.cidr[0] = nla_get_u8(tb[IPSET_ATTR_CIDR]); + + if (tb[IPSET_ATTR_CIDR2]) + e.cidr[1] = nla_get_u8(tb[IPSET_ATTR_CIDR2]); + + if (unlikely(!e.cidr[0] || e.cidr[0] > HOST_MASK || !e.cidr[1] || + e.cidr[1] > HOST_MASK)) + return -IPSET_ERR_INVALID_CIDR; + + ip6_netmask(&e.ip[0], e.cidr[0]); + ip6_netmask(&e.ip[1], e.cidr[1]); + + if (tb[IPSET_ATTR_PORT]) + e.port = nla_get_be16(tb[IPSET_ATTR_PORT]); + else + return -IPSET_ERR_PROTOCOL; + + if (tb[IPSET_ATTR_PROTO]) { + e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); + with_ports = ip_set_proto_with_ports(e.proto); + + if (e.proto == 0) + return -IPSET_ERR_INVALID_PROTO; + } else + return -IPSET_ERR_MISSING_PROTO; + + if (!(with_ports || e.proto == IPPROTO_ICMPV6)) + e.port = 0; + + if (tb[IPSET_ATTR_CADT_FLAGS]) { + u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_NOMATCH) + flags |= (IPSET_FLAG_NOMATCH << 16); + } + + if (adt == IPSET_TEST || !with_ports || 
!tb[IPSET_ATTR_PORT_TO]) { + ret = adtfn(set, &e, &ext, &ext, flags); + return ip_set_enomatch(ret, flags, adt, set) ? -ret : + ip_set_eexist(ret, flags) ? 0 : ret; + } + + port = ntohs(e.port); + port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); + if (port > port_to) + swap(port, port_to); + + if (retried) + port = ntohs(h->next.port); + for (; port <= port_to; port++) { + e.port = htons(port); + ret = adtfn(set, &e, &ext, &ext, flags); + + if (ret && !ip_set_eexist(ret, flags)) + return ret; + else + ret = 0; + } + return ret; +} + +static struct ip_set_type hash_netportnet_type __read_mostly = { + .name = "hash:net,port,net", + .protocol = IPSET_PROTOCOL, + .features = IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_IP2 | + IPSET_TYPE_NOMATCH, + .dimension = IPSET_DIM_THREE, + .family = NFPROTO_UNSPEC, + .revision_min = IPSET_TYPE_REV_MIN, + .revision_max = IPSET_TYPE_REV_MAX, + .create = hash_netportnet_create, + .create_policy = { + [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, + [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 }, + [IPSET_ATTR_PROBES] = { .type = NLA_U8 }, + [IPSET_ATTR_RESIZE] = { .type = NLA_U8 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, + }, + .adt_policy = { + [IPSET_ATTR_IP] = { .type = NLA_NESTED }, + [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED }, + [IPSET_ATTR_IP2] = { .type = NLA_NESTED }, + [IPSET_ATTR_IP2_TO] = { .type = NLA_NESTED }, + [IPSET_ATTR_PORT] = { .type = NLA_U16 }, + [IPSET_ATTR_PORT_TO] = { .type = NLA_U16 }, + [IPSET_ATTR_CIDR] = { .type = NLA_U8 }, + [IPSET_ATTR_CIDR2] = { .type = NLA_U8 }, + [IPSET_ATTR_PROTO] = { .type = NLA_U8 }, + [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, + [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, + [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, + [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, + }, + .me = THIS_MODULE, +}; + +static int __init 
+hash_netportnet_init(void) +{ + return ip_set_type_register(&hash_netportnet_type); +} + +static void __exit +hash_netportnet_fini(void) +{ + ip_set_type_unregister(&hash_netportnet_type); +} + +module_init(hash_netportnet_init); +module_exit(hash_netportnet_fini); diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 979b8c90e422..ec6f6d15dded 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -15,12 +15,13 @@ #include <linux/netfilter/ipset/ip_set.h> #include <linux/netfilter/ipset/ip_set_list.h> -#define REVISION_MIN 0 -#define REVISION_MAX 1 /* Counters support added */ +#define IPSET_TYPE_REV_MIN 0 +/* 1 Counters support added */ +#define IPSET_TYPE_REV_MAX 2 /* Comments support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); -IP_SET_MODULE_DESC("list:set", REVISION_MIN, REVISION_MAX); +IP_SET_MODULE_DESC("list:set", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_list:set"); /* Member elements */ @@ -28,28 +29,6 @@ struct set_elem { ip_set_id_t id; }; -struct sett_elem { - struct { - ip_set_id_t id; - } __attribute__ ((aligned)); - unsigned long timeout; -}; - -struct setc_elem { - struct { - ip_set_id_t id; - } __attribute__ ((aligned)); - struct ip_set_counter counter; -}; - -struct setct_elem { - struct { - ip_set_id_t id; - } __attribute__ ((aligned)); - struct ip_set_counter counter; - unsigned long timeout; -}; - struct set_adt_elem { ip_set_id_t id; ip_set_id_t refid; @@ -58,24 +37,14 @@ struct set_adt_elem { /* Type structure */ struct list_set { - size_t dsize; /* element size */ - size_t offset[IPSET_OFFSET_MAX]; /* Offsets to extensions */ u32 size; /* size of set list array */ - u32 timeout; /* timeout value */ struct timer_list gc; /* garbage collection */ + struct net *net; /* namespace */ struct set_elem members[0]; /* the set members */ }; -static inline struct set_elem * -list_set_elem(const 
struct list_set *map, u32 id) -{ - return (struct set_elem *)((void *)map->members + id * map->dsize); -} - -#define ext_timeout(e, m) \ -(unsigned long *)((void *)(e) + (m)->offset[IPSET_OFFSET_TIMEOUT]) -#define ext_counter(e, m) \ -(struct ip_set_counter *)((void *)(e) + (m)->offset[IPSET_OFFSET_COUNTER]) +#define list_set_elem(set, map, id) \ + (struct set_elem *)((void *)(map)->members + (id) * (set)->dsize) static int list_set_ktest(struct ip_set *set, const struct sk_buff *skb, @@ -92,16 +61,16 @@ list_set_ktest(struct ip_set *set, const struct sk_buff *skb, if (opt->cmdflags & IPSET_FLAG_SKIP_SUBCOUNTER_UPDATE) opt->cmdflags &= ~IPSET_FLAG_SKIP_COUNTER_UPDATE; for (i = 0; i < map->size; i++) { - e = list_set_elem(map, i); + e = list_set_elem(set, map, i); if (e->id == IPSET_INVALID_ID) return 0; if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(e, map))) + ip_set_timeout_expired(ext_timeout(e, set))) continue; ret = ip_set_test(e->id, skb, par, opt); if (ret > 0) { if (SET_WITH_COUNTER(set)) - ip_set_update_counter(ext_counter(e, map), + ip_set_update_counter(ext_counter(e, set), ext, &opt->ext, cmdflags); return ret; @@ -121,11 +90,11 @@ list_set_kadd(struct ip_set *set, const struct sk_buff *skb, int ret; for (i = 0; i < map->size; i++) { - e = list_set_elem(map, i); + e = list_set_elem(set, map, i); if (e->id == IPSET_INVALID_ID) return 0; if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(e, map))) + ip_set_timeout_expired(ext_timeout(e, set))) continue; ret = ip_set_add(e->id, skb, par, opt); if (ret == 0) @@ -145,11 +114,11 @@ list_set_kdel(struct ip_set *set, const struct sk_buff *skb, int ret; for (i = 0; i < map->size; i++) { - e = list_set_elem(map, i); + e = list_set_elem(set, map, i); if (e->id == IPSET_INVALID_ID) return 0; if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(e, map))) + ip_set_timeout_expired(ext_timeout(e, set))) continue; ret = ip_set_del(e->id, skb, par, opt); if (ret == 0) @@ 
-163,8 +132,7 @@ list_set_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { - struct list_set *map = set->data; - struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, map); + struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); switch (adt) { case IPSET_TEST: @@ -188,10 +156,10 @@ id_eq(const struct ip_set *set, u32 i, ip_set_id_t id) if (i >= map->size) return 0; - e = list_set_elem(map, i); + e = list_set_elem(set, map, i); return !!(e->id == id && !(SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(e, map)))); + ip_set_timeout_expired(ext_timeout(e, set)))); } static int @@ -199,28 +167,36 @@ list_set_add(struct ip_set *set, u32 i, struct set_adt_elem *d, const struct ip_set_ext *ext) { struct list_set *map = set->data; - struct set_elem *e = list_set_elem(map, i); + struct set_elem *e = list_set_elem(set, map, i); if (e->id != IPSET_INVALID_ID) { - if (i == map->size - 1) + if (i == map->size - 1) { /* Last element replaced: e.g. 
add new,before,last */ - ip_set_put_byindex(e->id); - else { - struct set_elem *x = list_set_elem(map, map->size - 1); + ip_set_put_byindex(map->net, e->id); + ip_set_ext_destroy(set, e); + } else { + struct set_elem *x = list_set_elem(set, map, + map->size - 1); /* Last element pushed off */ - if (x->id != IPSET_INVALID_ID) - ip_set_put_byindex(x->id); - memmove(list_set_elem(map, i + 1), e, - map->dsize * (map->size - (i + 1))); + if (x->id != IPSET_INVALID_ID) { + ip_set_put_byindex(map->net, x->id); + ip_set_ext_destroy(set, x); + } + memmove(list_set_elem(set, map, i + 1), e, + set->dsize * (map->size - (i + 1))); + /* Extensions must be initialized to zero */ + memset(e, 0, set->dsize); } } e->id = d->id; if (SET_WITH_TIMEOUT(set)) - ip_set_timeout_set(ext_timeout(e, map), ext->timeout); + ip_set_timeout_set(ext_timeout(e, set), ext->timeout); if (SET_WITH_COUNTER(set)) - ip_set_init_counter(ext_counter(e, map), ext); + ip_set_init_counter(ext_counter(e, set), ext); + if (SET_WITH_COMMENT(set)) + ip_set_init_comment(ext_comment(e, set), ext); return 0; } @@ -228,16 +204,17 @@ static int list_set_del(struct ip_set *set, u32 i) { struct list_set *map = set->data; - struct set_elem *e = list_set_elem(map, i); + struct set_elem *e = list_set_elem(set, map, i); - ip_set_put_byindex(e->id); + ip_set_put_byindex(map->net, e->id); + ip_set_ext_destroy(set, e); if (i < map->size - 1) - memmove(e, list_set_elem(map, i + 1), - map->dsize * (map->size - (i + 1))); + memmove(e, list_set_elem(set, map, i + 1), + set->dsize * (map->size - (i + 1))); /* Last element */ - e = list_set_elem(map, map->size - 1); + e = list_set_elem(set, map, map->size - 1); e->id = IPSET_INVALID_ID; return 0; } @@ -247,13 +224,16 @@ set_cleanup_entries(struct ip_set *set) { struct list_set *map = set->data; struct set_elem *e; - u32 i; + u32 i = 0; - for (i = 0; i < map->size; i++) { - e = list_set_elem(map, i); + while (i < map->size) { + e = list_set_elem(set, map, i); if (e->id != 
IPSET_INVALID_ID && - ip_set_timeout_expired(ext_timeout(e, map))) + ip_set_timeout_expired(ext_timeout(e, set))) list_set_del(set, i); + /* Check element moved to position i in next loop */ + else + i++; } } @@ -268,11 +248,11 @@ list_set_utest(struct ip_set *set, void *value, const struct ip_set_ext *ext, int ret; for (i = 0; i < map->size; i++) { - e = list_set_elem(map, i); + e = list_set_elem(set, map, i); if (e->id == IPSET_INVALID_ID) return 0; else if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(e, map))) + ip_set_timeout_expired(ext_timeout(e, set))) continue; else if (e->id != d->id) continue; @@ -299,14 +279,14 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext, bool flag_exist = flags & IPSET_FLAG_EXIST; u32 i, ret = 0; + if (SET_WITH_TIMEOUT(set)) + set_cleanup_entries(set); + /* Check already added element */ for (i = 0; i < map->size; i++) { - e = list_set_elem(map, i); + e = list_set_elem(set, map, i); if (e->id == IPSET_INVALID_ID) goto insert; - else if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(e, map))) - continue; else if (e->id != d->id) continue; @@ -319,18 +299,22 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext, /* Can't re-add */ return -IPSET_ERR_EXIST; /* Update extensions */ + ip_set_ext_destroy(set, e); + if (SET_WITH_TIMEOUT(set)) - ip_set_timeout_set(ext_timeout(e, map), ext->timeout); + ip_set_timeout_set(ext_timeout(e, set), ext->timeout); if (SET_WITH_COUNTER(set)) - ip_set_init_counter(ext_counter(e, map), ext); + ip_set_init_counter(ext_counter(e, set), ext); + if (SET_WITH_COMMENT(set)) + ip_set_init_comment(ext_comment(e, set), ext); /* Set is already added to the list */ - ip_set_put_byindex(d->id); + ip_set_put_byindex(map->net, d->id); return 0; } insert: ret = -IPSET_ERR_LIST_FULL; for (i = 0; i < map->size && ret == -IPSET_ERR_LIST_FULL; i++) { - e = list_set_elem(map, i); + e = list_set_elem(set, map, i); if (e->id == 
IPSET_INVALID_ID) ret = d->before != 0 ? -IPSET_ERR_REF_EXIST : list_set_add(set, i, d, ext); @@ -355,12 +339,12 @@ list_set_udel(struct ip_set *set, void *value, const struct ip_set_ext *ext, u32 i; for (i = 0; i < map->size; i++) { - e = list_set_elem(map, i); + e = list_set_elem(set, map, i); if (e->id == IPSET_INVALID_ID) return d->before != 0 ? -IPSET_ERR_REF_EXIST : -IPSET_ERR_EXIST; else if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(e, map))) + ip_set_timeout_expired(ext_timeout(e, set))) continue; else if (e->id != d->id) continue; @@ -386,7 +370,7 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[], struct list_set *map = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct set_adt_elem e = { .refid = IPSET_INVALID_ID }; - struct ip_set_ext ext = IP_SET_INIT_UEXT(map); + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); struct ip_set *s; int ret = 0; @@ -403,7 +387,7 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[], ret = ip_set_get_extensions(set, tb, &ext); if (ret) return ret; - e.id = ip_set_get_byname(nla_data(tb[IPSET_ATTR_NAME]), &s); + e.id = ip_set_get_byname(map->net, nla_data(tb[IPSET_ATTR_NAME]), &s); if (e.id == IPSET_INVALID_ID) return -IPSET_ERR_NAME; /* "Loop detection" */ @@ -423,7 +407,8 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[], } if (tb[IPSET_ATTR_NAMEREF]) { - e.refid = ip_set_get_byname(nla_data(tb[IPSET_ATTR_NAMEREF]), + e.refid = ip_set_get_byname(map->net, + nla_data(tb[IPSET_ATTR_NAMEREF]), &s); if (e.refid == IPSET_INVALID_ID) { ret = -IPSET_ERR_NAMEREF; @@ -439,9 +424,9 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[], finish: if (e.refid != IPSET_INVALID_ID) - ip_set_put_byindex(e.refid); + ip_set_put_byindex(map->net, e.refid); if (adt != IPSET_ADD || ret) - ip_set_put_byindex(e.id); + ip_set_put_byindex(map->net, e.id); return ip_set_eexist(ret, flags) ? 
0 : ret; } @@ -454,9 +439,10 @@ list_set_flush(struct ip_set *set) u32 i; for (i = 0; i < map->size; i++) { - e = list_set_elem(map, i); + e = list_set_elem(set, map, i); if (e->id != IPSET_INVALID_ID) { - ip_set_put_byindex(e->id); + ip_set_put_byindex(map->net, e->id); + ip_set_ext_destroy(set, e); e->id = IPSET_INVALID_ID; } } @@ -485,14 +471,11 @@ list_set_head(struct ip_set *set, struct sk_buff *skb) if (!nested) goto nla_put_failure; if (nla_put_net32(skb, IPSET_ATTR_SIZE, htonl(map->size)) || - (SET_WITH_TIMEOUT(set) && - nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout))) || - (SET_WITH_COUNTER(set) && - nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, - htonl(IPSET_FLAG_WITH_COUNTERS))) || nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) || nla_put_net32(skb, IPSET_ATTR_MEMSIZE, - htonl(sizeof(*map) + map->size * map->dsize))) + htonl(sizeof(*map) + map->size * set->dsize))) + goto nla_put_failure; + if (unlikely(ip_set_put_flags(skb, set))) goto nla_put_failure; ipset_nest_end(skb, nested); @@ -515,11 +498,11 @@ list_set_list(const struct ip_set *set, return -EMSGSIZE; for (; cb->args[2] < map->size; cb->args[2]++) { i = cb->args[2]; - e = list_set_elem(map, i); + e = list_set_elem(set, map, i); if (e->id == IPSET_INVALID_ID) goto finish; if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(e, map))) + ip_set_timeout_expired(ext_timeout(e, set))) continue; nested = ipset_nest_start(skb, IPSET_ATTR_DATA); if (!nested) { @@ -530,15 +513,9 @@ list_set_list(const struct ip_set *set, goto nla_put_failure; } if (nla_put_string(skb, IPSET_ATTR_NAME, - ip_set_name_byindex(e->id))) - goto nla_put_failure; - if (SET_WITH_TIMEOUT(set) && - nla_put_net32(skb, IPSET_ATTR_TIMEOUT, - htonl(ip_set_timeout_get( - ext_timeout(e, map))))) + ip_set_name_byindex(map->net, e->id))) goto nla_put_failure; - if (SET_WITH_COUNTER(set) && - ip_set_put_counter(skb, ext_counter(e, map))) + if (ip_set_put_extensions(skb, set, e, true)) goto 
nla_put_failure; ipset_nest_end(skb, nested); } @@ -550,11 +527,11 @@ finish: nla_put_failure: nla_nest_cancel(skb, nested); - ipset_nest_end(skb, atd); if (unlikely(i == first)) { cb->args[2] = 0; return -EMSGSIZE; } + ipset_nest_end(skb, atd); return 0; } @@ -565,7 +542,7 @@ list_set_same_set(const struct ip_set *a, const struct ip_set *b) const struct list_set *y = b->data; return x->size == y->size && - x->timeout == y->timeout && + a->timeout == b->timeout && a->extensions == b->extensions; } @@ -594,7 +571,7 @@ list_set_gc(unsigned long ul_set) set_cleanup_entries(set); write_unlock_bh(&set->lock); - map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; + map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ; add_timer(&map->gc); } @@ -606,43 +583,40 @@ list_set_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) init_timer(&map->gc); map->gc.data = (unsigned long) set; map->gc.function = gc; - map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; + map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ; add_timer(&map->gc); } /* Create list:set type of sets */ -static struct list_set * -init_list_set(struct ip_set *set, u32 size, size_t dsize, - unsigned long timeout) +static bool +init_list_set(struct net *net, struct ip_set *set, u32 size) { struct list_set *map; struct set_elem *e; u32 i; - map = kzalloc(sizeof(*map) + size * dsize, GFP_KERNEL); + map = kzalloc(sizeof(*map) + size * set->dsize, GFP_KERNEL); if (!map) - return NULL; + return false; map->size = size; - map->dsize = dsize; - map->timeout = timeout; + map->net = net; set->data = map; for (i = 0; i < size; i++) { - e = list_set_elem(map, i); + e = list_set_elem(set, map, i); e->id = IPSET_INVALID_ID; } - return map; + return true; } static int -list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags) +list_set_create(struct net *net, struct ip_set *set, struct nlattr *tb[], + u32 flags) { - struct list_set *map; - u32 size = 
IP_SET_LIST_DEFAULT_SIZE, cadt_flags = 0; - unsigned long timeout = 0; + u32 size = IP_SET_LIST_DEFAULT_SIZE; if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_SIZE) || !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || @@ -654,45 +628,13 @@ list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags) if (size < IP_SET_LIST_MIN_SIZE) size = IP_SET_LIST_MIN_SIZE; - if (tb[IPSET_ATTR_CADT_FLAGS]) - cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); - if (tb[IPSET_ATTR_TIMEOUT]) - timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); set->variant = &set_variant; - if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) { - set->extensions |= IPSET_EXT_COUNTER; - if (tb[IPSET_ATTR_TIMEOUT]) { - map = init_list_set(set, size, - sizeof(struct setct_elem), timeout); - if (!map) - return -ENOMEM; - set->extensions |= IPSET_EXT_TIMEOUT; - map->offset[IPSET_OFFSET_TIMEOUT] = - offsetof(struct setct_elem, timeout); - map->offset[IPSET_OFFSET_COUNTER] = - offsetof(struct setct_elem, counter); - list_set_gc_init(set, list_set_gc); - } else { - map = init_list_set(set, size, - sizeof(struct setc_elem), 0); - if (!map) - return -ENOMEM; - map->offset[IPSET_OFFSET_COUNTER] = - offsetof(struct setc_elem, counter); - } - } else if (tb[IPSET_ATTR_TIMEOUT]) { - map = init_list_set(set, size, - sizeof(struct sett_elem), timeout); - if (!map) - return -ENOMEM; - set->extensions |= IPSET_EXT_TIMEOUT; - map->offset[IPSET_OFFSET_TIMEOUT] = - offsetof(struct sett_elem, timeout); + set->dsize = ip_set_elem_len(set, tb, sizeof(struct set_elem)); + if (!init_list_set(net, set, size)) + return -ENOMEM; + if (tb[IPSET_ATTR_TIMEOUT]) { + set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); list_set_gc_init(set, list_set_gc); - } else { - map = init_list_set(set, size, sizeof(struct set_elem), 0); - if (!map) - return -ENOMEM; } return 0; } @@ -703,8 +645,8 @@ static struct ip_set_type list_set_type __read_mostly = { .features = IPSET_TYPE_NAME | IPSET_DUMP_LAST, .dimension = 
IPSET_DIM_ONE, .family = NFPROTO_UNSPEC, - .revision_min = REVISION_MIN, - .revision_max = REVISION_MAX, + .revision_min = IPSET_TYPE_REV_MIN, + .revision_max = IPSET_TYPE_REV_MAX, .create = list_set_create, .create_policy = { [IPSET_ATTR_SIZE] = { .type = NLA_U32 }, @@ -721,6 +663,7 @@ static struct ip_set_type list_set_type __read_mostly = { [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, + [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING }, }, .me = THIS_MODULE, }; diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index e0c4373b4747..466410eaa482 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -52,66 +52,8 @@ module_param(sip_direct_media, int, 0600); MODULE_PARM_DESC(sip_direct_media, "Expect Media streams between signalling " "endpoints only (default 1)"); -unsigned int (*nf_nat_sip_hook)(struct sk_buff *skb, unsigned int protoff, - unsigned int dataoff, const char **dptr, - unsigned int *datalen) __read_mostly; -EXPORT_SYMBOL_GPL(nf_nat_sip_hook); - -void (*nf_nat_sip_seq_adjust_hook)(struct sk_buff *skb, unsigned int protoff, - s16 off) __read_mostly; -EXPORT_SYMBOL_GPL(nf_nat_sip_seq_adjust_hook); - -unsigned int (*nf_nat_sip_expect_hook)(struct sk_buff *skb, - unsigned int protoff, - unsigned int dataoff, - const char **dptr, - unsigned int *datalen, - struct nf_conntrack_expect *exp, - unsigned int matchoff, - unsigned int matchlen) __read_mostly; -EXPORT_SYMBOL_GPL(nf_nat_sip_expect_hook); - -unsigned int (*nf_nat_sdp_addr_hook)(struct sk_buff *skb, unsigned int protoff, - unsigned int dataoff, - const char **dptr, - unsigned int *datalen, - unsigned int sdpoff, - enum sdp_header_types type, - enum sdp_header_types term, - const union nf_inet_addr *addr) - __read_mostly; -EXPORT_SYMBOL_GPL(nf_nat_sdp_addr_hook); - -unsigned int (*nf_nat_sdp_port_hook)(struct sk_buff *skb, unsigned int protoff, - 
unsigned int dataoff, - const char **dptr, - unsigned int *datalen, - unsigned int matchoff, - unsigned int matchlen, - u_int16_t port) __read_mostly; -EXPORT_SYMBOL_GPL(nf_nat_sdp_port_hook); - -unsigned int (*nf_nat_sdp_session_hook)(struct sk_buff *skb, - unsigned int protoff, - unsigned int dataoff, - const char **dptr, - unsigned int *datalen, - unsigned int sdpoff, - const union nf_inet_addr *addr) - __read_mostly; -EXPORT_SYMBOL_GPL(nf_nat_sdp_session_hook); - -unsigned int (*nf_nat_sdp_media_hook)(struct sk_buff *skb, unsigned int protoff, - unsigned int dataoff, - const char **dptr, - unsigned int *datalen, - struct nf_conntrack_expect *rtp_exp, - struct nf_conntrack_expect *rtcp_exp, - unsigned int mediaoff, - unsigned int medialen, - union nf_inet_addr *rtp_addr) - __read_mostly; -EXPORT_SYMBOL_GPL(nf_nat_sdp_media_hook); +const struct nf_nat_sip_hooks *nf_nat_sip_hooks; +EXPORT_SYMBOL_GPL(nf_nat_sip_hooks); static int string_len(const struct nf_conn *ct, const char *dptr, const char *limit, int *shift) @@ -914,8 +856,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff, int direct_rtp = 0, skip_expect = 0, ret = NF_DROP; u_int16_t base_port; __be16 rtp_port, rtcp_port; - typeof(nf_nat_sdp_port_hook) nf_nat_sdp_port; - typeof(nf_nat_sdp_media_hook) nf_nat_sdp_media; + const struct nf_nat_sip_hooks *hooks; saddr = NULL; if (sip_direct_media) { @@ -966,22 +907,23 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff, #endif skip_expect = 1; } while (!skip_expect); - rcu_read_unlock(); base_port = ntohs(tuple.dst.u.udp.port) & ~1; rtp_port = htons(base_port); rtcp_port = htons(base_port + 1); if (direct_rtp) { - nf_nat_sdp_port = rcu_dereference(nf_nat_sdp_port_hook); - if (nf_nat_sdp_port && - !nf_nat_sdp_port(skb, protoff, dataoff, dptr, datalen, + hooks = rcu_dereference(nf_nat_sip_hooks); + if (hooks && + !hooks->sdp_port(skb, protoff, dataoff, dptr, datalen, mediaoff, medialen, ntohs(rtp_port))) goto 
err1; } - if (skip_expect) + if (skip_expect) { + rcu_read_unlock(); return NF_ACCEPT; + } rtp_exp = nf_ct_expect_alloc(ct); if (rtp_exp == NULL) @@ -995,10 +937,10 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff, nf_ct_expect_init(rtcp_exp, class, nf_ct_l3num(ct), saddr, daddr, IPPROTO_UDP, NULL, &rtcp_port); - nf_nat_sdp_media = rcu_dereference(nf_nat_sdp_media_hook); - if (nf_nat_sdp_media && ct->status & IPS_NAT_MASK && !direct_rtp) - ret = nf_nat_sdp_media(skb, protoff, dataoff, dptr, datalen, - rtp_exp, rtcp_exp, + hooks = rcu_dereference(nf_nat_sip_hooks); + if (hooks && ct->status & IPS_NAT_MASK && !direct_rtp) + ret = hooks->sdp_media(skb, protoff, dataoff, dptr, + datalen, rtp_exp, rtcp_exp, mediaoff, medialen, daddr); else { if (nf_ct_expect_related(rtp_exp) == 0) { @@ -1012,6 +954,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff, err2: nf_ct_expect_put(rtp_exp); err1: + rcu_read_unlock(); return ret; } @@ -1051,13 +994,12 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff, unsigned int caddr_len, maddr_len; unsigned int i; union nf_inet_addr caddr, maddr, rtp_addr; + const struct nf_nat_sip_hooks *hooks; unsigned int port; const struct sdp_media_type *t; int ret = NF_ACCEPT; - typeof(nf_nat_sdp_addr_hook) nf_nat_sdp_addr; - typeof(nf_nat_sdp_session_hook) nf_nat_sdp_session; - nf_nat_sdp_addr = rcu_dereference(nf_nat_sdp_addr_hook); + hooks = rcu_dereference(nf_nat_sip_hooks); /* Find beginning of session description */ if (ct_sip_get_sdp_header(ct, *dptr, 0, *datalen, @@ -1125,10 +1067,11 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff, } /* Update media connection address if present */ - if (maddr_len && nf_nat_sdp_addr && ct->status & IPS_NAT_MASK) { - ret = nf_nat_sdp_addr(skb, protoff, dataoff, + if (maddr_len && hooks && ct->status & IPS_NAT_MASK) { + ret = hooks->sdp_addr(skb, protoff, dataoff, dptr, datalen, mediaoff, - SDP_HDR_CONNECTION, 
SDP_HDR_MEDIA, + SDP_HDR_CONNECTION, + SDP_HDR_MEDIA, &rtp_addr); if (ret != NF_ACCEPT) { nf_ct_helper_log(skb, ct, "cannot mangle SDP"); @@ -1139,10 +1082,11 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff, } /* Update session connection and owner addresses */ - nf_nat_sdp_session = rcu_dereference(nf_nat_sdp_session_hook); - if (nf_nat_sdp_session && ct->status & IPS_NAT_MASK) - ret = nf_nat_sdp_session(skb, protoff, dataoff, - dptr, datalen, sdpoff, &rtp_addr); + hooks = rcu_dereference(nf_nat_sip_hooks); + if (hooks && ct->status & IPS_NAT_MASK) + ret = hooks->sdp_session(skb, protoff, dataoff, + dptr, datalen, sdpoff, + &rtp_addr); return ret; } @@ -1242,11 +1186,11 @@ static int process_register_request(struct sk_buff *skb, unsigned int protoff, unsigned int matchoff, matchlen; struct nf_conntrack_expect *exp; union nf_inet_addr *saddr, daddr; + const struct nf_nat_sip_hooks *hooks; __be16 port; u8 proto; unsigned int expires = 0; int ret; - typeof(nf_nat_sip_expect_hook) nf_nat_sip_expect; /* Expected connections can not register again. 
*/ if (ct->status & IPS_EXPECTED) @@ -1309,10 +1253,10 @@ static int process_register_request(struct sk_buff *skb, unsigned int protoff, exp->helper = nfct_help(ct)->helper; exp->flags = NF_CT_EXPECT_PERMANENT | NF_CT_EXPECT_INACTIVE; - nf_nat_sip_expect = rcu_dereference(nf_nat_sip_expect_hook); - if (nf_nat_sip_expect && ct->status & IPS_NAT_MASK) - ret = nf_nat_sip_expect(skb, protoff, dataoff, dptr, datalen, - exp, matchoff, matchlen); + hooks = rcu_dereference(nf_nat_sip_hooks); + if (hooks && ct->status & IPS_NAT_MASK) + ret = hooks->expect(skb, protoff, dataoff, dptr, datalen, + exp, matchoff, matchlen); else { if (nf_ct_expect_related(exp) != 0) { nf_ct_helper_log(skb, ct, "cannot add expectation"); @@ -1515,7 +1459,7 @@ static int process_sip_msg(struct sk_buff *skb, struct nf_conn *ct, unsigned int protoff, unsigned int dataoff, const char **dptr, unsigned int *datalen) { - typeof(nf_nat_sip_hook) nf_nat_sip; + const struct nf_nat_sip_hooks *hooks; int ret; if (strnicmp(*dptr, "SIP/2.0 ", strlen("SIP/2.0 ")) != 0) @@ -1524,9 +1468,9 @@ static int process_sip_msg(struct sk_buff *skb, struct nf_conn *ct, ret = process_sip_response(skb, protoff, dataoff, dptr, datalen); if (ret == NF_ACCEPT && ct->status & IPS_NAT_MASK) { - nf_nat_sip = rcu_dereference(nf_nat_sip_hook); - if (nf_nat_sip && !nf_nat_sip(skb, protoff, dataoff, - dptr, datalen)) { + hooks = rcu_dereference(nf_nat_sip_hooks); + if (hooks && !hooks->msg(skb, protoff, dataoff, + dptr, datalen)) { nf_ct_helper_log(skb, ct, "cannot NAT SIP message"); ret = NF_DROP; } @@ -1546,7 +1490,6 @@ static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff, s16 diff, tdiff = 0; int ret = NF_ACCEPT; bool term; - typeof(nf_nat_sip_seq_adjust_hook) nf_nat_sip_seq_adjust; if (ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED_REPLY) @@ -1610,9 +1553,11 @@ static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff, } if (ret == NF_ACCEPT && ct->status & IPS_NAT_MASK) { - nf_nat_sip_seq_adjust 
= rcu_dereference(nf_nat_sip_seq_adjust_hook); - if (nf_nat_sip_seq_adjust) - nf_nat_sip_seq_adjust(skb, protoff, tdiff); + const struct nf_nat_sip_hooks *hooks; + + hooks = rcu_dereference(nf_nat_sip_hooks); + if (hooks) + hooks->seq_adjust(skb, protoff, tdiff); } return ret; diff --git a/net/netfilter/nf_nat_sip.c b/net/netfilter/nf_nat_sip.c index f9790405b7ff..b4d691db955e 100644 --- a/net/netfilter/nf_nat_sip.c +++ b/net/netfilter/nf_nat_sip.c @@ -625,33 +625,26 @@ static struct nf_ct_helper_expectfn sip_nat = { static void __exit nf_nat_sip_fini(void) { - RCU_INIT_POINTER(nf_nat_sip_hook, NULL); - RCU_INIT_POINTER(nf_nat_sip_seq_adjust_hook, NULL); - RCU_INIT_POINTER(nf_nat_sip_expect_hook, NULL); - RCU_INIT_POINTER(nf_nat_sdp_addr_hook, NULL); - RCU_INIT_POINTER(nf_nat_sdp_port_hook, NULL); - RCU_INIT_POINTER(nf_nat_sdp_session_hook, NULL); - RCU_INIT_POINTER(nf_nat_sdp_media_hook, NULL); + RCU_INIT_POINTER(nf_nat_sip_hooks, NULL); + nf_ct_helper_expectfn_unregister(&sip_nat); synchronize_rcu(); } +static const struct nf_nat_sip_hooks sip_hooks = { + .msg = nf_nat_sip, + .seq_adjust = nf_nat_sip_seq_adjust, + .expect = nf_nat_sip_expect, + .sdp_addr = nf_nat_sdp_addr, + .sdp_port = nf_nat_sdp_port, + .sdp_session = nf_nat_sdp_session, + .sdp_media = nf_nat_sdp_media, +}; + static int __init nf_nat_sip_init(void) { - BUG_ON(nf_nat_sip_hook != NULL); - BUG_ON(nf_nat_sip_seq_adjust_hook != NULL); - BUG_ON(nf_nat_sip_expect_hook != NULL); - BUG_ON(nf_nat_sdp_addr_hook != NULL); - BUG_ON(nf_nat_sdp_port_hook != NULL); - BUG_ON(nf_nat_sdp_session_hook != NULL); - BUG_ON(nf_nat_sdp_media_hook != NULL); - RCU_INIT_POINTER(nf_nat_sip_hook, nf_nat_sip); - RCU_INIT_POINTER(nf_nat_sip_seq_adjust_hook, nf_nat_sip_seq_adjust); - RCU_INIT_POINTER(nf_nat_sip_expect_hook, nf_nat_sip_expect); - RCU_INIT_POINTER(nf_nat_sdp_addr_hook, nf_nat_sdp_addr); - RCU_INIT_POINTER(nf_nat_sdp_port_hook, nf_nat_sdp_port); - RCU_INIT_POINTER(nf_nat_sdp_session_hook, nf_nat_sdp_session); - 
RCU_INIT_POINTER(nf_nat_sdp_media_hook, nf_nat_sdp_media); + BUG_ON(nf_nat_sip_hooks != NULL); + RCU_INIT_POINTER(nf_nat_sip_hooks, &sip_hooks); nf_ct_helper_expectfn_register(&sip_nat); return 0; } diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index 50580494148d..476accd17145 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -49,10 +49,8 @@ static const struct nla_policy cttimeout_nla_policy[CTA_TIMEOUT_MAX+1] = { }; static int -ctnl_timeout_parse_policy(struct ctnl_timeout *timeout, - struct nf_conntrack_l4proto *l4proto, - struct net *net, - const struct nlattr *attr) +ctnl_timeout_parse_policy(void *timeouts, struct nf_conntrack_l4proto *l4proto, + struct net *net, const struct nlattr *attr) { int ret = 0; @@ -64,8 +62,7 @@ ctnl_timeout_parse_policy(struct ctnl_timeout *timeout, if (ret < 0) return ret; - ret = l4proto->ctnl_timeout.nlattr_to_obj(tb, net, - &timeout->data); + ret = l4proto->ctnl_timeout.nlattr_to_obj(tb, net, timeouts); } return ret; } @@ -123,7 +120,8 @@ cttimeout_new_timeout(struct sock *ctnl, struct sk_buff *skb, goto err_proto_put; } - ret = ctnl_timeout_parse_policy(matching, l4proto, net, + ret = ctnl_timeout_parse_policy(&matching->data, + l4proto, net, cda[CTA_TIMEOUT_DATA]); return ret; } @@ -138,7 +136,7 @@ cttimeout_new_timeout(struct sock *ctnl, struct sk_buff *skb, goto err_proto_put; } - ret = ctnl_timeout_parse_policy(timeout, l4proto, net, + ret = ctnl_timeout_parse_policy(&timeout->data, l4proto, net, cda[CTA_TIMEOUT_DATA]); if (ret < 0) goto err; @@ -342,6 +340,147 @@ cttimeout_del_timeout(struct sock *ctnl, struct sk_buff *skb, return ret; } +static int +cttimeout_default_set(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) +{ + __u16 l3num; + __u8 l4num; + struct nf_conntrack_l4proto *l4proto; + struct net *net = sock_net(skb->sk); + unsigned int *timeouts; + int ret; + + if 
(!cda[CTA_TIMEOUT_L3PROTO] || + !cda[CTA_TIMEOUT_L4PROTO] || + !cda[CTA_TIMEOUT_DATA]) + return -EINVAL; + + l3num = ntohs(nla_get_be16(cda[CTA_TIMEOUT_L3PROTO])); + l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]); + l4proto = nf_ct_l4proto_find_get(l3num, l4num); + + /* This protocol is not supported, skip. */ + if (l4proto->l4proto != l4num) { + ret = -EOPNOTSUPP; + goto err; + } + + timeouts = l4proto->get_timeouts(net); + + ret = ctnl_timeout_parse_policy(timeouts, l4proto, net, + cda[CTA_TIMEOUT_DATA]); + if (ret < 0) + goto err; + + nf_ct_l4proto_put(l4proto); + return 0; +err: + nf_ct_l4proto_put(l4proto); + return ret; +} + +static int +cttimeout_default_fill_info(struct net *net, struct sk_buff *skb, u32 portid, + u32 seq, u32 type, int event, + struct nf_conntrack_l4proto *l4proto) +{ + struct nlmsghdr *nlh; + struct nfgenmsg *nfmsg; + unsigned int flags = portid ? NLM_F_MULTI : 0; + + event |= NFNL_SUBSYS_CTNETLINK_TIMEOUT << 8; + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); + if (nlh == NULL) + goto nlmsg_failure; + + nfmsg = nlmsg_data(nlh); + nfmsg->nfgen_family = AF_UNSPEC; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = 0; + + if (nla_put_be16(skb, CTA_TIMEOUT_L3PROTO, htons(l4proto->l3proto)) || + nla_put_u8(skb, CTA_TIMEOUT_L4PROTO, l4proto->l4proto)) + goto nla_put_failure; + + if (likely(l4proto->ctnl_timeout.obj_to_nlattr)) { + struct nlattr *nest_parms; + unsigned int *timeouts = l4proto->get_timeouts(net); + int ret; + + nest_parms = nla_nest_start(skb, + CTA_TIMEOUT_DATA | NLA_F_NESTED); + if (!nest_parms) + goto nla_put_failure; + + ret = l4proto->ctnl_timeout.obj_to_nlattr(skb, timeouts); + if (ret < 0) + goto nla_put_failure; + + nla_nest_end(skb, nest_parms); + } + + nlmsg_end(skb, nlh); + return skb->len; + +nlmsg_failure: +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -1; +} + +static int cttimeout_default_get(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const 
cda[]) +{ + __u16 l3num; + __u8 l4num; + struct nf_conntrack_l4proto *l4proto; + struct net *net = sock_net(skb->sk); + struct sk_buff *skb2; + int ret, err; + + if (!cda[CTA_TIMEOUT_L3PROTO] || !cda[CTA_TIMEOUT_L4PROTO]) + return -EINVAL; + + l3num = ntohs(nla_get_be16(cda[CTA_TIMEOUT_L3PROTO])); + l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]); + l4proto = nf_ct_l4proto_find_get(l3num, l4num); + + /* This protocol is not supported, skip. */ + if (l4proto->l4proto != l4num) { + err = -EOPNOTSUPP; + goto err; + } + + skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (skb2 == NULL) { + err = -ENOMEM; + goto err; + } + + ret = cttimeout_default_fill_info(net, skb2, NETLINK_CB(skb).portid, + nlh->nlmsg_seq, + NFNL_MSG_TYPE(nlh->nlmsg_type), + IPCTNL_MSG_TIMEOUT_DEFAULT_SET, + l4proto); + if (ret <= 0) { + kfree_skb(skb2); + err = -ENOMEM; + goto err; + } + ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); + if (ret > 0) + ret = 0; + + /* this avoids a loop in nfnetlink. */ + return ret == -EAGAIN ? 
-ENOBUFS : ret; +err: + nf_ct_l4proto_put(l4proto); + return err; +} + #ifdef CONFIG_NF_CONNTRACK_TIMEOUT static struct ctnl_timeout *ctnl_timeout_find_get(const char *name) { @@ -384,6 +523,12 @@ static const struct nfnl_callback cttimeout_cb[IPCTNL_MSG_TIMEOUT_MAX] = { [IPCTNL_MSG_TIMEOUT_DELETE] = { .call = cttimeout_del_timeout, .attr_count = CTA_TIMEOUT_MAX, .policy = cttimeout_nla_policy }, + [IPCTNL_MSG_TIMEOUT_DEFAULT_SET]= { .call = cttimeout_default_set, + .attr_count = CTA_TIMEOUT_MAX, + .policy = cttimeout_nla_policy }, + [IPCTNL_MSG_TIMEOUT_DEFAULT_GET]= { .call = cttimeout_default_get, + .attr_count = CTA_TIMEOUT_MAX, + .policy = cttimeout_nla_policy }, }; static const struct nfnetlink_subsystem cttimeout_subsys = { diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index d92cc317bf8b..3c4b69e5fe17 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -319,7 +319,8 @@ nfulnl_set_flags(struct nfulnl_instance *inst, u_int16_t flags) } static struct sk_buff * -nfulnl_alloc_skb(u32 peer_portid, unsigned int inst_size, unsigned int pkt_size) +nfulnl_alloc_skb(struct net *net, u32 peer_portid, unsigned int inst_size, + unsigned int pkt_size) { struct sk_buff *skb; unsigned int n; @@ -328,13 +329,13 @@ nfulnl_alloc_skb(u32 peer_portid, unsigned int inst_size, unsigned int pkt_size) * message. 
WARNING: has to be <= 128k due to slab restrictions */ n = max(inst_size, pkt_size); - skb = nfnetlink_alloc_skb(&init_net, n, peer_portid, GFP_ATOMIC); + skb = nfnetlink_alloc_skb(net, n, peer_portid, GFP_ATOMIC); if (!skb) { if (n > pkt_size) { /* try to allocate only as much as we need for current * packet */ - skb = nfnetlink_alloc_skb(&init_net, pkt_size, + skb = nfnetlink_alloc_skb(net, pkt_size, peer_portid, GFP_ATOMIC); if (!skb) pr_err("nfnetlink_log: can't even alloc %u bytes\n", @@ -702,8 +703,8 @@ nfulnl_log_packet(struct net *net, } if (!inst->skb) { - inst->skb = nfulnl_alloc_skb(inst->peer_portid, inst->nlbufsiz, - size); + inst->skb = nfulnl_alloc_skb(net, inst->peer_portid, + inst->nlbufsiz, size); if (!inst->skb) goto alloc_failure; } diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index ae2e5c11d01a..21258cf70091 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -298,7 +298,7 @@ nfqnl_put_packet_info(struct sk_buff *nlskb, struct sk_buff *packet, } static struct sk_buff * -nfqnl_build_packet_message(struct nfqnl_instance *queue, +nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, struct nf_queue_entry *entry, __be32 **packet_id_ptr) { @@ -372,7 +372,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, if (queue->flags & NFQA_CFG_F_CONNTRACK) ct = nfqnl_ct_get(entskb, &size, &ctinfo); - skb = nfnetlink_alloc_skb(&init_net, size, queue->peer_portid, + skb = nfnetlink_alloc_skb(net, size, queue->peer_portid, GFP_ATOMIC); if (!skb) return NULL; @@ -525,7 +525,7 @@ __nfqnl_enqueue_packet(struct net *net, struct nfqnl_instance *queue, __be32 *packet_id_ptr; int failopen = 0; - nskb = nfqnl_build_packet_message(queue, entry, &packet_id_ptr); + nskb = nfqnl_build_packet_message(net, queue, entry, &packet_id_ptr); if (nskb == NULL) { err = -ENOMEM; goto err_out; diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index 
cd24290f3b2f..e762de5ee89b 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -43,10 +43,42 @@ optlen(const u_int8_t *opt, unsigned int offset) return opt[offset+1]; } +static u_int32_t tcpmss_reverse_mtu(struct net *net, + const struct sk_buff *skb, + unsigned int family) +{ + struct flowi fl; + const struct nf_afinfo *ai; + struct rtable *rt = NULL; + u_int32_t mtu = ~0U; + + if (family == PF_INET) { + struct flowi4 *fl4 = &fl.u.ip4; + memset(fl4, 0, sizeof(*fl4)); + fl4->daddr = ip_hdr(skb)->saddr; + } else { + struct flowi6 *fl6 = &fl.u.ip6; + + memset(fl6, 0, sizeof(*fl6)); + fl6->daddr = ipv6_hdr(skb)->saddr; + } + rcu_read_lock(); + ai = nf_get_afinfo(family); + if (ai != NULL) + ai->route(net, (struct dst_entry **)&rt, &fl, false); + rcu_read_unlock(); + + if (rt != NULL) { + mtu = dst_mtu(&rt->dst); + dst_release(&rt->dst); + } + return mtu; +} + static int tcpmss_mangle_packet(struct sk_buff *skb, const struct xt_action_param *par, - unsigned int in_mtu, + unsigned int family, unsigned int tcphoff, unsigned int minlen) { @@ -76,6 +108,9 @@ tcpmss_mangle_packet(struct sk_buff *skb, return -1; if (info->mss == XT_TCPMSS_CLAMP_PMTU) { + struct net *net = dev_net(par->in ? 
par->in : par->out); + unsigned int in_mtu = tcpmss_reverse_mtu(net, skb, family); + if (dst_mtu(skb_dst(skb)) <= minlen) { net_err_ratelimited("unknown or invalid path-MTU (%u)\n", dst_mtu(skb_dst(skb))); @@ -165,37 +200,6 @@ tcpmss_mangle_packet(struct sk_buff *skb, return TCPOLEN_MSS; } -static u_int32_t tcpmss_reverse_mtu(const struct sk_buff *skb, - unsigned int family) -{ - struct flowi fl; - const struct nf_afinfo *ai; - struct rtable *rt = NULL; - u_int32_t mtu = ~0U; - - if (family == PF_INET) { - struct flowi4 *fl4 = &fl.u.ip4; - memset(fl4, 0, sizeof(*fl4)); - fl4->daddr = ip_hdr(skb)->saddr; - } else { - struct flowi6 *fl6 = &fl.u.ip6; - - memset(fl6, 0, sizeof(*fl6)); - fl6->daddr = ipv6_hdr(skb)->saddr; - } - rcu_read_lock(); - ai = nf_get_afinfo(family); - if (ai != NULL) - ai->route(&init_net, (struct dst_entry **)&rt, &fl, false); - rcu_read_unlock(); - - if (rt != NULL) { - mtu = dst_mtu(&rt->dst); - dst_release(&rt->dst); - } - return mtu; -} - static unsigned int tcpmss_tg4(struct sk_buff *skb, const struct xt_action_param *par) { @@ -204,7 +208,7 @@ tcpmss_tg4(struct sk_buff *skb, const struct xt_action_param *par) int ret; ret = tcpmss_mangle_packet(skb, par, - tcpmss_reverse_mtu(skb, PF_INET), + PF_INET, iph->ihl * 4, sizeof(*iph) + sizeof(struct tcphdr)); if (ret < 0) @@ -233,7 +237,7 @@ tcpmss_tg6(struct sk_buff *skb, const struct xt_action_param *par) if (tcphoff < 0) return NF_DROP; ret = tcpmss_mangle_packet(skb, par, - tcpmss_reverse_mtu(skb, PF_INET6), + PF_INET6, tcphoff, sizeof(*ipv6h) + sizeof(struct tcphdr)); if (ret < 0) diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c index 31790e789e22..e7c4e0e01ff5 100644 --- a/net/netfilter/xt_set.c +++ b/net/netfilter/xt_set.c @@ -81,7 +81,7 @@ set_match_v0_checkentry(const struct xt_mtchk_param *par) struct xt_set_info_match_v0 *info = par->matchinfo; ip_set_id_t index; - index = ip_set_nfnl_get_byindex(info->match_set.index); + index = ip_set_nfnl_get_byindex(par->net, 
info->match_set.index); if (index == IPSET_INVALID_ID) { pr_warning("Cannot find set indentified by id %u to match\n", @@ -91,7 +91,7 @@ set_match_v0_checkentry(const struct xt_mtchk_param *par) if (info->match_set.u.flags[IPSET_DIM_MAX-1] != 0) { pr_warning("Protocol error: set match dimension " "is over the limit!\n"); - ip_set_nfnl_put(info->match_set.index); + ip_set_nfnl_put(par->net, info->match_set.index); return -ERANGE; } @@ -106,9 +106,104 @@ set_match_v0_destroy(const struct xt_mtdtor_param *par) { struct xt_set_info_match_v0 *info = par->matchinfo; - ip_set_nfnl_put(info->match_set.index); + ip_set_nfnl_put(par->net, info->match_set.index); } +/* Revision 1 match */ + +static bool +set_match_v1(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_set_info_match_v1 *info = par->matchinfo; + ADT_OPT(opt, par->family, info->match_set.dim, + info->match_set.flags, 0, UINT_MAX); + + if (opt.flags & IPSET_RETURN_NOMATCH) + opt.cmdflags |= IPSET_FLAG_RETURN_NOMATCH; + + return match_set(info->match_set.index, skb, par, &opt, + info->match_set.flags & IPSET_INV_MATCH); +} + +static int +set_match_v1_checkentry(const struct xt_mtchk_param *par) +{ + struct xt_set_info_match_v1 *info = par->matchinfo; + ip_set_id_t index; + + index = ip_set_nfnl_get_byindex(par->net, info->match_set.index); + + if (index == IPSET_INVALID_ID) { + pr_warning("Cannot find set indentified by id %u to match\n", + info->match_set.index); + return -ENOENT; + } + if (info->match_set.dim > IPSET_DIM_MAX) { + pr_warning("Protocol error: set match dimension " + "is over the limit!\n"); + ip_set_nfnl_put(par->net, info->match_set.index); + return -ERANGE; + } + + return 0; +} + +static void +set_match_v1_destroy(const struct xt_mtdtor_param *par) +{ + struct xt_set_info_match_v1 *info = par->matchinfo; + + ip_set_nfnl_put(par->net, info->match_set.index); +} + +/* Revision 3 match */ + +static bool +match_counter(u64 counter, const struct ip_set_counter_match *info) 
+{ + switch (info->op) { + case IPSET_COUNTER_NONE: + return true; + case IPSET_COUNTER_EQ: + return counter == info->value; + case IPSET_COUNTER_NE: + return counter != info->value; + case IPSET_COUNTER_LT: + return counter < info->value; + case IPSET_COUNTER_GT: + return counter > info->value; + } + return false; +} + +static bool +set_match_v3(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_set_info_match_v3 *info = par->matchinfo; + ADT_OPT(opt, par->family, info->match_set.dim, + info->match_set.flags, info->flags, UINT_MAX); + int ret; + + if (info->packets.op != IPSET_COUNTER_NONE || + info->bytes.op != IPSET_COUNTER_NONE) + opt.cmdflags |= IPSET_FLAG_MATCH_COUNTERS; + + ret = match_set(info->match_set.index, skb, par, &opt, + info->match_set.flags & IPSET_INV_MATCH); + + if (!(ret && opt.cmdflags & IPSET_FLAG_MATCH_COUNTERS)) + return ret; + + if (!match_counter(opt.ext.packets, &info->packets)) + return 0; + return match_counter(opt.ext.bytes, &info->bytes); +} + +#define set_match_v3_checkentry set_match_v1_checkentry +#define set_match_v3_destroy set_match_v1_destroy + +/* Revision 0 interface: backward compatible with netfilter/iptables */ + static unsigned int set_target_v0(struct sk_buff *skb, const struct xt_action_param *par) { @@ -133,7 +228,7 @@ set_target_v0_checkentry(const struct xt_tgchk_param *par) ip_set_id_t index; if (info->add_set.index != IPSET_INVALID_ID) { - index = ip_set_nfnl_get_byindex(info->add_set.index); + index = ip_set_nfnl_get_byindex(par->net, info->add_set.index); if (index == IPSET_INVALID_ID) { pr_warning("Cannot find add_set index %u as target\n", info->add_set.index); @@ -142,12 +237,12 @@ set_target_v0_checkentry(const struct xt_tgchk_param *par) } if (info->del_set.index != IPSET_INVALID_ID) { - index = ip_set_nfnl_get_byindex(info->del_set.index); + index = ip_set_nfnl_get_byindex(par->net, info->del_set.index); if (index == IPSET_INVALID_ID) { pr_warning("Cannot find del_set index %u as 
target\n", info->del_set.index); if (info->add_set.index != IPSET_INVALID_ID) - ip_set_nfnl_put(info->add_set.index); + ip_set_nfnl_put(par->net, info->add_set.index); return -ENOENT; } } @@ -156,9 +251,9 @@ set_target_v0_checkentry(const struct xt_tgchk_param *par) pr_warning("Protocol error: SET target dimension " "is over the limit!\n"); if (info->add_set.index != IPSET_INVALID_ID) - ip_set_nfnl_put(info->add_set.index); + ip_set_nfnl_put(par->net, info->add_set.index); if (info->del_set.index != IPSET_INVALID_ID) - ip_set_nfnl_put(info->del_set.index); + ip_set_nfnl_put(par->net, info->del_set.index); return -ERANGE; } @@ -175,57 +270,12 @@ set_target_v0_destroy(const struct xt_tgdtor_param *par) const struct xt_set_info_target_v0 *info = par->targinfo; if (info->add_set.index != IPSET_INVALID_ID) - ip_set_nfnl_put(info->add_set.index); + ip_set_nfnl_put(par->net, info->add_set.index); if (info->del_set.index != IPSET_INVALID_ID) - ip_set_nfnl_put(info->del_set.index); + ip_set_nfnl_put(par->net, info->del_set.index); } -/* Revision 1 match and target */ - -static bool -set_match_v1(const struct sk_buff *skb, struct xt_action_param *par) -{ - const struct xt_set_info_match_v1 *info = par->matchinfo; - ADT_OPT(opt, par->family, info->match_set.dim, - info->match_set.flags, 0, UINT_MAX); - - if (opt.flags & IPSET_RETURN_NOMATCH) - opt.cmdflags |= IPSET_FLAG_RETURN_NOMATCH; - - return match_set(info->match_set.index, skb, par, &opt, - info->match_set.flags & IPSET_INV_MATCH); -} - -static int -set_match_v1_checkentry(const struct xt_mtchk_param *par) -{ - struct xt_set_info_match_v1 *info = par->matchinfo; - ip_set_id_t index; - - index = ip_set_nfnl_get_byindex(info->match_set.index); - - if (index == IPSET_INVALID_ID) { - pr_warning("Cannot find set indentified by id %u to match\n", - info->match_set.index); - return -ENOENT; - } - if (info->match_set.dim > IPSET_DIM_MAX) { - pr_warning("Protocol error: set match dimension " - "is over the limit!\n"); - 
ip_set_nfnl_put(info->match_set.index); - return -ERANGE; - } - - return 0; -} - -static void -set_match_v1_destroy(const struct xt_mtdtor_param *par) -{ - struct xt_set_info_match_v1 *info = par->matchinfo; - - ip_set_nfnl_put(info->match_set.index); -} +/* Revision 1 target */ static unsigned int set_target_v1(struct sk_buff *skb, const struct xt_action_param *par) @@ -251,7 +301,7 @@ set_target_v1_checkentry(const struct xt_tgchk_param *par) ip_set_id_t index; if (info->add_set.index != IPSET_INVALID_ID) { - index = ip_set_nfnl_get_byindex(info->add_set.index); + index = ip_set_nfnl_get_byindex(par->net, info->add_set.index); if (index == IPSET_INVALID_ID) { pr_warning("Cannot find add_set index %u as target\n", info->add_set.index); @@ -260,12 +310,12 @@ set_target_v1_checkentry(const struct xt_tgchk_param *par) } if (info->del_set.index != IPSET_INVALID_ID) { - index = ip_set_nfnl_get_byindex(info->del_set.index); + index = ip_set_nfnl_get_byindex(par->net, info->del_set.index); if (index == IPSET_INVALID_ID) { pr_warning("Cannot find del_set index %u as target\n", info->del_set.index); if (info->add_set.index != IPSET_INVALID_ID) - ip_set_nfnl_put(info->add_set.index); + ip_set_nfnl_put(par->net, info->add_set.index); return -ENOENT; } } @@ -274,9 +324,9 @@ set_target_v1_checkentry(const struct xt_tgchk_param *par) pr_warning("Protocol error: SET target dimension " "is over the limit!\n"); if (info->add_set.index != IPSET_INVALID_ID) - ip_set_nfnl_put(info->add_set.index); + ip_set_nfnl_put(par->net, info->add_set.index); if (info->del_set.index != IPSET_INVALID_ID) - ip_set_nfnl_put(info->del_set.index); + ip_set_nfnl_put(par->net, info->del_set.index); return -ERANGE; } @@ -289,9 +339,9 @@ set_target_v1_destroy(const struct xt_tgdtor_param *par) const struct xt_set_info_target_v1 *info = par->targinfo; if (info->add_set.index != IPSET_INVALID_ID) - ip_set_nfnl_put(info->add_set.index); + ip_set_nfnl_put(par->net, info->add_set.index); if 
(info->del_set.index != IPSET_INVALID_ID) - ip_set_nfnl_put(info->del_set.index); + ip_set_nfnl_put(par->net, info->del_set.index); } /* Revision 2 target */ @@ -320,52 +370,6 @@ set_target_v2(struct sk_buff *skb, const struct xt_action_param *par) #define set_target_v2_checkentry set_target_v1_checkentry #define set_target_v2_destroy set_target_v1_destroy -/* Revision 3 match */ - -static bool -match_counter(u64 counter, const struct ip_set_counter_match *info) -{ - switch (info->op) { - case IPSET_COUNTER_NONE: - return true; - case IPSET_COUNTER_EQ: - return counter == info->value; - case IPSET_COUNTER_NE: - return counter != info->value; - case IPSET_COUNTER_LT: - return counter < info->value; - case IPSET_COUNTER_GT: - return counter > info->value; - } - return false; -} - -static bool -set_match_v3(const struct sk_buff *skb, struct xt_action_param *par) -{ - const struct xt_set_info_match_v3 *info = par->matchinfo; - ADT_OPT(opt, par->family, info->match_set.dim, - info->match_set.flags, info->flags, UINT_MAX); - int ret; - - if (info->packets.op != IPSET_COUNTER_NONE || - info->bytes.op != IPSET_COUNTER_NONE) - opt.cmdflags |= IPSET_FLAG_MATCH_COUNTERS; - - ret = match_set(info->match_set.index, skb, par, &opt, - info->match_set.flags & IPSET_INV_MATCH); - - if (!(ret && opt.cmdflags & IPSET_FLAG_MATCH_COUNTERS)) - return ret; - - if (!match_counter(opt.ext.packets, &info->packets)) - return 0; - return match_counter(opt.ext.bytes, &info->bytes); -} - -#define set_match_v3_checkentry set_match_v1_checkentry -#define set_match_v3_destroy set_match_v1_destroy - static struct xt_match set_matches[] __read_mostly = { { .name = "set", diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index a481c03e2861..56e22b74cf96 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c @@ -173,7 +173,7 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb) skb->local_df = 1; - inet_get_local_port_range(&port_min, 
&port_max); + inet_get_local_port_range(net, &port_min, &port_max); src_port = vxlan_src_port(port_min, port_max, skb); err = vxlan_xmit_skb(vxlan_port->vs, rt, skb, diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 189e3c5b3d09..272d8e924cf6 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -231,14 +231,14 @@ override: } if (R_tab) { police->rate_present = true; - psched_ratecfg_precompute(&police->rate, &R_tab->rate); + psched_ratecfg_precompute(&police->rate, &R_tab->rate, 0); qdisc_put_rtab(R_tab); } else { police->rate_present = false; } if (P_tab) { police->peak_present = true; - psched_ratecfg_precompute(&police->peak, &P_tab->rate); + psched_ratecfg_precompute(&police->peak, &P_tab->rate, 0); qdisc_put_rtab(P_tab); } else { police->peak_present = false; diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index d76a35d0dc85..636d9131d870 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -137,7 +137,7 @@ static int basic_set_parms(struct net *net, struct tcf_proto *tp, struct nlattr **tb, struct nlattr *est) { - int err = -EINVAL; + int err; struct tcf_exts e; struct tcf_ematch_tree t; diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index 867b4a3e3980..16006c92c3fd 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -72,11 +72,11 @@ static void cgrp_attach(struct cgroup_subsys_state *css, struct cgroup_taskset *tset) { struct task_struct *p; - void *v; + struct cgroup_cls_state *cs = css_cls_state(css); + void *v = (void *)(unsigned long)cs->classid; cgroup_taskset_for_each(p, css, tset) { task_lock(p); - v = (void *)(unsigned long)task_cls_classid(p); iterate_fd(p->files, 0, update_classid, v); task_unlock(p); } diff --git a/net/sched/em_ipset.c b/net/sched/em_ipset.c index 938b7cbf5627..1ac41d3de5c3 100644 --- a/net/sched/em_ipset.c +++ b/net/sched/em_ipset.c @@ -24,11 +24,12 @@ static int em_ipset_change(struct tcf_proto *tp, void *data, int data_len, { struct 
xt_set_info *set = data; ip_set_id_t index; + struct net *net = qdisc_dev(tp->q)->nd_net; if (data_len != sizeof(*set)) return -EINVAL; - index = ip_set_nfnl_get_byindex(set->index); + index = ip_set_nfnl_get_byindex(net, set->index); if (index == IPSET_INVALID_ID) return -ENOENT; @@ -37,7 +38,7 @@ static int em_ipset_change(struct tcf_proto *tp, void *data, int data_len, if (em->data) return 0; - ip_set_nfnl_put(index); + ip_set_nfnl_put(net, index); return -ENOMEM; } @@ -45,7 +46,7 @@ static void em_ipset_destroy(struct tcf_proto *p, struct tcf_ematch *em) { const struct xt_set_info *set = (const void *) em->data; if (set) { - ip_set_nfnl_put(set->index); + ip_set_nfnl_put(qdisc_dev(p->q)->nd_net, set->index); kfree((void *) em->data); } } diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index 7c3de6ffa516..e5cef9567225 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -793,8 +793,10 @@ static int em_meta_change(struct tcf_proto *tp, void *data, int len, goto errout; meta = kzalloc(sizeof(*meta), GFP_KERNEL); - if (meta == NULL) + if (meta == NULL) { + err = -ENOMEM; goto errout; + } memcpy(&meta->lvalue.hdr, &hdr->left, sizeof(hdr->left)); memcpy(&meta->rvalue.hdr, &hdr->right, sizeof(hdr->right)); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 2adda7fa2d39..cd81505662b8 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -737,9 +737,11 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n) const struct Qdisc_class_ops *cops; unsigned long cl; u32 parentid; + int drops; if (n == 0) return; + drops = max_t(int, n, 0); while ((parentid = sch->parent)) { if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS)) return; @@ -756,6 +758,7 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n) cops->put(sch, cl); } sch->q.qlen -= n; + sch->qstats.drops += drops; } } EXPORT_SYMBOL(qdisc_tree_decrease_qlen); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index a74e278654aa..7fc899a943a8 100644 
--- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -829,7 +829,7 @@ void dev_deactivate_many(struct list_head *head) struct net_device *dev; bool sync_needed = false; - list_for_each_entry(dev, head, unreg_list) { + list_for_each_entry(dev, head, close_list) { netdev_for_each_tx_queue(dev, dev_deactivate_queue, &noop_qdisc); if (dev_ingress_queue(dev)) @@ -848,7 +848,7 @@ void dev_deactivate_many(struct list_head *head) synchronize_net(); /* Wait for outstanding qdisc_run calls. */ - list_for_each_entry(dev, head, unreg_list) + list_for_each_entry(dev, head, close_list) while (some_qdisc_is_busy(dev)) yield(); } @@ -857,7 +857,7 @@ void dev_deactivate(struct net_device *dev) { LIST_HEAD(single); - list_add(&dev->unreg_list, &single); + list_add(&dev->close_list, &single); dev_deactivate_many(&single); list_del(&single); } @@ -910,11 +910,12 @@ void dev_shutdown(struct net_device *dev) } void psched_ratecfg_precompute(struct psched_ratecfg *r, - const struct tc_ratespec *conf) + const struct tc_ratespec *conf, + u64 rate64) { memset(r, 0, sizeof(*r)); r->overhead = conf->overhead; - r->rate_bytes_ps = conf->rate; + r->rate_bytes_ps = max_t(u64, conf->rate, rate64); r->linklayer = (conf->linklayer & TC_LINKLAYER_MASK); r->mult = 1; /* diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 863846cc5513..0e1e38b40025 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -997,6 +997,8 @@ static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = { [TCA_HTB_CTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE }, [TCA_HTB_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE }, [TCA_HTB_DIRECT_QLEN] = { .type = NLA_U32 }, + [TCA_HTB_RATE64] = { .type = NLA_U64 }, + [TCA_HTB_CEIL64] = { .type = NLA_U64 }, }; static void htb_work_func(struct work_struct *work) @@ -1114,6 +1116,12 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg, opt.level = cl->level; if (nla_put(skb, TCA_HTB_PARMS, sizeof(opt), &opt)) goto nla_put_failure; + if 
((cl->rate.rate_bytes_ps >= (1ULL << 32)) && + nla_put_u64(skb, TCA_HTB_RATE64, cl->rate.rate_bytes_ps)) + goto nla_put_failure; + if ((cl->ceil.rate_bytes_ps >= (1ULL << 32)) && + nla_put_u64(skb, TCA_HTB_CEIL64, cl->ceil.rate_bytes_ps)) + goto nla_put_failure; nla_nest_end(skb, nest); spin_unlock_bh(root_lock); @@ -1332,6 +1340,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, struct qdisc_rate_table *rtab = NULL, *ctab = NULL; struct nlattr *tb[TCA_HTB_MAX + 1]; struct tc_htb_opt *hopt; + u64 rate64, ceil64; /* extract all subattrs from opt attr */ if (!opt) @@ -1491,8 +1500,12 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, cl->prio = TC_HTB_NUMPRIO - 1; } - psched_ratecfg_precompute(&cl->rate, &hopt->rate); - psched_ratecfg_precompute(&cl->ceil, &hopt->ceil); + rate64 = tb[TCA_HTB_RATE64] ? nla_get_u64(tb[TCA_HTB_RATE64]) : 0; + + ceil64 = tb[TCA_HTB_CEIL64] ? nla_get_u64(tb[TCA_HTB_CEIL64]) : 0; + + psched_ratecfg_precompute(&cl->rate, &hopt->rate, rate64); + psched_ratecfg_precompute(&cl->ceil, &hopt->ceil, ceil64); cl->buffer = PSCHED_TICKS2NS(hopt->buffer); cl->cbuffer = PSCHED_TICKS2NS(hopt->cbuffer); diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 1aaf1b6e51a2..b0571224f3c9 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -341,9 +341,9 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt) q->tokens = q->buffer; q->ptokens = q->mtu; - psched_ratecfg_precompute(&q->rate, &rtab->rate); + psched_ratecfg_precompute(&q->rate, &rtab->rate, 0); if (ptab) { - psched_ratecfg_precompute(&q->peak, &ptab->rate); + psched_ratecfg_precompute(&q->peak, &ptab->rate, 0); q->peak_present = true; } else { q->peak_present = false; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 911b71b26b0e..72046b9729a8 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -5890,7 +5890,7 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr) int low, high, remaining, index; unsigned int 
rover; - inet_get_local_port_range(&low, &high); + inet_get_local_port_range(sock_net(sk), &low, &high); remaining = (high - low) + 1; rover = net_random() % remaining + low; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index b9c3f9e943a9..d6e7f98fbfbf 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -468,7 +468,7 @@ expired: } err = __xfrm_state_delete(x); - if (!err && x->id.spi) + if (!err) km_state_expired(x, 1, 0); xfrm_audit_state_delete(x, err ? 0 : 1, |