diff options
author | Anthony G. Basile <blueness@gentoo.org> | 2015-12-23 02:58:43 -0500 |
---|---|---|
committer | Anthony G. Basile <blueness@gentoo.org> | 2015-12-23 02:58:43 -0500 |
commit | 3cf9059c012c71a2844696f25f29fcbd3dacbfc0 (patch) | |
tree | 7842acf860c49b259164a8ed8112b3842f5ff223 | |
parent | grsecurity-3.1-4.3.3-201512162141 (diff) | |
download | hardened-patchset-20151222.tar.gz hardened-patchset-20151222.tar.bz2 hardened-patchset-20151222.zip |
grsecurity-3.1-4.3.3-20151222212920151222
-rw-r--r-- | 4.3.3/0000_README | 6 | ||||
-rw-r--r-- | 4.3.3/1002_linux-4.3.3.patch | 4424 | ||||
-rw-r--r-- | 4.3.3/4420_grsecurity-3.1-4.3.3-201512222129.patch (renamed from 4.3.3/4420_grsecurity-3.1-4.3.3-201512162141.patch) | 295 |
3 files changed, 241 insertions, 4484 deletions
diff --git a/4.3.3/0000_README b/4.3.3/0000_README index 3e1d5a0..2c1a853 100644 --- a/4.3.3/0000_README +++ b/4.3.3/0000_README @@ -2,11 +2,7 @@ README ----------------------------------------------------------------------------- Individual Patch Descriptions: ----------------------------------------------------------------------------- -Patch: 1002_linux-4.3.3.patch -From: http://www.kernel.org -Desc: Linux 4.3.3 - -Patch: 4420_grsecurity-3.1-4.3.3-201512162141.patch +Patch: 4420_grsecurity-3.1-4.3.3-201512222129.patch From: http://www.grsecurity.net Desc: hardened-sources base patch from upstream grsecurity diff --git a/4.3.3/1002_linux-4.3.3.patch b/4.3.3/1002_linux-4.3.3.patch deleted file mode 100644 index d8cd741..0000000 --- a/4.3.3/1002_linux-4.3.3.patch +++ /dev/null @@ -1,4424 +0,0 @@ -diff --git a/Makefile b/Makefile -index 1a4953b..2070d16 100644 ---- a/Makefile -+++ b/Makefile -@@ -1,6 +1,6 @@ - VERSION = 4 - PATCHLEVEL = 3 --SUBLEVEL = 2 -+SUBLEVEL = 3 - EXTRAVERSION = - NAME = Blurry Fish Butt - -diff --git a/block/blk-merge.c b/block/blk-merge.c -index c4e9c37..0e5f4fc 100644 ---- a/block/blk-merge.c -+++ b/block/blk-merge.c -@@ -91,7 +91,7 @@ static struct bio *blk_bio_segment_split(struct request_queue *q, - - seg_size += bv.bv_len; - bvprv = bv; -- bvprvp = &bv; -+ bvprvp = &bvprv; - sectors += bv.bv_len >> 9; - continue; - } -@@ -101,7 +101,7 @@ new_segment: - - nsegs++; - bvprv = bv; -- bvprvp = &bv; -+ bvprvp = &bvprv; - seg_size = bv.bv_len; - sectors += bv.bv_len >> 9; - } -diff --git a/certs/.gitignore b/certs/.gitignore -new file mode 100644 -index 0000000..f51aea4 ---- /dev/null -+++ b/certs/.gitignore -@@ -0,0 +1,4 @@ -+# -+# Generated files -+# -+x509_certificate_list -diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c -index 128e7df..8630a77 100644 ---- a/drivers/block/rbd.c -+++ b/drivers/block/rbd.c -@@ -3444,6 +3444,7 @@ static void rbd_queue_workfn(struct work_struct *work) - goto err_rq; - } - img_request->rq = rq; -+ snapc = NULL; /* img_request consumes a ref */ - - if (op_type == OBJ_OP_DISCARD) - result = rbd_img_request_fill(img_request, OBJ_REQUEST_NODATA, -diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c -index f51d376..c2f5117 100644 ---- a/drivers/firewire/ohci.c -+++ b/drivers/firewire/ohci.c -@@ -3675,6 +3675,11 @@ static int pci_probe(struct pci_dev *dev, - - reg_write(ohci, OHCI1394_IsoXmitIntMaskSet, ~0); - ohci->it_context_support = reg_read(ohci, OHCI1394_IsoXmitIntMaskSet); -+ /* JMicron JMB38x often shows 0 at first read, just ignore it */ -+ if (!ohci->it_context_support) { -+ ohci_notice(ohci, "overriding IsoXmitIntMask\n"); -+ ohci->it_context_support = 0xf; -+ } - reg_write(ohci, OHCI1394_IsoXmitIntMaskClear, ~0); - ohci->it_context_mask = ohci->it_context_support; - ohci->n_it = hweight32(ohci->it_context_mask); -diff --git a/drivers/media/pci/cobalt/Kconfig b/drivers/media/pci/cobalt/Kconfig -index 1f88ccc..a01f0cc 100644 ---- a/drivers/media/pci/cobalt/Kconfig -+++ b/drivers/media/pci/cobalt/Kconfig -@@ -1,6 +1,6 @@ - config VIDEO_COBALT - tristate "Cisco Cobalt support" -- depends on VIDEO_V4L2 && I2C && MEDIA_CONTROLLER -+ depends on VIDEO_V4L2 && I2C && VIDEO_V4L2_SUBDEV_API - depends on PCI_MSI && MTD_COMPLEX_MAPPINGS - depends on GPIOLIB || COMPILE_TEST - depends on SND -diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c -index a937772..7f709cb 100644 ---- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c -+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c -@@ -1583,8 +1583,14 @@ err_disable_device: - static void nicvf_remove(struct pci_dev *pdev) - { - struct net_device *netdev = pci_get_drvdata(pdev); -- struct nicvf *nic = netdev_priv(netdev); -- struct net_device *pnetdev = nic->pnicvf->netdev; -+ struct nicvf *nic; -+ struct net_device *pnetdev; -+ -+ if (!netdev) -+ return; -+ -+ nic = netdev_priv(netdev); -+ pnetdev = nic->pnicvf->netdev; - - /* Check if this Qset is assigned to different VF. - * If yes, clean primary and all secondary Qsets. -diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c -index 731423c..8bead97 100644 ---- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c -+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c -@@ -4934,26 +4934,41 @@ static void rem_slave_counters(struct mlx4_dev *dev, int slave) - struct res_counter *counter; - struct res_counter *tmp; - int err; -- int index; -+ int *counters_arr = NULL; -+ int i, j; - - err = move_all_busy(dev, slave, RES_COUNTER); - if (err) - mlx4_warn(dev, "rem_slave_counters: Could not move all counters - too busy for slave %d\n", - slave); - -- spin_lock_irq(mlx4_tlock(dev)); -- list_for_each_entry_safe(counter, tmp, counter_list, com.list) { -- if (counter->com.owner == slave) { -- index = counter->com.res_id; -- rb_erase(&counter->com.node, -- &tracker->res_tree[RES_COUNTER]); -- list_del(&counter->com.list); -- kfree(counter); -- __mlx4_counter_free(dev, index); -+ counters_arr = kmalloc_array(dev->caps.max_counters, -+ sizeof(*counters_arr), GFP_KERNEL); -+ if (!counters_arr) -+ return; -+ -+ do { -+ i = 0; -+ j = 0; -+ spin_lock_irq(mlx4_tlock(dev)); -+ list_for_each_entry_safe(counter, tmp, counter_list, com.list) { -+ if (counter->com.owner == slave) { -+ counters_arr[i++] = counter->com.res_id; -+ rb_erase(&counter->com.node, -+ &tracker->res_tree[RES_COUNTER]); -+ list_del(&counter->com.list); -+ kfree(counter); -+ } -+ } -+ spin_unlock_irq(mlx4_tlock(dev)); -+ -+ while (j < i) { -+ __mlx4_counter_free(dev, counters_arr[j++]); - mlx4_release_resource(dev, slave, RES_COUNTER, 1, 0); - } -- } -- spin_unlock_irq(mlx4_tlock(dev)); -+ } while (i); -+ -+ kfree(counters_arr); - } - - static void rem_slave_xrcdns(struct mlx4_dev *dev, int slave) -diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c -index 59874d6..443632d 100644 ---- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c -+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c -@@ -1332,6 +1332,42 @@ static int mlx5e_modify_tir_lro(struct mlx5e_priv *priv, int tt) - return err; - } - -+static int mlx5e_refresh_tir_self_loopback_enable(struct mlx5_core_dev *mdev, -+ u32 tirn) -+{ -+ void *in; -+ int inlen; -+ int err; -+ -+ inlen = MLX5_ST_SZ_BYTES(modify_tir_in); -+ in = mlx5_vzalloc(inlen); -+ if (!in) -+ return -ENOMEM; -+ -+ MLX5_SET(modify_tir_in, in, bitmask.self_lb_en, 1); -+ -+ err = mlx5_core_modify_tir(mdev, tirn, in, inlen); -+ -+ kvfree(in); -+ -+ return err; -+} -+ -+static int mlx5e_refresh_tirs_self_loopback_enable(struct mlx5e_priv *priv) -+{ -+ int err; -+ int i; -+ -+ for (i = 0; i < MLX5E_NUM_TT; i++) { -+ err = mlx5e_refresh_tir_self_loopback_enable(priv->mdev, -+ priv->tirn[i]); -+ if (err) -+ return err; -+ } -+ -+ return 0; -+} -+ - static int mlx5e_set_dev_port_mtu(struct net_device *netdev) - { - struct mlx5e_priv *priv = netdev_priv(netdev); -@@ -1367,13 +1403,20 @@ int mlx5e_open_locked(struct net_device *netdev) - - err = mlx5e_set_dev_port_mtu(netdev); - if (err) -- return err; -+ goto err_clear_state_opened_flag; - - err = mlx5e_open_channels(priv); - if (err) { - netdev_err(netdev, "%s: mlx5e_open_channels failed, %d\n", - __func__, err); -- return err; -+ goto err_clear_state_opened_flag; -+ } -+ -+ err = mlx5e_refresh_tirs_self_loopback_enable(priv); -+ if (err) { -+ netdev_err(netdev, "%s: mlx5e_refresh_tirs_self_loopback_enable failed, %d\n", -+ __func__, err); -+ goto err_close_channels; - } - - mlx5e_update_carrier(priv); -@@ -1382,6 +1425,12 @@ int mlx5e_open_locked(struct net_device *netdev) - schedule_delayed_work(&priv->update_stats_work, 0); - - return 0; -+ -+err_close_channels: -+ mlx5e_close_channels(priv); -+err_clear_state_opened_flag: -+ clear_bit(MLX5E_STATE_OPENED, &priv->state); -+ return err; - } - - static int mlx5e_open(struct net_device *netdev) -@@ -1899,6 +1948,9 @@ static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev) - "Not creating net device, some required device capabilities are missing\n"); - return -ENOTSUPP; - } -+ if (!MLX5_CAP_ETH(mdev, self_lb_en_modifiable)) -+ mlx5_core_warn(mdev, "Self loop back prevention is not supported\n"); -+ - return 0; - } - -diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c -index b4f2123..79ef799 100644 ---- a/drivers/net/ethernet/realtek/r8169.c -+++ b/drivers/net/ethernet/realtek/r8169.c -@@ -7429,15 +7429,15 @@ process_pkt: - - rtl8169_rx_vlan_tag(desc, skb); - -+ if (skb->pkt_type == PACKET_MULTICAST) -+ dev->stats.multicast++; -+ - napi_gro_receive(&tp->napi, skb); - - u64_stats_update_begin(&tp->rx_stats.syncp); - tp->rx_stats.packets++; - tp->rx_stats.bytes += pkt_size; - u64_stats_update_end(&tp->rx_stats.syncp); -- -- if (skb->pkt_type == PACKET_MULTICAST) -- dev->stats.multicast++; - } - release_descriptor: - desc->opts2 = 0; -diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c -index 9c71295..85e64044 100644 ---- a/drivers/net/phy/broadcom.c -+++ b/drivers/net/phy/broadcom.c -@@ -675,7 +675,7 @@ static struct mdio_device_id __maybe_unused broadcom_tbl[] = { - { PHY_ID_BCM5461, 0xfffffff0 }, - { PHY_ID_BCM54616S, 0xfffffff0 }, - { PHY_ID_BCM5464, 0xfffffff0 }, -- { PHY_ID_BCM5482, 0xfffffff0 }, -+ { PHY_ID_BCM5481, 0xfffffff0 }, - { PHY_ID_BCM5482, 0xfffffff0 }, - { PHY_ID_BCM50610, 0xfffffff0 }, - { PHY_ID_BCM50610M, 0xfffffff0 }, -diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c -index 2a7c1be..66e0853 100644 ---- a/drivers/net/usb/qmi_wwan.c -+++ b/drivers/net/usb/qmi_wwan.c -@@ -775,6 +775,7 @@ static const struct usb_device_id products[] = { - {QMI_FIXED_INTF(0x2357, 0x9000, 4)}, /* TP-LINK MA260 */ - {QMI_FIXED_INTF(0x1bc7, 0x1200, 5)}, /* Telit LE920 */ - {QMI_FIXED_INTF(0x1bc7, 0x1201, 2)}, /* Telit LE920 */ -+ {QMI_FIXED_INTF(0x1c9e, 0x9b01, 3)}, /* XS Stick W100-2 from 4G Systems */ - {QMI_FIXED_INTF(0x0b3c, 0xc000, 4)}, /* Olivetti Olicard 100 */ - {QMI_FIXED_INTF(0x0b3c, 0xc001, 4)}, /* Olivetti Olicard 120 */ - {QMI_FIXED_INTF(0x0b3c, 0xc002, 4)}, /* Olivetti Olicard 140 */ -diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c -index 488c6f5..c9e309c 100644 ---- a/drivers/net/vrf.c -+++ b/drivers/net/vrf.c -@@ -581,7 +581,6 @@ static int vrf_newlink(struct net *src_net, struct net_device *dev, - { - struct net_vrf *vrf = netdev_priv(dev); - struct net_vrf_dev *vrf_ptr; -- int err; - - if (!data || !data[IFLA_VRF_TABLE]) - return -EINVAL; -@@ -590,26 +589,16 @@ static int vrf_newlink(struct net *src_net, struct net_device *dev, - - dev->priv_flags |= IFF_VRF_MASTER; - -- err = -ENOMEM; - vrf_ptr = kmalloc(sizeof(*dev->vrf_ptr), GFP_KERNEL); - if (!vrf_ptr) -- goto out_fail; -+ return -ENOMEM; - - vrf_ptr->ifindex = dev->ifindex; - vrf_ptr->tb_id = vrf->tb_id; - -- err = register_netdevice(dev); -- if (err < 0) -- goto out_fail; -- - rcu_assign_pointer(dev->vrf_ptr, vrf_ptr); - -- return 0; -- --out_fail: -- kfree(vrf_ptr); -- free_netdev(dev); -- return err; -+ return register_netdev(dev); - } - - static size_t vrf_nl_getsize(const struct net_device *dev) -diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h -index 938efe3..94eea1f 100644 ---- a/fs/btrfs/ctree.h -+++ b/fs/btrfs/ctree.h -@@ -3398,7 +3398,7 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans, - int btrfs_free_extent(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, -- u64 owner, u64 offset, int no_quota); -+ u64 owner, u64 offset); - - int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len, - int delalloc); -@@ -3411,7 +3411,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, - int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - u64 bytenr, u64 num_bytes, u64 parent, -- u64 root_objectid, u64 owner, u64 offset, int no_quota); -+ u64 root_objectid, u64 owner, u64 offset); - - int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans, - struct btrfs_root *root); -diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c -index ac3e81d..7832031 100644 ---- a/fs/btrfs/delayed-ref.c -+++ b/fs/btrfs/delayed-ref.c -@@ -197,6 +197,119 @@ static inline void drop_delayed_ref(struct btrfs_trans_handle *trans, - trans->delayed_ref_updates--; - } - -+static bool merge_ref(struct btrfs_trans_handle *trans, -+ struct btrfs_delayed_ref_root *delayed_refs, -+ struct btrfs_delayed_ref_head *head, -+ struct btrfs_delayed_ref_node *ref, -+ u64 seq) -+{ -+ struct btrfs_delayed_ref_node *next; -+ bool done = false; -+ -+ next = list_first_entry(&head->ref_list, struct btrfs_delayed_ref_node, -+ list); -+ while (!done && &next->list != &head->ref_list) { -+ int mod; -+ struct btrfs_delayed_ref_node *next2; -+ -+ next2 = list_next_entry(next, list); -+ -+ if (next == ref) -+ goto next; -+ -+ if (seq && next->seq >= seq) -+ goto next; -+ -+ if (next->type != ref->type) -+ goto next; -+ -+ if ((ref->type == BTRFS_TREE_BLOCK_REF_KEY || -+ ref->type == BTRFS_SHARED_BLOCK_REF_KEY) && -+ comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref), -+ btrfs_delayed_node_to_tree_ref(next), -+ ref->type)) -+ goto next; -+ if ((ref->type == BTRFS_EXTENT_DATA_REF_KEY || -+ ref->type == BTRFS_SHARED_DATA_REF_KEY) && -+ comp_data_refs(btrfs_delayed_node_to_data_ref(ref), -+ btrfs_delayed_node_to_data_ref(next))) -+ goto next; -+ -+ if (ref->action == next->action) { -+ mod = next->ref_mod; -+ } else { -+ if (ref->ref_mod < next->ref_mod) { -+ swap(ref, next); -+ done = true; -+ } -+ mod = -next->ref_mod; -+ } -+ -+ drop_delayed_ref(trans, delayed_refs, head, next); -+ ref->ref_mod += mod; -+ if (ref->ref_mod == 0) { -+ drop_delayed_ref(trans, delayed_refs, head, ref); -+ done = true; -+ } else { -+ /* -+ * Can't have multiples of the same ref on a tree block. -+ */ -+ WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY || -+ ref->type == BTRFS_SHARED_BLOCK_REF_KEY); -+ } -+next: -+ next = next2; -+ } -+ -+ return done; -+} -+ -+void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans, -+ struct btrfs_fs_info *fs_info, -+ struct btrfs_delayed_ref_root *delayed_refs, -+ struct btrfs_delayed_ref_head *head) -+{ -+ struct btrfs_delayed_ref_node *ref; -+ u64 seq = 0; -+ -+ assert_spin_locked(&head->lock); -+ -+ if (list_empty(&head->ref_list)) -+ return; -+ -+ /* We don't have too many refs to merge for data. */ -+ if (head->is_data) -+ return; -+ -+ spin_lock(&fs_info->tree_mod_seq_lock); -+ if (!list_empty(&fs_info->tree_mod_seq_list)) { -+ struct seq_list *elem; -+ -+ elem = list_first_entry(&fs_info->tree_mod_seq_list, -+ struct seq_list, list); -+ seq = elem->seq; -+ } -+ spin_unlock(&fs_info->tree_mod_seq_lock); -+ -+ ref = list_first_entry(&head->ref_list, struct btrfs_delayed_ref_node, -+ list); -+ while (&ref->list != &head->ref_list) { -+ if (seq && ref->seq >= seq) -+ goto next; -+ -+ if (merge_ref(trans, delayed_refs, head, ref, seq)) { -+ if (list_empty(&head->ref_list)) -+ break; -+ ref = list_first_entry(&head->ref_list, -+ struct btrfs_delayed_ref_node, -+ list); -+ continue; -+ } -+next: -+ ref = list_next_entry(ref, list); -+ } -+} -+ - int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, - struct btrfs_delayed_ref_root *delayed_refs, - u64 seq) -@@ -292,8 +405,7 @@ add_delayed_ref_tail_merge(struct btrfs_trans_handle *trans, - exist = list_entry(href->ref_list.prev, struct btrfs_delayed_ref_node, - list); - /* No need to compare bytenr nor is_head */ -- if (exist->type != ref->type || exist->no_quota != ref->no_quota || -- exist->seq != ref->seq) -+ if (exist->type != ref->type || exist->seq != ref->seq) - goto add_tail; - - if ((exist->type == BTRFS_TREE_BLOCK_REF_KEY || -@@ -524,7 +636,7 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info, - struct btrfs_delayed_ref_head *head_ref, - struct btrfs_delayed_ref_node *ref, u64 bytenr, - u64 num_bytes, u64 parent, u64 ref_root, int level, -- int action, int no_quota) -+ int action) - { - struct btrfs_delayed_tree_ref *full_ref; - struct btrfs_delayed_ref_root *delayed_refs; -@@ -546,7 +658,6 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info, - ref->action = action; - ref->is_head = 0; - ref->in_tree = 1; -- ref->no_quota = no_quota; - ref->seq = seq; - - full_ref = btrfs_delayed_node_to_tree_ref(ref); -@@ -579,7 +690,7 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info, - struct btrfs_delayed_ref_head *head_ref, - struct btrfs_delayed_ref_node *ref, u64 bytenr, - u64 num_bytes, u64 parent, u64 ref_root, u64 owner, -- u64 offset, int action, int no_quota) -+ u64 offset, int action) - { - struct btrfs_delayed_data_ref *full_ref; - struct btrfs_delayed_ref_root *delayed_refs; -@@ -602,7 +713,6 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info, - ref->action = action; - ref->is_head = 0; - ref->in_tree = 1; -- ref->no_quota = no_quota; - ref->seq = seq; - - full_ref = btrfs_delayed_node_to_data_ref(ref); -@@ -633,17 +743,13 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, - struct btrfs_trans_handle *trans, - u64 bytenr, u64 num_bytes, u64 parent, - u64 ref_root, int level, int action, -- struct btrfs_delayed_extent_op *extent_op, -- int no_quota) -+ struct btrfs_delayed_extent_op *extent_op) - { - struct btrfs_delayed_tree_ref *ref; - struct btrfs_delayed_ref_head *head_ref; - struct btrfs_delayed_ref_root *delayed_refs; - struct btrfs_qgroup_extent_record *record = NULL; - -- if (!is_fstree(ref_root) || !fs_info->quota_enabled) -- no_quota = 0; -- - BUG_ON(extent_op && extent_op->is_data); - ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS); - if (!ref) -@@ -672,8 +778,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, - bytenr, num_bytes, action, 0); - - add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr, -- num_bytes, parent, ref_root, level, action, -- no_quota); -+ num_bytes, parent, ref_root, level, action); - spin_unlock(&delayed_refs->lock); - - return 0; -@@ -694,17 +799,13 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, - u64 bytenr, u64 num_bytes, - u64 parent, u64 ref_root, - u64 owner, u64 offset, int action, -- struct btrfs_delayed_extent_op *extent_op, -- int no_quota) -+ struct btrfs_delayed_extent_op *extent_op) - { - struct btrfs_delayed_data_ref *ref; - struct btrfs_delayed_ref_head *head_ref; - struct btrfs_delayed_ref_root *delayed_refs; - struct btrfs_qgroup_extent_record *record = NULL; - -- if (!is_fstree(ref_root) || !fs_info->quota_enabled) -- no_quota = 0; -- - BUG_ON(extent_op && !extent_op->is_data); - ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS); - if (!ref) -@@ -740,7 +841,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, - - add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr, - num_bytes, parent, ref_root, owner, offset, -- action, no_quota); -+ action); - spin_unlock(&delayed_refs->lock); - - return 0; -diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h -index 13fb5e6..930887a 100644 ---- a/fs/btrfs/delayed-ref.h -+++ b/fs/btrfs/delayed-ref.h -@@ -68,7 +68,6 @@ struct btrfs_delayed_ref_node { - - unsigned int action:8; - unsigned int type:8; -- unsigned int no_quota:1; - /* is this node still in the rbtree? */ - unsigned int is_head:1; - unsigned int in_tree:1; -@@ -233,15 +232,13 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, - struct btrfs_trans_handle *trans, - u64 bytenr, u64 num_bytes, u64 parent, - u64 ref_root, int level, int action, -- struct btrfs_delayed_extent_op *extent_op, -- int no_quota); -+ struct btrfs_delayed_extent_op *extent_op); - int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, - struct btrfs_trans_handle *trans, - u64 bytenr, u64 num_bytes, - u64 parent, u64 ref_root, - u64 owner, u64 offset, int action, -- struct btrfs_delayed_extent_op *extent_op, -- int no_quota); -+ struct btrfs_delayed_extent_op *extent_op); - int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info, - struct btrfs_trans_handle *trans, - u64 bytenr, u64 num_bytes, -diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c -index 601d7d4..cadacf6 100644 ---- a/fs/btrfs/extent-tree.c -+++ b/fs/btrfs/extent-tree.c -@@ -95,8 +95,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - u64 parent, u64 root_objectid, - u64 flags, struct btrfs_disk_key *key, -- int level, struct btrfs_key *ins, -- int no_quota); -+ int level, struct btrfs_key *ins); - static int do_chunk_alloc(struct btrfs_trans_handle *trans, - struct btrfs_root *extent_root, u64 flags, - int force); -@@ -2009,8 +2008,7 @@ int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, - int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - u64 bytenr, u64 num_bytes, u64 parent, -- u64 root_objectid, u64 owner, u64 offset, -- int no_quota) -+ u64 root_objectid, u64 owner, u64 offset) - { - int ret; - struct btrfs_fs_info *fs_info = root->fs_info; -@@ -2022,12 +2020,12 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, - ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr, - num_bytes, - parent, root_objectid, (int)owner, -- BTRFS_ADD_DELAYED_REF, NULL, no_quota); -+ BTRFS_ADD_DELAYED_REF, NULL); - } else { - ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr, - num_bytes, - parent, root_objectid, owner, offset, -- BTRFS_ADD_DELAYED_REF, NULL, no_quota); -+ BTRFS_ADD_DELAYED_REF, NULL); - } - return ret; - } -@@ -2048,15 +2046,11 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, - u64 num_bytes = node->num_bytes; - u64 refs; - int ret; -- int no_quota = node->no_quota; - - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - -- if (!is_fstree(root_objectid) || !root->fs_info->quota_enabled) -- no_quota = 1; -- - path->reada = 1; - path->leave_spinning = 1; - /* this will setup the path even if it fails to insert the back ref */ -@@ -2291,8 +2285,7 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans, - parent, ref_root, - extent_op->flags_to_set, - &extent_op->key, -- ref->level, &ins, -- node->no_quota); -+ ref->level, &ins); - } else if (node->action == BTRFS_ADD_DELAYED_REF) { - ret = __btrfs_inc_extent_ref(trans, root, node, - parent, ref_root, -@@ -2433,7 +2426,21 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, - } - } - -+ /* -+ * We need to try and merge add/drops of the same ref since we -+ * can run into issues with relocate dropping the implicit ref -+ * and then it being added back again before the drop can -+ * finish. If we merged anything we need to re-loop so we can -+ * get a good ref. -+ * Or we can get node references of the same type that weren't -+ * merged when created due to bumps in the tree mod seq, and -+ * we need to merge them to prevent adding an inline extent -+ * backref before dropping it (triggering a BUG_ON at -+ * insert_inline_extent_backref()). -+ */ - spin_lock(&locked_ref->lock); -+ btrfs_merge_delayed_refs(trans, fs_info, delayed_refs, -+ locked_ref); - - /* - * locked_ref is the head node, so we have to go one -@@ -3109,7 +3116,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, - int level; - int ret = 0; - int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *, -- u64, u64, u64, u64, u64, u64, int); -+ u64, u64, u64, u64, u64, u64); - - - if (btrfs_test_is_dummy_root(root)) -@@ -3150,15 +3157,14 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, - key.offset -= btrfs_file_extent_offset(buf, fi); - ret = process_func(trans, root, bytenr, num_bytes, - parent, ref_root, key.objectid, -- key.offset, 1); -+ key.offset); - if (ret) - goto fail; - } else { - bytenr = btrfs_node_blockptr(buf, i); - num_bytes = root->nodesize; - ret = process_func(trans, root, bytenr, num_bytes, -- parent, ref_root, level - 1, 0, -- 1); -+ parent, ref_root, level - 1, 0); - if (ret) - goto fail; - } -@@ -6233,7 +6239,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, - int extent_slot = 0; - int found_extent = 0; - int num_to_del = 1; -- int no_quota = node->no_quota; - u32 item_size; - u64 refs; - u64 bytenr = node->bytenr; -@@ -6242,9 +6247,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, - bool skinny_metadata = btrfs_fs_incompat(root->fs_info, - SKINNY_METADATA); - -- if (!info->quota_enabled || !is_fstree(root_objectid)) -- no_quota = 1; -- - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; -@@ -6570,7 +6572,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, - buf->start, buf->len, - parent, root->root_key.objectid, - btrfs_header_level(buf), -- BTRFS_DROP_DELAYED_REF, NULL, 0); -+ BTRFS_DROP_DELAYED_REF, NULL); - BUG_ON(ret); /* -ENOMEM */ - } - -@@ -6618,7 +6620,7 @@ out: - /* Can return -ENOMEM */ - int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, - u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, -- u64 owner, u64 offset, int no_quota) -+ u64 owner, u64 offset) - { - int ret; - struct btrfs_fs_info *fs_info = root->fs_info; -@@ -6641,13 +6643,13 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, - ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr, - num_bytes, - parent, root_objectid, (int)owner, -- BTRFS_DROP_DELAYED_REF, NULL, no_quota); -+ BTRFS_DROP_DELAYED_REF, NULL); - } else { - ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr, - num_bytes, - parent, root_objectid, owner, - offset, BTRFS_DROP_DELAYED_REF, -- NULL, no_quota); -+ NULL); - } - return ret; - } -@@ -7429,8 +7431,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - u64 parent, u64 root_objectid, - u64 flags, struct btrfs_disk_key *key, -- int level, struct btrfs_key *ins, -- int no_quota) -+ int level, struct btrfs_key *ins) - { - int ret; - struct btrfs_fs_info *fs_info = root->fs_info; -@@ -7520,7 +7521,7 @@ int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans, - ret = btrfs_add_delayed_data_ref(root->fs_info, trans, ins->objectid, - ins->offset, 0, - root_objectid, owner, offset, -- BTRFS_ADD_DELAYED_EXTENT, NULL, 0); -+ BTRFS_ADD_DELAYED_EXTENT, NULL); - return ret; - } - -@@ -7734,7 +7735,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, - ins.objectid, ins.offset, - parent, root_objectid, level, - BTRFS_ADD_DELAYED_EXTENT, -- extent_op, 0); -+ extent_op); - if (ret) - goto out_free_delayed; - } -@@ -8282,7 +8283,7 @@ skip: - } - } - ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent, -- root->root_key.objectid, level - 1, 0, 0); -+ root->root_key.objectid, level - 1, 0); - BUG_ON(ret); /* -ENOMEM */ - } - btrfs_tree_unlock(next); -diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c -index 8c6f247..e27ea7a 100644 ---- a/fs/btrfs/file.c -+++ b/fs/btrfs/file.c -@@ -756,8 +756,16 @@ next_slot: - } - - btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); -- if (key.objectid > ino || -- key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end) -+ -+ if (key.objectid > ino) -+ break; -+ if (WARN_ON_ONCE(key.objectid < ino) || -+ key.type < BTRFS_EXTENT_DATA_KEY) { -+ ASSERT(del_nr == 0); -+ path->slots[0]++; -+ goto next_slot; -+ } -+ if (key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end) - break; - - fi = btrfs_item_ptr(leaf, path->slots[0], -@@ -776,8 +784,8 @@ next_slot: - btrfs_file_extent_inline_len(leaf, - path->slots[0], fi); - } else { -- WARN_ON(1); -- extent_end = search_start; -+ /* can't happen */ -+ BUG(); - } - - /* -@@ -847,7 +855,7 @@ next_slot: - disk_bytenr, num_bytes, 0, - root->root_key.objectid, - new_key.objectid, -- start - extent_offset, 1); -+ start - extent_offset); - BUG_ON(ret); /* -ENOMEM */ - } - key.offset = start; -@@ -925,7 +933,7 @@ delete_extent_item: - disk_bytenr, num_bytes, 0, - root->root_key.objectid, - key.objectid, key.offset - -- extent_offset, 0); -+ extent_offset); - BUG_ON(ret); /* -ENOMEM */ - inode_sub_bytes(inode, - extent_end - key.offset); -@@ -1204,7 +1212,7 @@ again: - - ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0, - root->root_key.objectid, -- ino, orig_offset, 1); -+ ino, orig_offset); - BUG_ON(ret); /* -ENOMEM */ - - if (split == start) { -@@ -1231,7 +1239,7 @@ again: - del_nr++; - ret = btrfs_free_extent(trans, root, bytenr, num_bytes, - 0, root->root_key.objectid, -- ino, orig_offset, 0); -+ ino, orig_offset); - BUG_ON(ret); /* -ENOMEM */ - } - other_start = 0; -@@ -1248,7 +1256,7 @@ again: - del_nr++; - ret = btrfs_free_extent(trans, root, bytenr, num_bytes, - 0, root->root_key.objectid, -- ino, orig_offset, 0); -+ ino, orig_offset); - BUG_ON(ret); /* -ENOMEM */ - } - if (del_nr == 0) { -@@ -1868,8 +1876,13 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) - struct btrfs_log_ctx ctx; - int ret = 0; - bool full_sync = 0; -- const u64 len = end - start + 1; -+ u64 len; - -+ /* -+ * The range length can be represented by u64, we have to do the typecasts -+ * to avoid signed overflow if it's [0, LLONG_MAX] eg. from fsync() -+ */ -+ len = (u64)end - (u64)start + 1; - trace_btrfs_sync_file(file, datasync); - - /* -@@ -2057,8 +2070,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) - } - } - if (!full_sync) { -- ret = btrfs_wait_ordered_range(inode, start, -- end - start + 1); -+ ret = btrfs_wait_ordered_range(inode, start, len); - if (ret) { - btrfs_end_transaction(trans, root); - goto out; -diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c -index 611b66d..396e3d5 100644 ---- a/fs/btrfs/inode.c -+++ b/fs/btrfs/inode.c -@@ -1294,8 +1294,14 @@ next_slot: - num_bytes = 0; - btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - -- if (found_key.objectid > ino || -- found_key.type > BTRFS_EXTENT_DATA_KEY || -+ if (found_key.objectid > ino) -+ break; -+ if (WARN_ON_ONCE(found_key.objectid < ino) || -+ found_key.type < BTRFS_EXTENT_DATA_KEY) { -+ path->slots[0]++; -+ goto next_slot; -+ } -+ if (found_key.type > BTRFS_EXTENT_DATA_KEY || - found_key.offset > end) - break; - -@@ -2573,7 +2579,7 @@ again: - ret = btrfs_inc_extent_ref(trans, root, new->bytenr, - new->disk_len, 0, - backref->root_id, backref->inum, -- new->file_pos, 0); /* start - extent_offset */ -+ new->file_pos); /* start - extent_offset */ - if (ret) { - btrfs_abort_transaction(trans, root, ret); - goto out_free_path; -@@ -4217,6 +4223,47 @@ static int truncate_space_check(struct btrfs_trans_handle *trans, - - } - -+static int truncate_inline_extent(struct inode *inode, -+ struct btrfs_path *path, -+ struct btrfs_key *found_key, -+ const u64 item_end, -+ const u64 new_size) -+{ -+ struct extent_buffer *leaf = path->nodes[0]; -+ int slot = path->slots[0]; -+ struct btrfs_file_extent_item *fi; -+ u32 size = (u32)(new_size - found_key->offset); -+ struct btrfs_root *root = BTRFS_I(inode)->root; -+ -+ fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); -+ -+ if (btrfs_file_extent_compression(leaf, fi) != BTRFS_COMPRESS_NONE) { -+ loff_t offset = new_size; -+ loff_t page_end = ALIGN(offset, PAGE_CACHE_SIZE); -+ -+ /* -+ * Zero out the remaining of the last page of our inline extent, -+ * instead of directly truncating our inline extent here - that -+ * would be much more complex (decompressing all the data, then -+ * compressing the truncated data, which might be bigger than -+ * the size of the inline extent, resize the extent, etc). -+ * We release the path because to get the page we might need to -+ * read the extent item from disk (data not in the page cache). -+ */ -+ btrfs_release_path(path); -+ return btrfs_truncate_page(inode, offset, page_end - offset, 0); -+ } -+ -+ btrfs_set_file_extent_ram_bytes(leaf, fi, size); -+ size = btrfs_file_extent_calc_inline_size(size); -+ btrfs_truncate_item(root, path, size, 1); -+ -+ if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) -+ inode_sub_bytes(inode, item_end + 1 - new_size); -+ -+ return 0; -+} -+ - /* - * this can truncate away extent items, csum items and directory items. - * It starts at a high offset and removes keys until it can't find -@@ -4411,27 +4458,40 @@ search_again: - * special encodings - */ - if (!del_item && -- btrfs_file_extent_compression(leaf, fi) == 0 && - btrfs_file_extent_encryption(leaf, fi) == 0 && - btrfs_file_extent_other_encoding(leaf, fi) == 0) { -- u32 size = new_size - found_key.offset; -- -- if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) -- inode_sub_bytes(inode, item_end + 1 - -- new_size); - - /* -- * update the ram bytes to properly reflect -- * the new size of our item -+ * Need to release path in order to truncate a -+ * compressed extent. So delete any accumulated -+ * extent items so far. - */ -- btrfs_set_file_extent_ram_bytes(leaf, fi, size); -- size = -- btrfs_file_extent_calc_inline_size(size); -- btrfs_truncate_item(root, path, size, 1); -+ if (btrfs_file_extent_compression(leaf, fi) != -+ BTRFS_COMPRESS_NONE && pending_del_nr) { -+ err = btrfs_del_items(trans, root, path, -+ pending_del_slot, -+ pending_del_nr); -+ if (err) { -+ btrfs_abort_transaction(trans, -+ root, -+ err); -+ goto error; -+ } -+ pending_del_nr = 0; -+ } -+ -+ err = truncate_inline_extent(inode, path, -+ &found_key, -+ item_end, -+ new_size); -+ if (err) { -+ btrfs_abort_transaction(trans, -+ root, err); -+ goto error; -+ } - } else if (test_bit(BTRFS_ROOT_REF_COWS, - &root->state)) { -- inode_sub_bytes(inode, item_end + 1 - -- found_key.offset); -+ inode_sub_bytes(inode, item_end + 1 - new_size); - } - } - delete: -@@ -4461,7 +4521,7 @@ delete: - ret = btrfs_free_extent(trans, root, extent_start, - extent_num_bytes, 0, - btrfs_header_owner(leaf), -- ino, extent_offset, 0); -+ ino, extent_offset); - BUG_ON(ret); - if (btrfs_should_throttle_delayed_refs(trans, root)) - btrfs_async_run_delayed_refs(root, -diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c -index 8d20f3b..6548a36 100644 ---- a/fs/btrfs/ioctl.c -+++ b/fs/btrfs/ioctl.c -@@ -3203,41 +3203,6 @@ out: - return ret; - } - --/* Helper to check and see if this root currently has a ref on the given disk -- * bytenr. If it does then we need to update the quota for this root. This -- * doesn't do anything if quotas aren't enabled. -- */ --static int check_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, -- u64 disko) --{ -- struct seq_list tree_mod_seq_elem = SEQ_LIST_INIT(tree_mod_seq_elem); -- struct ulist *roots; -- struct ulist_iterator uiter; -- struct ulist_node *root_node = NULL; -- int ret; -- -- if (!root->fs_info->quota_enabled) -- return 1; -- -- btrfs_get_tree_mod_seq(root->fs_info, &tree_mod_seq_elem); -- ret = btrfs_find_all_roots(trans, root->fs_info, disko, -- tree_mod_seq_elem.seq, &roots); -- if (ret < 0) -- goto out; -- ret = 0; -- ULIST_ITER_INIT(&uiter); -- while ((root_node = ulist_next(roots, &uiter))) { -- if (root_node->val == root->objectid) { -- ret = 1; -- break; -- } -- } -- ulist_free(roots); --out: -- btrfs_put_tree_mod_seq(root->fs_info, &tree_mod_seq_elem); -- return ret; --} -- - static int clone_finish_inode_update(struct btrfs_trans_handle *trans, - struct inode *inode, - u64 endoff, -@@ -3328,6 +3293,150 @@ static void clone_update_extent_map(struct inode *inode, - &BTRFS_I(inode)->runtime_flags); - } - -+/* -+ * Make sure we do not end up inserting an inline extent into a file that has -+ * already other (non-inline) extents. If a file has an inline extent it can -+ * not have any other extents and the (single) inline extent must start at the -+ * file offset 0. Failing to respect these rules will lead to file corruption, -+ * resulting in EIO errors on read/write operations, hitting BUG_ON's in mm, etc -+ * -+ * We can have extents that have been already written to disk or we can have -+ * dirty ranges still in delalloc, in which case the extent maps and items are -+ * created only when we run delalloc, and the delalloc ranges might fall outside -+ * the range we are currently locking in the inode's io tree. So we check the -+ * inode's i_size because of that (i_size updates are done while holding the -+ * i_mutex, which we are holding here). -+ * We also check to see if the inode has a size not greater than "datal" but has -+ * extents beyond it, due to an fallocate with FALLOC_FL_KEEP_SIZE (and we are -+ * protected against such concurrent fallocate calls by the i_mutex). -+ * -+ * If the file has no extents but a size greater than datal, do not allow the -+ * copy because we would need turn the inline extent into a non-inline one (even -+ * with NO_HOLES enabled). If we find our destination inode only has one inline -+ * extent, just overwrite it with the source inline extent if its size is less -+ * than the source extent's size, or we could copy the source inline extent's -+ * data into the destination inode's inline extent if the later is greater then -+ * the former. -+ */ -+static int clone_copy_inline_extent(struct inode *src, -+ struct inode *dst, -+ struct btrfs_trans_handle *trans, -+ struct btrfs_path *path, -+ struct btrfs_key *new_key, -+ const u64 drop_start, -+ const u64 datal, -+ const u64 skip, -+ const u64 size, -+ char *inline_data) -+{ -+ struct btrfs_root *root = BTRFS_I(dst)->root; -+ const u64 aligned_end = ALIGN(new_key->offset + datal, -+ root->sectorsize); -+ int ret; -+ struct btrfs_key key; -+ -+ if (new_key->offset > 0) -+ return -EOPNOTSUPP; -+ -+ key.objectid = btrfs_ino(dst); -+ key.type = BTRFS_EXTENT_DATA_KEY; -+ key.offset = 0; -+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); -+ if (ret < 0) { -+ return ret; -+ } else if (ret > 0) { -+ if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { -+ ret = btrfs_next_leaf(root, path); -+ if (ret < 0) -+ return ret; -+ else if (ret > 0) -+ goto copy_inline_extent; -+ } -+ btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); -+ if (key.objectid == btrfs_ino(dst) && -+ key.type == BTRFS_EXTENT_DATA_KEY) { -+ ASSERT(key.offset > 0); -+ return -EOPNOTSUPP; -+ } -+ } else if (i_size_read(dst) <= datal) { -+ struct btrfs_file_extent_item *ei; -+ u64 ext_len; -+ -+ /* -+ * If the file size is <= datal, make sure there are no other -+ * extents following (can happen do to an fallocate call with -+ * the flag FALLOC_FL_KEEP_SIZE). -+ */ -+ ei = btrfs_item_ptr(path->nodes[0], path->slots[0], -+ struct btrfs_file_extent_item); -+ /* -+ * If it's an inline extent, it can not have other extents -+ * following it. -+ */ -+ if (btrfs_file_extent_type(path->nodes[0], ei) == -+ BTRFS_FILE_EXTENT_INLINE) -+ goto copy_inline_extent; -+ -+ ext_len = btrfs_file_extent_num_bytes(path->nodes[0], ei); -+ if (ext_len > aligned_end) -+ return -EOPNOTSUPP; -+ -+ ret = btrfs_next_item(root, path); -+ if (ret < 0) { -+ return ret; -+ } else if (ret == 0) { -+ btrfs_item_key_to_cpu(path->nodes[0], &key, -+ path->slots[0]); -+ if (key.objectid == btrfs_ino(dst) && -+ key.type == BTRFS_EXTENT_DATA_KEY) -+ return -EOPNOTSUPP; -+ } -+ } -+ -+copy_inline_extent: -+ /* -+ * We have no extent items, or we have an extent at offset 0 which may -+ * or may not be inlined. All these cases are dealt the same way. -+ */ -+ if (i_size_read(dst) > datal) { -+ /* -+ * If the destination inode has an inline extent... -+ * This would require copying the data from the source inline -+ * extent into the beginning of the destination's inline extent. -+ * But this is really complex, both extents can be compressed -+ * or just one of them, which would require decompressing and -+ * re-compressing data (which could increase the new compressed -+ * size, not allowing the compressed data to fit anymore in an -+ * inline extent). -+ * So just don't support this case for now (it should be rare, -+ * we are not really saving space when cloning inline extents). -+ */ -+ return -EOPNOTSUPP; -+ } -+ -+ btrfs_release_path(path); -+ ret = btrfs_drop_extents(trans, root, dst, drop_start, aligned_end, 1); -+ if (ret) -+ return ret; -+ ret = btrfs_insert_empty_item(trans, root, path, new_key, size); -+ if (ret) -+ return ret; -+ -+ if (skip) { -+ const u32 start = btrfs_file_extent_calc_inline_size(0); -+ -+ memmove(inline_data + start, inline_data + start + skip, datal); -+ } -+ -+ write_extent_buffer(path->nodes[0], inline_data, -+ btrfs_item_ptr_offset(path->nodes[0], -+ path->slots[0]), -+ size); -+ inode_add_bytes(dst, datal); -+ -+ return 0; -+} -+ - /** - * btrfs_clone() - clone a range from inode file to another - * -@@ -3352,9 +3461,7 @@ static int btrfs_clone(struct inode *src, struct inode *inode, - u32 nritems; - int slot; - int ret; -- int no_quota; - const u64 len = olen_aligned; -- u64 last_disko = 0; - u64 last_dest_end = destoff; - - ret = -ENOMEM; -@@ -3400,7 +3507,6 @@ static int btrfs_clone(struct inode *src, struct inode *inode, - - nritems = btrfs_header_nritems(path->nodes[0]); - process_slot: -- no_quota = 1; - if (path->slots[0] >= nritems) { - ret = btrfs_next_leaf(BTRFS_I(src)->root, path); - if (ret < 0) -@@ -3552,35 +3658,13 @@ process_slot: - btrfs_set_file_extent_num_bytes(leaf, extent, - datal); - -- /* -- * We need to look up the roots that point at -- * this bytenr and see if the new root does. If -- * it does not we need to make sure we update -- * quotas appropriately. -- */ -- if (disko && root != BTRFS_I(src)->root && -- disko != last_disko) { -- no_quota = check_ref(trans, root, -- disko); -- if (no_quota < 0) { -- btrfs_abort_transaction(trans, -- root, -- ret); -- btrfs_end_transaction(trans, -- root); -- ret = no_quota; -- goto out; -- } -- } -- - if (disko) { - inode_add_bytes(inode, datal); - ret = btrfs_inc_extent_ref(trans, root, - disko, diskl, 0, - root->root_key.objectid, - btrfs_ino(inode), -- new_key.offset - datao, -- no_quota); -+ new_key.offset - datao); - if (ret) { - btrfs_abort_transaction(trans, - root, -@@ -3594,21 +3678,6 @@ process_slot: - } else if (type == BTRFS_FILE_EXTENT_INLINE) { - u64 skip = 0; - u64 trim = 0; -- u64 aligned_end = 0; -- -- /* -- * Don't copy an inline extent into an offset -- * greater than zero. Having an inline extent -- * at such an offset results in chaos as btrfs -- * isn't prepared for such cases. Just skip -- * this case for the same reasons as commented -- * at btrfs_ioctl_clone(). -- */ -- if (last_dest_end > 0) { -- ret = -EOPNOTSUPP; -- btrfs_end_transaction(trans, root); -- goto out; -- } - - if (off > key.offset) { - skip = off - key.offset; -@@ -3626,42 +3695,22 @@ process_slot: - size -= skip + trim; - datal -= skip + trim; - -- aligned_end = ALIGN(new_key.offset + datal, -- root->sectorsize); -- ret = btrfs_drop_extents(trans, root, inode, -- drop_start, -- aligned_end, -- 1); -+ ret = clone_copy_inline_extent(src, inode, -+ trans, path, -+ &new_key, -+ drop_start, -+ datal, -+ skip, size, buf); - if (ret) { - if (ret != -EOPNOTSUPP) - btrfs_abort_transaction(trans, -- root, ret); -- btrfs_end_transaction(trans, root); -- goto out; -- } -- -- ret = btrfs_insert_empty_item(trans, root, path, -- &new_key, size); -- if (ret) { -- btrfs_abort_transaction(trans, root, -- ret); -+ root, -+ ret); - btrfs_end_transaction(trans, root); - goto out; - } -- -- if (skip) { -- u32 start = -- btrfs_file_extent_calc_inline_size(0); -- memmove(buf+start, buf+start+skip, -- datal); -- } -- - leaf = path->nodes[0]; - slot = path->slots[0]; -- write_extent_buffer(leaf, buf, -- btrfs_item_ptr_offset(leaf, slot), -- size); -- inode_add_bytes(inode, datal); - } - - /* If we have an implicit hole (NO_HOLES feature). */ -diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c -index 303babe..ab507e3 100644 ---- a/fs/btrfs/relocation.c -+++ b/fs/btrfs/relocation.c -@@ -1716,7 +1716,7 @@ int replace_file_extents(struct btrfs_trans_handle *trans, - ret = btrfs_inc_extent_ref(trans, root, new_bytenr, - num_bytes, parent, - btrfs_header_owner(leaf), -- key.objectid, key.offset, 1); -+ key.objectid, key.offset); - if (ret) { - btrfs_abort_transaction(trans, root, ret); - break; -@@ -1724,7 +1724,7 @@ int replace_file_extents(struct btrfs_trans_handle *trans, - - ret = btrfs_free_extent(trans, root, bytenr, num_bytes, - parent, btrfs_header_owner(leaf), -- key.objectid, key.offset, 1); -+ key.objectid, key.offset); - if (ret) { - btrfs_abort_transaction(trans, root, ret); - break; -@@ -1900,23 +1900,21 @@ again: - - ret = btrfs_inc_extent_ref(trans, src, old_bytenr, blocksize, - path->nodes[level]->start, -- src->root_key.objectid, level - 1, 0, -- 1); -+ src->root_key.objectid, level - 1, 0); - BUG_ON(ret); - ret = btrfs_inc_extent_ref(trans, dest, new_bytenr, blocksize, - 0, dest->root_key.objectid, level - 1, -- 0, 1); -+ 0); - BUG_ON(ret); - - ret = btrfs_free_extent(trans, src, new_bytenr, blocksize, - path->nodes[level]->start, -- src->root_key.objectid, level - 1, 0, -- 1); -+ src->root_key.objectid, level - 1, 0); - BUG_ON(ret); - - ret = btrfs_free_extent(trans, dest, old_bytenr, blocksize, - 0, dest->root_key.objectid, level - 1, -- 0, 1); -+ 0); - BUG_ON(ret); - - btrfs_unlock_up_safe(path, 0); -@@ -2745,7 +2743,7 @@ static int do_relocation(struct btrfs_trans_handle *trans, - node->eb->start, blocksize, - upper->eb->start, - btrfs_header_owner(upper->eb), -- node->level, 0, 1); -+ node->level, 0); - BUG_ON(ret); - - ret = btrfs_drop_subtree(trans, root, eb, upper->eb); -diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c -index a739b82..23bb2e4 100644 ---- a/fs/btrfs/send.c -+++ b/fs/btrfs/send.c -@@ -2353,8 +2353,14 @@ static int send_subvol_begin(struct send_ctx *sctx) - } - - TLV_PUT_STRING(sctx, BTRFS_SEND_A_PATH, name, namelen); -- TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID, -- sctx->send_root->root_item.uuid); -+ -+ if (!btrfs_is_empty_uuid(sctx->send_root->root_item.received_uuid)) -+ TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID, -+ sctx->send_root->root_item.received_uuid); -+ else -+ TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID, -+ sctx->send_root->root_item.uuid); -+ - TLV_PUT_U64(sctx, BTRFS_SEND_A_CTRANSID, - le64_to_cpu(sctx->send_root->root_item.ctransid)); - if (parent_root) { -diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c -index 1bbaace..6f8af2d 100644 ---- a/fs/btrfs/tree-log.c -+++ b/fs/btrfs/tree-log.c -@@ -691,7 +691,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, - ret = btrfs_inc_extent_ref(trans, root, - ins.objectid, ins.offset, - 0, root->root_key.objectid, -- key->objectid, offset, 0); -+ key->objectid, offset); - if (ret) - goto out; - } else { -diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c -index 6f518c9..1fcd7b6 100644 ---- a/fs/btrfs/xattr.c -+++ b/fs/btrfs/xattr.c -@@ -313,8 +313,10 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) - /* check to make sure this item is what we want */ - if (found_key.objectid != key.objectid) - break; -- if (found_key.type != BTRFS_XATTR_ITEM_KEY) -+ if (found_key.type > BTRFS_XATTR_ITEM_KEY) - break; -+ if (found_key.type < BTRFS_XATTR_ITEM_KEY) -+ goto next; - - di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); - if (verify_dir_item(root, leaf, di)) -diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c -index 51cb02d..fe2c982 100644 ---- a/fs/ceph/mds_client.c -+++ b/fs/ceph/mds_client.c -@@ -1935,7 +1935,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, - - len = sizeof(*head) + - pathlen1 + pathlen2 + 2*(1 + sizeof(u32) + sizeof(u64)) + -- sizeof(struct timespec); -+ sizeof(struct ceph_timespec); - - /* calculate (max) length for cap releases */ - len += sizeof(struct ceph_mds_request_release) * -diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c -index c711be8..9c8d233 100644 ---- a/fs/debugfs/inode.c -+++ b/fs/debugfs/inode.c -@@ -271,8 +271,12 @@ static struct dentry *start_creating(const char *name, struct dentry *parent) - dput(dentry); - dentry = ERR_PTR(-EEXIST); - } -- if (IS_ERR(dentry)) -+ -+ if (IS_ERR(dentry)) { - mutex_unlock(&d_inode(parent)->i_mutex); -+ simple_release_fs(&debugfs_mount, &debugfs_mount_count); -+ } -+ - return dentry; - } - -diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c -index 4573155..2fab243 100644 ---- a/fs/ext4/crypto.c -+++ b/fs/ext4/crypto.c -@@ -411,7 +411,13 @@ int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex) - ext4_lblk_t lblk = ex->ee_block; - ext4_fsblk_t pblk = ext4_ext_pblock(ex); - unsigned int len = ext4_ext_get_actual_len(ex); -- int err = 0; -+ int ret, err = 0; -+ -+#if 0 -+ ext4_msg(inode->i_sb, KERN_CRIT, -+ "ext4_encrypted_zeroout ino %lu lblk %u len %u", -+ (unsigned long) inode->i_ino, lblk, len); -+#endif - - BUG_ON(inode->i_sb->s_blocksize != PAGE_CACHE_SIZE); - -@@ -437,17 +443,26 @@ int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex) - goto errout; - } - bio->bi_bdev = inode->i_sb->s_bdev; -- bio->bi_iter.bi_sector = pblk; -- err = bio_add_page(bio, ciphertext_page, -+ bio->bi_iter.bi_sector = -+ pblk << (inode->i_sb->s_blocksize_bits - 9); -+ ret = bio_add_page(bio, ciphertext_page, - inode->i_sb->s_blocksize, 0); -- if (err) { -+ if (ret != inode->i_sb->s_blocksize) { -+ /* should never happen! */ -+ ext4_msg(inode->i_sb, KERN_ERR, -+ "bio_add_page failed: %d", ret); -+ WARN_ON(1); - bio_put(bio); -+ err = -EIO; - goto errout; - } - err = submit_bio_wait(WRITE, bio); -+ if ((err == 0) && bio->bi_error) -+ err = -EIO; - bio_put(bio); - if (err) - goto errout; -+ lblk++; pblk++; - } - err = 0; - errout: -diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c -index d418431..e770c1ee 100644 ---- a/fs/ext4/ext4_jbd2.c -+++ b/fs/ext4/ext4_jbd2.c -@@ -88,13 +88,13 @@ int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle) - return 0; - } - -+ err = handle->h_err; - if (!handle->h_transaction) { -- err = jbd2_journal_stop(handle); -- return handle->h_err ? handle->h_err : err; -+ rc = jbd2_journal_stop(handle); -+ return err ? err : rc; - } - - sb = handle->h_transaction->t_journal->j_private; -- err = handle->h_err; - rc = jbd2_journal_stop(handle); - - if (!err) -diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c -index 2553aa8..7f486e3 100644 ---- a/fs/ext4/extents.c -+++ b/fs/ext4/extents.c -@@ -3558,6 +3558,9 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, - max_zeroout = sbi->s_extent_max_zeroout_kb >> - (inode->i_sb->s_blocksize_bits - 10); - -+ if (ext4_encrypted_inode(inode)) -+ max_zeroout = 0; -+ - /* If extent is less than s_max_zeroout_kb, zeroout directly */ - if (max_zeroout && (ee_len <= max_zeroout)) { - err = ext4_ext_zeroout(inode, ex); -diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c -index 84ba4d2..17fbe38 100644 ---- a/fs/ext4/page-io.c -+++ b/fs/ext4/page-io.c -@@ -425,6 +425,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io, - struct buffer_head *bh, *head; - int ret = 0; - int nr_submitted = 0; -+ int nr_to_submit = 0; - - blocksize = 1 << inode->i_blkbits; - -@@ -477,11 +478,13 @@ int ext4_bio_write_page(struct ext4_io_submit *io, - unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr); - } - set_buffer_async_write(bh); -+ nr_to_submit++; - } while ((bh = bh->b_this_page) != head); - - bh = head = page_buffers(page); - -- if (ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode)) { -+ if (ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode) && -+ nr_to_submit) { - data_page = ext4_encrypt(inode, page); - if (IS_ERR(data_page)) { - ret = PTR_ERR(data_page); -diff --git a/fs/ext4/super.c b/fs/ext4/super.c -index a63c7b0..df84bd2 100644 ---- a/fs/ext4/super.c -+++ b/fs/ext4/super.c -@@ -394,9 +394,13 @@ static void ext4_handle_error(struct super_block *sb) - smp_wmb(); - sb->s_flags |= MS_RDONLY; - } -- if (test_opt(sb, ERRORS_PANIC)) -+ if (test_opt(sb, ERRORS_PANIC)) { -+ if (EXT4_SB(sb)->s_journal && -+ !(EXT4_SB(sb)->s_journal->j_flags & JBD2_REC_ERR)) -+ return; - panic("EXT4-fs (device %s): panic forced after error\n", - sb->s_id); -+ } - } - - #define ext4_error_ratelimit(sb) \ -@@ -585,8 +589,12 @@ void __ext4_abort(struct super_block *sb, const char *function, - jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); - save_error_info(sb, function, line); - } -- if (test_opt(sb, ERRORS_PANIC)) -+ if (test_opt(sb, ERRORS_PANIC)) { -+ if (EXT4_SB(sb)->s_journal && -+ !(EXT4_SB(sb)->s_journal->j_flags & JBD2_REC_ERR)) -+ return; - panic("EXT4-fs panic from previous error\n"); -+ } - } - - void __ext4_msg(struct super_block *sb, -diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c -index 8270fe9..37023d0 100644 ---- a/fs/jbd2/journal.c -+++ b/fs/jbd2/journal.c -@@ -2071,8 +2071,12 @@ static void __journal_abort_soft (journal_t *journal, int errno) - - __jbd2_journal_abort_hard(journal); - -- if (errno) -+ if (errno) { - jbd2_journal_update_sb_errno(journal); -+ write_lock(&journal->j_state_lock); -+ journal->j_flags |= JBD2_REC_ERR; -+ write_unlock(&journal->j_state_lock); -+ } - } - - /** -diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c -index 326d9e1..ffdf9b9 100644 ---- a/fs/nfs/inode.c -+++ b/fs/nfs/inode.c -@@ -1824,7 +1824,11 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) - if ((long)fattr->gencount - (long)nfsi->attr_gencount > 0) - nfsi->attr_gencount = fattr->gencount; - } -- invalid &= ~NFS_INO_INVALID_ATTR; -+ -+ /* Don't declare attrcache up to date if there were no attrs! */ -+ if (fattr->valid != 0) -+ invalid &= ~NFS_INO_INVALID_ATTR; -+ - /* Don't invalidate the data if we were to blame */ - if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) - || S_ISLNK(inode->i_mode))) -diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c -index 223bedd..10410e8 100644 ---- a/fs/nfs/nfs4client.c -+++ b/fs/nfs/nfs4client.c -@@ -33,7 +33,7 @@ static int nfs_get_cb_ident_idr(struct nfs_client *clp, int minorversion) - return ret; - idr_preload(GFP_KERNEL); - spin_lock(&nn->nfs_client_lock); -- ret = idr_alloc(&nn->cb_ident_idr, clp, 0, 0, GFP_NOWAIT); -+ ret = idr_alloc(&nn->cb_ident_idr, clp, 1, 0, GFP_NOWAIT); - if (ret >= 0) - clp->cl_cb_ident = ret; - spin_unlock(&nn->nfs_client_lock); -diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c -index 8abe271..abf5cae 100644 ---- a/fs/nfs/pnfs.c -+++ b/fs/nfs/pnfs.c -@@ -872,33 +872,38 @@ send_layoutget(struct pnfs_layout_hdr *lo, - - dprintk("--> %s\n", __func__); - -- lgp = kzalloc(sizeof(*lgp), gfp_flags); -- if (lgp == NULL) -- return NULL; -+ /* -+ * Synchronously retrieve layout information from server and -+ * store in lseg. If we race with a concurrent seqid morphing -+ * op, then re-send the LAYOUTGET. -+ */ -+ do { -+ lgp = kzalloc(sizeof(*lgp), gfp_flags); -+ if (lgp == NULL) -+ return NULL; -+ -+ i_size = i_size_read(ino); -+ -+ lgp->args.minlength = PAGE_CACHE_SIZE; -+ if (lgp->args.minlength > range->length) -+ lgp->args.minlength = range->length; -+ if (range->iomode == IOMODE_READ) { -+ if (range->offset >= i_size) -+ lgp->args.minlength = 0; -+ else if (i_size - range->offset < lgp->args.minlength) -+ lgp->args.minlength = i_size - range->offset; -+ } -+ lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE; -+ lgp->args.range = *range; -+ lgp->args.type = server->pnfs_curr_ld->id; -+ lgp->args.inode = ino; -+ lgp->args.ctx = get_nfs_open_context(ctx); -+ lgp->gfp_flags = gfp_flags; -+ lgp->cred = lo->plh_lc_cred; - -- i_size = i_size_read(ino); -+ lseg = nfs4_proc_layoutget(lgp, gfp_flags); -+ } while (lseg == ERR_PTR(-EAGAIN)); - -- lgp->args.minlength = PAGE_CACHE_SIZE; -- if (lgp->args.minlength > range->length) -- lgp->args.minlength = range->length; -- if (range->iomode == IOMODE_READ) { -- if (range->offset >= i_size) -- lgp->args.minlength = 0; -- else if (i_size - range->offset < lgp->args.minlength) -- lgp->args.minlength = i_size - range->offset; -- } -- lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE; -- lgp->args.range = *range; -- lgp->args.type = server->pnfs_curr_ld->id; -- lgp->args.inode = ino; -- lgp->args.ctx = get_nfs_open_context(ctx); -- lgp->gfp_flags = gfp_flags; -- lgp->cred = lo->plh_lc_cred; -- -- /* Synchronously retrieve layout information from server and -- * store in lseg. -- */ -- lseg = nfs4_proc_layoutget(lgp, gfp_flags); - if (IS_ERR(lseg)) { - switch (PTR_ERR(lseg)) { - case -ENOMEM: -@@ -1687,6 +1692,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) - /* existing state ID, make sure the sequence number matches. */ - if (pnfs_layout_stateid_blocked(lo, &res->stateid)) { - dprintk("%s forget reply due to sequence\n", __func__); -+ status = -EAGAIN; - goto out_forget_reply; - } - pnfs_set_layout_stateid(lo, &res->stateid, false); -diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c -index 0f1d569..0dea0c2 100644 ---- a/fs/nfsd/nfs4state.c -+++ b/fs/nfsd/nfs4state.c -@@ -765,16 +765,68 @@ void nfs4_unhash_stid(struct nfs4_stid *s) - s->sc_type = 0; - } - --static void -+/** -+ * nfs4_get_existing_delegation - Discover if this delegation already exists -+ * @clp: a pointer to the nfs4_client we're granting a delegation to -+ * @fp: a pointer to the nfs4_file we're granting a delegation on -+ * -+ * Return: -+ * On success: NULL if an existing delegation was not found. -+ * -+ * On error: -EAGAIN if one was previously granted to this nfs4_client -+ * for this nfs4_file. -+ * -+ */ -+ -+static int -+nfs4_get_existing_delegation(struct nfs4_client *clp, struct nfs4_file *fp) -+{ -+ struct nfs4_delegation *searchdp = NULL; -+ struct nfs4_client *searchclp = NULL; -+ -+ lockdep_assert_held(&state_lock); -+ lockdep_assert_held(&fp->fi_lock); -+ -+ list_for_each_entry(searchdp, &fp->fi_delegations, dl_perfile) { -+ searchclp = searchdp->dl_stid.sc_client; -+ if (clp == searchclp) { -+ return -EAGAIN; -+ } -+ } -+ return 0; -+} -+ -+/** -+ * hash_delegation_locked - Add a delegation to the appropriate lists -+ * @dp: a pointer to the nfs4_delegation we are adding. -+ * @fp: a pointer to the nfs4_file we're granting a delegation on -+ * -+ * Return: -+ * On success: NULL if the delegation was successfully hashed. -+ * -+ * On error: -EAGAIN if one was previously granted to this -+ * nfs4_client for this nfs4_file. Delegation is not hashed. -+ * -+ */ -+ -+static int - hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp) - { -+ int status; -+ struct nfs4_client *clp = dp->dl_stid.sc_client; -+ - lockdep_assert_held(&state_lock); - lockdep_assert_held(&fp->fi_lock); - -+ status = nfs4_get_existing_delegation(clp, fp); -+ if (status) -+ return status; -+ ++fp->fi_delegees; - atomic_inc(&dp->dl_stid.sc_count); - dp->dl_stid.sc_type = NFS4_DELEG_STID; - list_add(&dp->dl_perfile, &fp->fi_delegations); -- list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations); -+ list_add(&dp->dl_perclnt, &clp->cl_delegations); -+ return 0; - } - - static bool -@@ -3360,6 +3412,7 @@ static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, - stp->st_access_bmap = 0; - stp->st_deny_bmap = 0; - stp->st_openstp = NULL; -+ init_rwsem(&stp->st_rwsem); - spin_lock(&oo->oo_owner.so_client->cl_lock); - list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids); - spin_lock(&fp->fi_lock); -@@ -3945,6 +3998,18 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_file *fp, int flag) - return fl; - } - -+/** -+ * nfs4_setlease - Obtain a delegation by requesting lease from vfs layer -+ * @dp: a pointer to the nfs4_delegation we're adding. -+ * -+ * Return: -+ * On success: Return code will be 0 on success. -+ * -+ * On error: -EAGAIN if there was an existing delegation. -+ * nonzero if there is an error in other cases. -+ * -+ */ -+ - static int nfs4_setlease(struct nfs4_delegation *dp) - { - struct nfs4_file *fp = dp->dl_stid.sc_file; -@@ -3976,16 +4041,19 @@ static int nfs4_setlease(struct nfs4_delegation *dp) - goto out_unlock; - /* Race breaker */ - if (fp->fi_deleg_file) { -- status = 0; -- ++fp->fi_delegees; -- hash_delegation_locked(dp, fp); -+ status = hash_delegation_locked(dp, fp); - goto out_unlock; - } - fp->fi_deleg_file = filp; -- fp->fi_delegees = 1; -- hash_delegation_locked(dp, fp); -+ fp->fi_delegees = 0; -+ status = hash_delegation_locked(dp, fp); - spin_unlock(&fp->fi_lock); - spin_unlock(&state_lock); -+ if (status) { -+ /* Should never happen, this is a new fi_deleg_file */ -+ WARN_ON_ONCE(1); -+ goto out_fput; -+ } - return 0; - out_unlock: - spin_unlock(&fp->fi_lock); -@@ -4005,6 +4073,15 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh, - if (fp->fi_had_conflict) - return ERR_PTR(-EAGAIN); - -+ spin_lock(&state_lock); -+ spin_lock(&fp->fi_lock); -+ status = nfs4_get_existing_delegation(clp, fp); -+ spin_unlock(&fp->fi_lock); -+ spin_unlock(&state_lock); -+ -+ if (status) -+ return ERR_PTR(status); -+ - dp = alloc_init_deleg(clp, fh, odstate); - if (!dp) - return ERR_PTR(-ENOMEM); -@@ -4023,9 +4100,7 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh, - status = -EAGAIN; - goto out_unlock; - } -- ++fp->fi_delegees; -- hash_delegation_locked(dp, fp); -- status = 0; -+ status = hash_delegation_locked(dp, fp); - out_unlock: - spin_unlock(&fp->fi_lock); - spin_unlock(&state_lock); -@@ -4187,15 +4262,20 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf - */ - if (stp) { - /* Stateid was found, this is an OPEN upgrade */ -+ down_read(&stp->st_rwsem); - status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open); -- if (status) -+ if (status) { -+ up_read(&stp->st_rwsem); - goto out; -+ } - } else { - stp = open->op_stp; - open->op_stp = NULL; - init_open_stateid(stp, fp, open); -+ down_read(&stp->st_rwsem); - status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open); - if (status) { -+ up_read(&stp->st_rwsem); - release_open_stateid(stp); - goto out; - } -@@ -4207,6 +4287,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf - } - update_stateid(&stp->st_stid.sc_stateid); - memcpy(&open->op_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); -+ up_read(&stp->st_rwsem); - - if (nfsd4_has_session(&resp->cstate)) { - if (open->op_deleg_want & NFS4_SHARE_WANT_NO_DELEG) { -@@ -4819,10 +4900,13 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_ - * revoked delegations are kept only for free_stateid. - */ - return nfserr_bad_stateid; -+ down_write(&stp->st_rwsem); - status = check_stateid_generation(stateid, &stp->st_stid.sc_stateid, nfsd4_has_session(cstate)); -- if (status) -- return status; -- return nfs4_check_fh(current_fh, &stp->st_stid); -+ if (status == nfs_ok) -+ status = nfs4_check_fh(current_fh, &stp->st_stid); -+ if (status != nfs_ok) -+ up_write(&stp->st_rwsem); -+ return status; - } - - /* -@@ -4869,6 +4953,7 @@ static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cs - return status; - oo = openowner(stp->st_stateowner); - if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) { -+ up_write(&stp->st_rwsem); - nfs4_put_stid(&stp->st_stid); - return nfserr_bad_stateid; - } -@@ -4899,11 +4984,14 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, - goto out; - oo = openowner(stp->st_stateowner); - status = nfserr_bad_stateid; -- if (oo->oo_flags & NFS4_OO_CONFIRMED) -+ if (oo->oo_flags & NFS4_OO_CONFIRMED) { -+ up_write(&stp->st_rwsem); - goto put_stateid; -+ } - oo->oo_flags |= NFS4_OO_CONFIRMED; - update_stateid(&stp->st_stid.sc_stateid); - memcpy(&oc->oc_resp_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); -+ up_write(&stp->st_rwsem); - dprintk("NFSD: %s: success, seqid=%d stateid=" STATEID_FMT "\n", - __func__, oc->oc_seqid, STATEID_VAL(&stp->st_stid.sc_stateid)); - -@@ -4982,6 +5070,7 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp, - memcpy(&od->od_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); - status = nfs_ok; - put_stateid: -+ up_write(&stp->st_rwsem); - nfs4_put_stid(&stp->st_stid); - out: - nfsd4_bump_seqid(cstate, status); -@@ -5035,6 +5124,7 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, - goto out; - update_stateid(&stp->st_stid.sc_stateid); - memcpy(&close->cl_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); -+ up_write(&stp->st_rwsem); - - nfsd4_close_open_stateid(stp); - -@@ -5260,6 +5350,7 @@ init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo, - stp->st_access_bmap = 0; - stp->st_deny_bmap = open_stp->st_deny_bmap; - stp->st_openstp = open_stp; -+ init_rwsem(&stp->st_rwsem); - list_add(&stp->st_locks, &open_stp->st_locks); - list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids); - spin_lock(&fp->fi_lock); -@@ -5428,6 +5519,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, - &open_stp, nn); - if (status) - goto out; -+ up_write(&open_stp->st_rwsem); - open_sop = openowner(open_stp->st_stateowner); - status = nfserr_bad_stateid; - if (!same_clid(&open_sop->oo_owner.so_client->cl_clientid, -@@ -5435,6 +5527,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, - goto out; - status = lookup_or_create_lock_state(cstate, open_stp, lock, - &lock_stp, &new); -+ if (status == nfs_ok) -+ down_write(&lock_stp->st_rwsem); - } else { - status = nfs4_preprocess_seqid_op(cstate, - lock->lk_old_lock_seqid, -@@ -5540,6 +5634,8 @@ out: - seqid_mutating_err(ntohl(status))) - lock_sop->lo_owner.so_seqid++; - -+ up_write(&lock_stp->st_rwsem); -+ - /* - * If this is a new, never-before-used stateid, and we are - * returning an error, then just go ahead and release it. -@@ -5709,6 +5805,7 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, - fput: - fput(filp); - put_stateid: -+ up_write(&stp->st_rwsem); - nfs4_put_stid(&stp->st_stid); - out: - nfsd4_bump_seqid(cstate, status); -diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h -index 583ffc1..31bde12 100644 ---- a/fs/nfsd/state.h -+++ b/fs/nfsd/state.h -@@ -534,15 +534,16 @@ struct nfs4_file { - * Better suggestions welcome. - */ - struct nfs4_ol_stateid { -- struct nfs4_stid st_stid; /* must be first field */ -- struct list_head st_perfile; -- struct list_head st_perstateowner; -- struct list_head st_locks; -- struct nfs4_stateowner * st_stateowner; -- struct nfs4_clnt_odstate * st_clnt_odstate; -- unsigned char st_access_bmap; -- unsigned char st_deny_bmap; -- struct nfs4_ol_stateid * st_openstp; -+ struct nfs4_stid st_stid; -+ struct list_head st_perfile; -+ struct list_head st_perstateowner; -+ struct list_head st_locks; -+ struct nfs4_stateowner *st_stateowner; -+ struct nfs4_clnt_odstate *st_clnt_odstate; -+ unsigned char st_access_bmap; -+ unsigned char st_deny_bmap; -+ struct nfs4_ol_stateid *st_openstp; -+ struct rw_semaphore st_rwsem; - }; - - static inline struct nfs4_ol_stateid *openlockstateid(struct nfs4_stid *s) -diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c -index b7dfac2..12bfa9c 100644 ---- a/fs/ocfs2/namei.c -+++ b/fs/ocfs2/namei.c -@@ -374,6 +374,8 @@ static int ocfs2_mknod(struct inode *dir, - mlog_errno(status); - goto leave; - } -+ /* update inode->i_mode after mask with "umask". */ -+ inode->i_mode = mode; - - handle = ocfs2_start_trans(osb, ocfs2_mknod_credits(osb->sb, - S_ISDIR(mode), -diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h -index f1f32af..3e4ff3f 100644 ---- a/include/linux/ipv6.h -+++ b/include/linux/ipv6.h -@@ -227,7 +227,7 @@ struct ipv6_pinfo { - struct ipv6_ac_socklist *ipv6_ac_list; - struct ipv6_fl_socklist __rcu *ipv6_fl_list; - -- struct ipv6_txoptions *opt; -+ struct ipv6_txoptions __rcu *opt; - struct sk_buff *pktoptions; - struct sk_buff *rxpmtu; - struct inet6_cork cork; -diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h -index df07e78..1abeb82 100644 ---- a/include/linux/jbd2.h -+++ b/include/linux/jbd2.h -@@ -1046,6 +1046,7 @@ struct journal_s - #define JBD2_ABORT_ON_SYNCDATA_ERR 0x040 /* Abort the journal on file - * data write error in ordered - * mode */ -+#define JBD2_REC_ERR 0x080 /* The errno in the sb has been recorded */ - - /* - * Function declarations for the journaling transaction and buffer -diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h -index dd20974..1565324 100644 ---- a/include/linux/mlx5/mlx5_ifc.h -+++ b/include/linux/mlx5/mlx5_ifc.h -@@ -453,26 +453,28 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits { - u8 lro_cap[0x1]; - u8 lro_psh_flag[0x1]; - u8 lro_time_stamp[0x1]; -- u8 reserved_0[0x6]; -+ u8 reserved_0[0x3]; -+ u8 self_lb_en_modifiable[0x1]; -+ u8 reserved_1[0x2]; - u8 max_lso_cap[0x5]; -- u8 reserved_1[0x4]; -+ u8 reserved_2[0x4]; - u8 rss_ind_tbl_cap[0x4]; -- u8 reserved_2[0x3]; -+ u8 reserved_3[0x3]; - u8 tunnel_lso_const_out_ip_id[0x1]; -- u8 reserved_3[0x2]; -+ u8 reserved_4[0x2]; - u8 tunnel_statless_gre[0x1]; - u8 tunnel_stateless_vxlan[0x1]; - -- u8 reserved_4[0x20]; -+ u8 reserved_5[0x20]; - -- u8 reserved_5[0x10]; -+ u8 reserved_6[0x10]; - u8 lro_min_mss_size[0x10]; - -- u8 reserved_6[0x120]; -+ u8 reserved_7[0x120]; - - u8 lro_timer_supported_periods[4][0x20]; - -- u8 reserved_7[0x600]; -+ u8 reserved_8[0x600]; - }; - - struct mlx5_ifc_roce_cap_bits { -@@ -4051,9 +4053,11 @@ struct mlx5_ifc_modify_tis_in_bits { - }; - - struct mlx5_ifc_modify_tir_bitmask_bits { -- u8 reserved[0x20]; -+ u8 reserved_0[0x20]; - -- u8 reserved1[0x1f]; -+ u8 reserved_1[0x1b]; -+ u8 self_lb_en[0x1]; -+ u8 reserved_2[0x3]; - u8 lro[0x1]; - }; - -diff --git a/include/net/af_unix.h b/include/net/af_unix.h -index b36d837..2a91a05 100644 ---- a/include/net/af_unix.h -+++ b/include/net/af_unix.h -@@ -62,6 +62,7 @@ struct unix_sock { - #define UNIX_GC_CANDIDATE 0 - #define UNIX_GC_MAYBE_CYCLE 1 - struct socket_wq peer_wq; -+ wait_queue_t peer_wake; - }; - - static inline struct unix_sock *unix_sk(const struct sock *sk) -diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h -index aaf9700..fb961a5 100644 ---- a/include/net/ip6_fib.h -+++ b/include/net/ip6_fib.h -@@ -167,7 +167,8 @@ static inline void rt6_update_expires(struct rt6_info *rt0, int timeout) - - static inline u32 rt6_get_cookie(const struct rt6_info *rt) - { -- if (rt->rt6i_flags & RTF_PCPU || unlikely(rt->dst.flags & DST_NOCACHE)) -+ if (rt->rt6i_flags & RTF_PCPU || -+ (unlikely(rt->dst.flags & DST_NOCACHE) && rt->dst.from)) - rt = (struct rt6_info *)(rt->dst.from); - - return rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0; -diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h -index fa915fa..d49a8f8 100644 ---- a/include/net/ip6_tunnel.h -+++ b/include/net/ip6_tunnel.h -@@ -90,11 +90,12 @@ static inline void ip6tunnel_xmit(struct sock *sk, struct sk_buff *skb, - err = ip6_local_out_sk(sk, skb); - - if (net_xmit_eval(err) == 0) { -- struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats); -+ struct pcpu_sw_netstats *tstats = get_cpu_ptr(dev->tstats); - u64_stats_update_begin(&tstats->syncp); - tstats->tx_bytes += pkt_len; - tstats->tx_packets++; - u64_stats_update_end(&tstats->syncp); -+ put_cpu_ptr(tstats); - } else { - stats->tx_errors++; - stats->tx_aborted_errors++; -diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h -index f6dafec..62a750a 100644 ---- a/include/net/ip_tunnels.h -+++ b/include/net/ip_tunnels.h -@@ -287,12 +287,13 @@ static inline void iptunnel_xmit_stats(int err, - struct pcpu_sw_netstats __percpu *stats) - { - if (err > 0) { -- struct pcpu_sw_netstats *tstats = this_cpu_ptr(stats); -+ struct pcpu_sw_netstats *tstats = get_cpu_ptr(stats); - - u64_stats_update_begin(&tstats->syncp); - tstats->tx_bytes += err; - tstats->tx_packets++; - u64_stats_update_end(&tstats->syncp); -+ put_cpu_ptr(tstats); - } else if (err < 0) { - err_stats->tx_errors++; - err_stats->tx_aborted_errors++; -diff --git a/include/net/ipv6.h b/include/net/ipv6.h -index 711cca4..b14e158 100644 ---- a/include/net/ipv6.h -+++ b/include/net/ipv6.h -@@ -205,6 +205,7 @@ extern rwlock_t ip6_ra_lock; - */ - - struct ipv6_txoptions { -+ atomic_t refcnt; - /* Length of this structure */ - int tot_len; - -@@ -217,7 +218,7 @@ struct ipv6_txoptions { - struct ipv6_opt_hdr *dst0opt; - struct ipv6_rt_hdr *srcrt; /* Routing Header */ - struct ipv6_opt_hdr *dst1opt; -- -+ struct rcu_head rcu; - /* Option buffer, as read by IPV6_PKTOPTIONS, starts here. */ - }; - -@@ -252,6 +253,24 @@ struct ipv6_fl_socklist { - struct rcu_head rcu; - }; - -+static inline struct ipv6_txoptions *txopt_get(const struct ipv6_pinfo *np) -+{ -+ struct ipv6_txoptions *opt; -+ -+ rcu_read_lock(); -+ opt = rcu_dereference(np->opt); -+ if (opt && !atomic_inc_not_zero(&opt->refcnt)) -+ opt = NULL; -+ rcu_read_unlock(); -+ return opt; -+} -+ -+static inline void txopt_put(struct ipv6_txoptions *opt) -+{ -+ if (opt && atomic_dec_and_test(&opt->refcnt)) -+ kfree_rcu(opt, rcu); -+} -+ - struct ip6_flowlabel *fl6_sock_lookup(struct sock *sk, __be32 label); - struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions *opt_space, - struct ip6_flowlabel *fl, -@@ -490,6 +509,7 @@ struct ip6_create_arg { - u32 user; - const struct in6_addr *src; - const struct in6_addr *dst; -+ int iif; - u8 ecn; - }; - -diff --git a/include/net/ndisc.h b/include/net/ndisc.h -index aba5695..b3a7751 100644 ---- a/include/net/ndisc.h -+++ b/include/net/ndisc.h -@@ -182,8 +182,7 @@ int ndisc_rcv(struct sk_buff *skb); - - void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh, - const struct in6_addr *solicit, -- const struct in6_addr *daddr, const struct in6_addr *saddr, -- struct sk_buff *oskb); -+ const struct in6_addr *daddr, const struct in6_addr *saddr); - - void ndisc_send_rs(struct net_device *dev, - const struct in6_addr *saddr, const struct in6_addr *daddr); -diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h -index 444faa8..f1ad8f8 100644 ---- a/include/net/sch_generic.h -+++ b/include/net/sch_generic.h -@@ -61,6 +61,9 @@ struct Qdisc { - */ - #define TCQ_F_WARN_NONWC (1 << 16) - #define TCQ_F_CPUSTATS 0x20 /* run using percpu statistics */ -+#define TCQ_F_NOPARENT 0x40 /* root of its hierarchy : -+ * qdisc_tree_decrease_qlen() should stop. -+ */ - u32 limit; - const struct Qdisc_ops *ops; - struct qdisc_size_table __rcu *stab; -diff --git a/include/net/switchdev.h b/include/net/switchdev.h -index 319baab..731c40e 100644 ---- a/include/net/switchdev.h -+++ b/include/net/switchdev.h -@@ -272,7 +272,7 @@ static inline int switchdev_port_fdb_dump(struct sk_buff *skb, - struct net_device *filter_dev, - int idx) - { -- return -EOPNOTSUPP; -+ return idx; - } - - static inline void switchdev_port_fwd_mark_set(struct net_device *dev, -diff --git a/kernel/.gitignore b/kernel/.gitignore -index 790d83c..b3097bd 100644 ---- a/kernel/.gitignore -+++ b/kernel/.gitignore -@@ -5,4 +5,3 @@ config_data.h - config_data.gz - timeconst.h - hz.bc --x509_certificate_list -diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c -index 29ace10..7a0decf 100644 ---- a/kernel/bpf/arraymap.c -+++ b/kernel/bpf/arraymap.c -@@ -104,7 +104,7 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value, - /* all elements already exist */ - return -EEXIST; - -- memcpy(array->value + array->elem_size * index, value, array->elem_size); -+ memcpy(array->value + array->elem_size * index, value, map->value_size); - return 0; - } - -diff --git a/net/core/neighbour.c b/net/core/neighbour.c -index 2b515ba..c169bba 100644 ---- a/net/core/neighbour.c -+++ b/net/core/neighbour.c -@@ -2215,7 +2215,7 @@ static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn, - ndm->ndm_pad2 = 0; - ndm->ndm_flags = pn->flags | NTF_PROXY; - ndm->ndm_type = RTN_UNICAST; -- ndm->ndm_ifindex = pn->dev->ifindex; -+ ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0; - ndm->ndm_state = NUD_NONE; - - if (nla_put(skb, NDA_DST, tbl->key_len, pn->key)) -@@ -2290,7 +2290,7 @@ static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, - if (h > s_h) - s_idx = 0; - for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) { -- if (dev_net(n->dev) != net) -+ if (pneigh_net(n) != net) - continue; - if (idx < s_idx) - goto next; -diff --git a/net/core/scm.c b/net/core/scm.c -index 3b6899b..8a1741b 100644 ---- a/net/core/scm.c -+++ b/net/core/scm.c -@@ -305,6 +305,8 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) - err = put_user(cmlen, &cm->cmsg_len); - if (!err) { - cmlen = CMSG_SPACE(i*sizeof(int)); -+ if (msg->msg_controllen < cmlen) -+ cmlen = msg->msg_controllen; - msg->msg_control += cmlen; - msg->msg_controllen -= cmlen; - } -diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c -index 5165571..a049050 100644 ---- a/net/dccp/ipv6.c -+++ b/net/dccp/ipv6.c -@@ -202,7 +202,9 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req) - security_req_classify_flow(req, flowi6_to_flowi(&fl6)); - - -- final_p = fl6_update_dst(&fl6, np->opt, &final); -+ rcu_read_lock(); -+ final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final); -+ rcu_read_unlock(); - - dst = ip6_dst_lookup_flow(sk, &fl6, final_p); - if (IS_ERR(dst)) { -@@ -219,7 +221,10 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req) - &ireq->ir_v6_loc_addr, - &ireq->ir_v6_rmt_addr); - fl6.daddr = ireq->ir_v6_rmt_addr; -- err = ip6_xmit(sk, skb, &fl6, np->opt, np->tclass); -+ rcu_read_lock(); -+ err = ip6_xmit(sk, skb, &fl6, rcu_dereference(np->opt), -+ np->tclass); -+ rcu_read_unlock(); - err = net_xmit_eval(err); - } - -@@ -415,6 +420,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, - { - struct inet_request_sock *ireq = inet_rsk(req); - struct ipv6_pinfo *newnp, *np = inet6_sk(sk); -+ struct ipv6_txoptions *opt; - struct inet_sock *newinet; - struct dccp6_sock *newdp6; - struct sock *newsk; -@@ -534,13 +540,15 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, - * Yes, keeping reference count would be much more clever, but we make - * one more one thing there: reattach optmem to newsk. - */ -- if (np->opt != NULL) -- newnp->opt = ipv6_dup_options(newsk, np->opt); -- -+ opt = rcu_dereference(np->opt); -+ if (opt) { -+ opt = ipv6_dup_options(newsk, opt); -+ RCU_INIT_POINTER(newnp->opt, opt); -+ } - inet_csk(newsk)->icsk_ext_hdr_len = 0; -- if (newnp->opt != NULL) -- inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen + -- newnp->opt->opt_flen); -+ if (opt) -+ inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen + -+ opt->opt_flen; - - dccp_sync_mss(newsk, dst_mtu(dst)); - -@@ -793,6 +801,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, - struct ipv6_pinfo *np = inet6_sk(sk); - struct dccp_sock *dp = dccp_sk(sk); - struct in6_addr *saddr = NULL, *final_p, final; -+ struct ipv6_txoptions *opt; - struct flowi6 fl6; - struct dst_entry *dst; - int addr_type; -@@ -892,7 +901,8 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, - fl6.fl6_sport = inet->inet_sport; - security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - -- final_p = fl6_update_dst(&fl6, np->opt, &final); -+ opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk)); -+ final_p = fl6_update_dst(&fl6, opt, &final); - - dst = ip6_dst_lookup_flow(sk, &fl6, final_p); - if (IS_ERR(dst)) { -@@ -912,9 +922,8 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, - __ip6_dst_store(sk, dst, NULL, NULL); - - icsk->icsk_ext_hdr_len = 0; -- if (np->opt != NULL) -- icsk->icsk_ext_hdr_len = (np->opt->opt_flen + -- np->opt->opt_nflen); -+ if (opt) -+ icsk->icsk_ext_hdr_len = opt->opt_flen + opt->opt_nflen; - - inet->inet_dport = usin->sin6_port; - -diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c -index 8e8203d..ef7e2c4 100644 ---- a/net/ipv4/ipmr.c -+++ b/net/ipv4/ipmr.c -@@ -134,7 +134,7 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, - struct mfc_cache *c, struct rtmsg *rtm); - static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc, - int cmd); --static void mroute_clean_tables(struct mr_table *mrt); -+static void mroute_clean_tables(struct mr_table *mrt, bool all); - static void ipmr_expire_process(unsigned long arg); - - #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES -@@ -350,7 +350,7 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id) - static void ipmr_free_table(struct mr_table *mrt) - { - del_timer_sync(&mrt->ipmr_expire_timer); -- mroute_clean_tables(mrt); -+ mroute_clean_tables(mrt, true); - kfree(mrt); - } - -@@ -1208,7 +1208,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, - * Close the multicast socket, and clear the vif tables etc - */ - --static void mroute_clean_tables(struct mr_table *mrt) -+static void mroute_clean_tables(struct mr_table *mrt, bool all) - { - int i; - LIST_HEAD(list); -@@ -1217,8 +1217,9 @@ static void mroute_clean_tables(struct mr_table *mrt) - /* Shut down all active vif entries */ - - for (i = 0; i < mrt->maxvif; i++) { -- if (!(mrt->vif_table[i].flags & VIFF_STATIC)) -- vif_delete(mrt, i, 0, &list); -+ if (!all && (mrt->vif_table[i].flags & VIFF_STATIC)) -+ continue; -+ vif_delete(mrt, i, 0, &list); - } - unregister_netdevice_many(&list); - -@@ -1226,7 +1227,7 @@ static void mroute_clean_tables(struct mr_table *mrt) - - for (i = 0; i < MFC_LINES; i++) { - list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) { -- if (c->mfc_flags & MFC_STATIC) -+ if (!all && (c->mfc_flags & MFC_STATIC)) - continue; - list_del_rcu(&c->list); - mroute_netlink_event(mrt, c, RTM_DELROUTE); -@@ -1261,7 +1262,7 @@ static void mrtsock_destruct(struct sock *sk) - NETCONFA_IFINDEX_ALL, - net->ipv4.devconf_all); - RCU_INIT_POINTER(mrt->mroute_sk, NULL); -- mroute_clean_tables(mrt); -+ mroute_clean_tables(mrt, false); - } - } - rtnl_unlock(); -diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c -index a8f515b..0a2b61d 100644 ---- a/net/ipv4/tcp_input.c -+++ b/net/ipv4/tcp_input.c -@@ -4457,19 +4457,34 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int - int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size) - { - struct sk_buff *skb; -+ int err = -ENOMEM; -+ int data_len = 0; - bool fragstolen; - - if (size == 0) - return 0; - -- skb = alloc_skb(size, sk->sk_allocation); -+ if (size > PAGE_SIZE) { -+ int npages = min_t(size_t, size >> PAGE_SHIFT, MAX_SKB_FRAGS); -+ -+ data_len = npages << PAGE_SHIFT; -+ size = data_len + (size & ~PAGE_MASK); -+ } -+ skb = alloc_skb_with_frags(size - data_len, data_len, -+ PAGE_ALLOC_COSTLY_ORDER, -+ &err, sk->sk_allocation); - if (!skb) - goto err; - -+ skb_put(skb, size - data_len); -+ skb->data_len = data_len; -+ skb->len = size; -+ - if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) - goto err_free; - -- if (memcpy_from_msg(skb_put(skb, size), msg, size)) -+ err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size); -+ if (err) - goto err_free; - - TCP_SKB_CB(skb)->seq = tcp_sk(sk)->rcv_nxt; -@@ -4485,7 +4500,8 @@ int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size) - err_free: - kfree_skb(skb); - err: -- return -ENOMEM; -+ return err; -+ - } - - static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) -@@ -5643,6 +5659,7 @@ discard: - } - - tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1; -+ tp->copied_seq = tp->rcv_nxt; - tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1; - - /* RFC1323: The window in SYN & SYN/ACK segments is -diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c -index 93898e0..a7739c8 100644 ---- a/net/ipv4/tcp_ipv4.c -+++ b/net/ipv4/tcp_ipv4.c -@@ -922,7 +922,8 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, - } - - md5sig = rcu_dereference_protected(tp->md5sig_info, -- sock_owned_by_user(sk)); -+ sock_owned_by_user(sk) || -+ lockdep_is_held(&sk->sk_lock.slock)); - if (!md5sig) { - md5sig = kmalloc(sizeof(*md5sig), gfp); - if (!md5sig) -diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c -index 7149ebc..04f0a05 100644 ---- a/net/ipv4/tcp_timer.c -+++ b/net/ipv4/tcp_timer.c -@@ -176,6 +176,18 @@ static int tcp_write_timeout(struct sock *sk) - syn_set = true; - } else { - if (retransmits_timed_out(sk, sysctl_tcp_retries1, 0, 0)) { -+ /* Some middle-boxes may black-hole Fast Open _after_ -+ * the handshake. Therefore we conservatively disable -+ * Fast Open on this path on recurring timeouts with -+ * few or zero bytes acked after Fast Open. -+ */ -+ if (tp->syn_data_acked && -+ tp->bytes_acked <= tp->rx_opt.mss_clamp) { -+ tcp_fastopen_cache_set(sk, 0, NULL, true, 0); -+ if (icsk->icsk_retransmits == sysctl_tcp_retries1) -+ NET_INC_STATS_BH(sock_net(sk), -+ LINUX_MIB_TCPFASTOPENACTIVEFAIL); -+ } - /* Black hole detection */ - tcp_mtu_probing(icsk, sk); - -diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c -index dd00828..3939dd2 100644 ---- a/net/ipv6/addrconf.c -+++ b/net/ipv6/addrconf.c -@@ -3628,7 +3628,7 @@ static void addrconf_dad_work(struct work_struct *w) - - /* send a neighbour solicitation for our addr */ - addrconf_addr_solict_mult(&ifp->addr, &mcaddr); -- ndisc_send_ns(ifp->idev->dev, NULL, &ifp->addr, &mcaddr, &in6addr_any, NULL); -+ ndisc_send_ns(ifp->idev->dev, NULL, &ifp->addr, &mcaddr, &in6addr_any); - out: - in6_ifa_put(ifp); - rtnl_unlock(); -diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c -index 44bb66b..38d66dd 100644 ---- a/net/ipv6/af_inet6.c -+++ b/net/ipv6/af_inet6.c -@@ -428,9 +428,11 @@ void inet6_destroy_sock(struct sock *sk) - - /* Free tx options */ - -- opt = xchg(&np->opt, NULL); -- if (opt) -- sock_kfree_s(sk, opt, opt->tot_len); -+ opt = xchg((__force struct ipv6_txoptions **)&np->opt, NULL); -+ if (opt) { -+ atomic_sub(opt->tot_len, &sk->sk_omem_alloc); -+ txopt_put(opt); -+ } - } - EXPORT_SYMBOL_GPL(inet6_destroy_sock); - -@@ -659,7 +661,10 @@ int inet6_sk_rebuild_header(struct sock *sk) - fl6.fl6_sport = inet->inet_sport; - security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - -- final_p = fl6_update_dst(&fl6, np->opt, &final); -+ rcu_read_lock(); -+ final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), -+ &final); -+ rcu_read_unlock(); - - dst = ip6_dst_lookup_flow(sk, &fl6, final_p); - if (IS_ERR(dst)) { -diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c -index 9aadd57..a42a673 100644 ---- a/net/ipv6/datagram.c -+++ b/net/ipv6/datagram.c -@@ -167,8 +167,10 @@ ipv4_connected: - - security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - -- opt = flowlabel ? flowlabel->opt : np->opt; -+ rcu_read_lock(); -+ opt = flowlabel ? flowlabel->opt : rcu_dereference(np->opt); - final_p = fl6_update_dst(&fl6, opt, &final); -+ rcu_read_unlock(); - - dst = ip6_dst_lookup_flow(sk, &fl6, final_p); - err = 0; -diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c -index ce203b0..ea7c4d6 100644 ---- a/net/ipv6/exthdrs.c -+++ b/net/ipv6/exthdrs.c -@@ -727,6 +727,7 @@ ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt) - *((char **)&opt2->dst1opt) += dif; - if (opt2->srcrt) - *((char **)&opt2->srcrt) += dif; -+ atomic_set(&opt2->refcnt, 1); - } - return opt2; - } -@@ -790,7 +791,7 @@ ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt, - return ERR_PTR(-ENOBUFS); - - memset(opt2, 0, tot_len); -- -+ atomic_set(&opt2->refcnt, 1); - opt2->tot_len = tot_len; - p = (char *)(opt2 + 1); - -diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c -index 6927f3f..9beed30 100644 ---- a/net/ipv6/inet6_connection_sock.c -+++ b/net/ipv6/inet6_connection_sock.c -@@ -77,7 +77,9 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk, - memset(fl6, 0, sizeof(*fl6)); - fl6->flowi6_proto = IPPROTO_TCP; - fl6->daddr = ireq->ir_v6_rmt_addr; -- final_p = fl6_update_dst(fl6, np->opt, &final); -+ rcu_read_lock(); -+ final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final); -+ rcu_read_unlock(); - fl6->saddr = ireq->ir_v6_loc_addr; - fl6->flowi6_oif = ireq->ir_iif; - fl6->flowi6_mark = ireq->ir_mark; -@@ -207,7 +209,9 @@ static struct dst_entry *inet6_csk_route_socket(struct sock *sk, - fl6->fl6_dport = inet->inet_dport; - security_sk_classify_flow(sk, flowi6_to_flowi(fl6)); - -- final_p = fl6_update_dst(fl6, np->opt, &final); -+ rcu_read_lock(); -+ final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final); -+ rcu_read_unlock(); - - dst = __inet6_csk_dst_check(sk, np->dst_cookie); - if (!dst) { -@@ -240,7 +244,8 @@ int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl_unused - /* Restore final destination back after routing done */ - fl6.daddr = sk->sk_v6_daddr; - -- res = ip6_xmit(sk, skb, &fl6, np->opt, np->tclass); -+ res = ip6_xmit(sk, skb, &fl6, rcu_dereference(np->opt), -+ np->tclass); - rcu_read_unlock(); - return res; - } -diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c -index eabffbb..137fca4 100644 ---- a/net/ipv6/ip6_tunnel.c -+++ b/net/ipv6/ip6_tunnel.c -@@ -177,7 +177,7 @@ void ip6_tnl_dst_reset(struct ip6_tnl *t) - int i; - - for_each_possible_cpu(i) -- ip6_tnl_per_cpu_dst_set(raw_cpu_ptr(t->dst_cache), NULL); -+ ip6_tnl_per_cpu_dst_set(per_cpu_ptr(t->dst_cache, i), NULL); - } - EXPORT_SYMBOL_GPL(ip6_tnl_dst_reset); - -diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c -index 0e004cc..35eee72 100644 ---- a/net/ipv6/ip6mr.c -+++ b/net/ipv6/ip6mr.c -@@ -118,7 +118,7 @@ static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc, - int cmd); - static int ip6mr_rtm_dumproute(struct sk_buff *skb, - struct netlink_callback *cb); --static void mroute_clean_tables(struct mr6_table *mrt); -+static void mroute_clean_tables(struct mr6_table *mrt, bool all); - static void ipmr_expire_process(unsigned long arg); - - #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES -@@ -334,7 +334,7 @@ static struct mr6_table *ip6mr_new_table(struct net *net, u32 id) - static void ip6mr_free_table(struct mr6_table *mrt) - { - del_timer_sync(&mrt->ipmr_expire_timer); -- mroute_clean_tables(mrt); -+ mroute_clean_tables(mrt, true); - kfree(mrt); - } - -@@ -1542,7 +1542,7 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt, - * Close the multicast socket, and clear the vif tables etc - */ - --static void mroute_clean_tables(struct mr6_table *mrt) -+static void mroute_clean_tables(struct mr6_table *mrt, bool all) - { - int i; - LIST_HEAD(list); -@@ -1552,8 +1552,9 @@ static void mroute_clean_tables(struct mr6_table *mrt) - * Shut down all active vif entries - */ - for (i = 0; i < mrt->maxvif; i++) { -- if (!(mrt->vif6_table[i].flags & VIFF_STATIC)) -- mif6_delete(mrt, i, &list); -+ if (!all && (mrt->vif6_table[i].flags & VIFF_STATIC)) -+ continue; -+ mif6_delete(mrt, i, &list); - } - unregister_netdevice_many(&list); - -@@ -1562,7 +1563,7 @@ static void mroute_clean_tables(struct mr6_table *mrt) - */ - for (i = 0; i < MFC6_LINES; i++) { - list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) { -- if (c->mfc_flags & MFC_STATIC) -+ if (!all && (c->mfc_flags & MFC_STATIC)) - continue; - write_lock_bh(&mrt_lock); - list_del(&c->list); -@@ -1625,7 +1626,7 @@ int ip6mr_sk_done(struct sock *sk) - net->ipv6.devconf_all); - write_unlock_bh(&mrt_lock); - -- mroute_clean_tables(mrt); -+ mroute_clean_tables(mrt, false); - err = 0; - break; - } -diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c -index 63e6956..4449ad1 100644 ---- a/net/ipv6/ipv6_sockglue.c -+++ b/net/ipv6/ipv6_sockglue.c -@@ -111,7 +111,8 @@ struct ipv6_txoptions *ipv6_update_options(struct sock *sk, - icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie); - } - } -- opt = xchg(&inet6_sk(sk)->opt, opt); -+ opt = xchg((__force struct ipv6_txoptions **)&inet6_sk(sk)->opt, -+ opt); - sk_dst_reset(sk); - - return opt; -@@ -231,9 +232,12 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, - sk->sk_socket->ops = &inet_dgram_ops; - sk->sk_family = PF_INET; - } -- opt = xchg(&np->opt, NULL); -- if (opt) -- sock_kfree_s(sk, opt, opt->tot_len); -+ opt = xchg((__force struct ipv6_txoptions **)&np->opt, -+ NULL); -+ if (opt) { -+ atomic_sub(opt->tot_len, &sk->sk_omem_alloc); -+ txopt_put(opt); -+ } - pktopt = xchg(&np->pktoptions, NULL); - kfree_skb(pktopt); - -@@ -403,7 +407,8 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, - if (optname != IPV6_RTHDR && !ns_capable(net->user_ns, CAP_NET_RAW)) - break; - -- opt = ipv6_renew_options(sk, np->opt, optname, -+ opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk)); -+ opt = ipv6_renew_options(sk, opt, optname, - (struct ipv6_opt_hdr __user *)optval, - optlen); - if (IS_ERR(opt)) { -@@ -432,8 +437,10 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, - retv = 0; - opt = ipv6_update_options(sk, opt); - sticky_done: -- if (opt) -- sock_kfree_s(sk, opt, opt->tot_len); -+ if (opt) { -+ atomic_sub(opt->tot_len, &sk->sk_omem_alloc); -+ txopt_put(opt); -+ } - break; - } - -@@ -486,6 +493,7 @@ sticky_done: - break; - - memset(opt, 0, sizeof(*opt)); -+ atomic_set(&opt->refcnt, 1); - opt->tot_len = sizeof(*opt) + optlen; - retv = -EFAULT; - if (copy_from_user(opt+1, optval, optlen)) -@@ -502,8 +510,10 @@ update: - retv = 0; - opt = ipv6_update_options(sk, opt); - done: -- if (opt) -- sock_kfree_s(sk, opt, opt->tot_len); -+ if (opt) { -+ atomic_sub(opt->tot_len, &sk->sk_omem_alloc); -+ txopt_put(opt); -+ } - break; - } - case IPV6_UNICAST_HOPS: -@@ -1110,10 +1120,11 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, - case IPV6_RTHDR: - case IPV6_DSTOPTS: - { -+ struct ipv6_txoptions *opt; - - lock_sock(sk); -- len = ipv6_getsockopt_sticky(sk, np->opt, -- optname, optval, len); -+ opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk)); -+ len = ipv6_getsockopt_sticky(sk, opt, optname, optval, len); - release_sock(sk); - /* check if ipv6_getsockopt_sticky() returns err code */ - if (len < 0) -diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c -index 083b292..41e3b5e 100644 ---- a/net/ipv6/mcast.c -+++ b/net/ipv6/mcast.c -@@ -1651,7 +1651,6 @@ out: - if (!err) { - ICMP6MSGOUT_INC_STATS(net, idev, ICMPV6_MLD2_REPORT); - ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); -- IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, payload_len); - } else { - IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); - } -@@ -2014,7 +2013,6 @@ out: - if (!err) { - ICMP6MSGOUT_INC_STATS(net, idev, type); - ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); -- IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, full_len); - } else - IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); - -diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c -index 64a7135..9ad46cd 100644 ---- a/net/ipv6/ndisc.c -+++ b/net/ipv6/ndisc.c -@@ -553,8 +553,7 @@ static void ndisc_send_unsol_na(struct net_device *dev) - - void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh, - const struct in6_addr *solicit, -- const struct in6_addr *daddr, const struct in6_addr *saddr, -- struct sk_buff *oskb) -+ const struct in6_addr *daddr, const struct in6_addr *saddr) - { - struct sk_buff *skb; - struct in6_addr addr_buf; -@@ -590,9 +589,6 @@ void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh, - ndisc_fill_addr_option(skb, ND_OPT_SOURCE_LL_ADDR, - dev->dev_addr); - -- if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE) && oskb) -- skb_dst_copy(skb, oskb); -- - ndisc_send_skb(skb, daddr, saddr); - } - -@@ -679,12 +675,12 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb) - "%s: trying to ucast probe in NUD_INVALID: %pI6\n", - __func__, target); - } -- ndisc_send_ns(dev, neigh, target, target, saddr, skb); -+ ndisc_send_ns(dev, neigh, target, target, saddr); - } else if ((probes -= NEIGH_VAR(neigh->parms, APP_PROBES)) < 0) { - neigh_app_ns(neigh); - } else { - addrconf_addr_solict_mult(target, &mcaddr); -- ndisc_send_ns(dev, NULL, target, &mcaddr, saddr, skb); -+ ndisc_send_ns(dev, NULL, target, &mcaddr, saddr); - } - } - -diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c -index c7196ad..dc50143 100644 ---- a/net/ipv6/netfilter/nf_conntrack_reasm.c -+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c -@@ -190,7 +190,7 @@ static void nf_ct_frag6_expire(unsigned long data) - /* Creation primitives. */ - static inline struct frag_queue *fq_find(struct net *net, __be32 id, - u32 user, struct in6_addr *src, -- struct in6_addr *dst, u8 ecn) -+ struct in6_addr *dst, int iif, u8 ecn) - { - struct inet_frag_queue *q; - struct ip6_create_arg arg; -@@ -200,6 +200,7 @@ static inline struct frag_queue *fq_find(struct net *net, __be32 id, - arg.user = user; - arg.src = src; - arg.dst = dst; -+ arg.iif = iif; - arg.ecn = ecn; - - local_bh_disable(); -@@ -603,7 +604,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user) - fhdr = (struct frag_hdr *)skb_transport_header(clone); - - fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr, -- ip6_frag_ecn(hdr)); -+ skb->dev ? skb->dev->ifindex : 0, ip6_frag_ecn(hdr)); - if (fq == NULL) { - pr_debug("Can't find and can't create new queue\n"); - goto ret_orig; -diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c -index fdbada156..fe97729 100644 ---- a/net/ipv6/raw.c -+++ b/net/ipv6/raw.c -@@ -732,6 +732,7 @@ static int raw6_getfrag(void *from, char *to, int offset, int len, int odd, - - static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) - { -+ struct ipv6_txoptions *opt_to_free = NULL; - struct ipv6_txoptions opt_space; - DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name); - struct in6_addr *daddr, *final_p, final; -@@ -838,8 +839,10 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) - if (!(opt->opt_nflen|opt->opt_flen)) - opt = NULL; - } -- if (!opt) -- opt = np->opt; -+ if (!opt) { -+ opt = txopt_get(np); -+ opt_to_free = opt; -+ } - if (flowlabel) - opt = fl6_merge_options(&opt_space, flowlabel, opt); - opt = ipv6_fixup_options(&opt_space, opt); -@@ -905,6 +908,7 @@ done: - dst_release(dst); - out: - fl6_sock_release(flowlabel); -+ txopt_put(opt_to_free); - return err < 0 ? err : len; - do_confirm: - dst_confirm(dst); -diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c -index f1159bb..04013a9 100644 ---- a/net/ipv6/reassembly.c -+++ b/net/ipv6/reassembly.c -@@ -108,7 +108,10 @@ bool ip6_frag_match(const struct inet_frag_queue *q, const void *a) - return fq->id == arg->id && - fq->user == arg->user && - ipv6_addr_equal(&fq->saddr, arg->src) && -- ipv6_addr_equal(&fq->daddr, arg->dst); -+ ipv6_addr_equal(&fq->daddr, arg->dst) && -+ (arg->iif == fq->iif || -+ !(ipv6_addr_type(arg->dst) & (IPV6_ADDR_MULTICAST | -+ IPV6_ADDR_LINKLOCAL))); - } - EXPORT_SYMBOL(ip6_frag_match); - -@@ -180,7 +183,7 @@ static void ip6_frag_expire(unsigned long data) - - static struct frag_queue * - fq_find(struct net *net, __be32 id, const struct in6_addr *src, -- const struct in6_addr *dst, u8 ecn) -+ const struct in6_addr *dst, int iif, u8 ecn) - { - struct inet_frag_queue *q; - struct ip6_create_arg arg; -@@ -190,6 +193,7 @@ fq_find(struct net *net, __be32 id, const struct in6_addr *src, - arg.user = IP6_DEFRAG_LOCAL_DELIVER; - arg.src = src; - arg.dst = dst; -+ arg.iif = iif; - arg.ecn = ecn; - - hash = inet6_hash_frag(id, src, dst); -@@ -551,7 +555,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb) - } - - fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr, -- ip6_frag_ecn(hdr)); -+ skb->dev ? skb->dev->ifindex : 0, ip6_frag_ecn(hdr)); - if (fq) { - int ret; - -diff --git a/net/ipv6/route.c b/net/ipv6/route.c -index 946880a..fd0e674 100644 ---- a/net/ipv6/route.c -+++ b/net/ipv6/route.c -@@ -403,6 +403,14 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, - } - } - -+static bool __rt6_check_expired(const struct rt6_info *rt) -+{ -+ if (rt->rt6i_flags & RTF_EXPIRES) -+ return time_after(jiffies, rt->dst.expires); -+ else -+ return false; -+} -+ - static bool rt6_check_expired(const struct rt6_info *rt) - { - if (rt->rt6i_flags & RTF_EXPIRES) { -@@ -538,7 +546,7 @@ static void rt6_probe_deferred(struct work_struct *w) - container_of(w, struct __rt6_probe_work, work); - - addrconf_addr_solict_mult(&work->target, &mcaddr); -- ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL, NULL); -+ ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL); - dev_put(work->dev); - kfree(work); - } -@@ -1270,7 +1278,8 @@ static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie) - - static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie) - { -- if (rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK && -+ if (!__rt6_check_expired(rt) && -+ rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK && - rt6_check((struct rt6_info *)(rt->dst.from), cookie)) - return &rt->dst; - else -@@ -1290,7 +1299,8 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) - - rt6_dst_from_metrics_check(rt); - -- if ((rt->rt6i_flags & RTF_PCPU) || unlikely(dst->flags & DST_NOCACHE)) -+ if (rt->rt6i_flags & RTF_PCPU || -+ (unlikely(dst->flags & DST_NOCACHE) && rt->dst.from)) - return rt6_dst_from_check(rt, cookie); - else - return rt6_check(rt, cookie); -@@ -1340,6 +1350,12 @@ static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu) - rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires); - } - -+static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt) -+{ -+ return !(rt->rt6i_flags & RTF_CACHE) && -+ (rt->rt6i_flags & RTF_PCPU || rt->rt6i_node); -+} -+ - static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, - const struct ipv6hdr *iph, u32 mtu) - { -@@ -1353,7 +1369,7 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, - if (mtu >= dst_mtu(dst)) - return; - -- if (rt6->rt6i_flags & RTF_CACHE) { -+ if (!rt6_cache_allowed_for_pmtu(rt6)) { - rt6_do_update_pmtu(rt6, mtu); - } else { - const struct in6_addr *daddr, *saddr; -diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c -index 0909f4e..f30bfdc 100644 ---- a/net/ipv6/syncookies.c -+++ b/net/ipv6/syncookies.c -@@ -225,7 +225,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) - memset(&fl6, 0, sizeof(fl6)); - fl6.flowi6_proto = IPPROTO_TCP; - fl6.daddr = ireq->ir_v6_rmt_addr; -- final_p = fl6_update_dst(&fl6, np->opt, &final); -+ final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final); - fl6.saddr = ireq->ir_v6_loc_addr; - fl6.flowi6_oif = sk->sk_bound_dev_if; - fl6.flowi6_mark = ireq->ir_mark; -diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c -index 97d9314..9e9b77b 100644 ---- a/net/ipv6/tcp_ipv6.c -+++ b/net/ipv6/tcp_ipv6.c -@@ -120,6 +120,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, - struct ipv6_pinfo *np = inet6_sk(sk); - struct tcp_sock *tp = tcp_sk(sk); - struct in6_addr *saddr = NULL, *final_p, final; -+ struct ipv6_txoptions *opt; - struct flowi6 fl6; - struct dst_entry *dst; - int addr_type; -@@ -235,7 +236,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, - fl6.fl6_dport = usin->sin6_port; - fl6.fl6_sport = inet->inet_sport; - -- final_p = fl6_update_dst(&fl6, np->opt, &final); -+ opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk)); -+ final_p = fl6_update_dst(&fl6, opt, &final); - - security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - -@@ -263,9 +265,9 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, - tcp_fetch_timewait_stamp(sk, dst); - - icsk->icsk_ext_hdr_len = 0; -- if (np->opt) -- icsk->icsk_ext_hdr_len = (np->opt->opt_flen + -- np->opt->opt_nflen); -+ if (opt) -+ icsk->icsk_ext_hdr_len = opt->opt_flen + -+ opt->opt_nflen; - - tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); - -@@ -461,7 +463,8 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst, - fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts)); - - skb_set_queue_mapping(skb, queue_mapping); -- err = ip6_xmit(sk, skb, fl6, np->opt, np->tclass); -+ err = ip6_xmit(sk, skb, fl6, rcu_dereference(np->opt), -+ np->tclass); - err = net_xmit_eval(err); - } - -@@ -991,6 +994,7 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, - struct inet_request_sock *ireq; - struct ipv6_pinfo *newnp, *np = inet6_sk(sk); - struct tcp6_sock *newtcp6sk; -+ struct ipv6_txoptions *opt; - struct inet_sock *newinet; - struct tcp_sock *newtp; - struct sock *newsk; -@@ -1126,13 +1130,15 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, - but we make one more one thing there: reattach optmem - to newsk. - */ -- if (np->opt) -- newnp->opt = ipv6_dup_options(newsk, np->opt); -- -+ opt = rcu_dereference(np->opt); -+ if (opt) { -+ opt = ipv6_dup_options(newsk, opt); -+ RCU_INIT_POINTER(newnp->opt, opt); -+ } - inet_csk(newsk)->icsk_ext_hdr_len = 0; -- if (newnp->opt) -- inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen + -- newnp->opt->opt_flen); -+ if (opt) -+ inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen + -+ opt->opt_flen; - - tcp_ca_openreq_child(newsk, dst); - -diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c -index 0aba654..8379fc2 100644 ---- a/net/ipv6/udp.c -+++ b/net/ipv6/udp.c -@@ -1107,6 +1107,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) - DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name); - struct in6_addr *daddr, *final_p, final; - struct ipv6_txoptions *opt = NULL; -+ struct ipv6_txoptions *opt_to_free = NULL; - struct ip6_flowlabel *flowlabel = NULL; - struct flowi6 fl6; - struct dst_entry *dst; -@@ -1260,8 +1261,10 @@ do_udp_sendmsg: - opt = NULL; - connected = 0; - } -- if (!opt) -- opt = np->opt; -+ if (!opt) { -+ opt = txopt_get(np); -+ opt_to_free = opt; -+ } - if (flowlabel) - opt = fl6_merge_options(&opt_space, flowlabel, opt); - opt = ipv6_fixup_options(&opt_space, opt); -@@ -1370,6 +1373,7 @@ release_dst: - out: - dst_release(dst); - fl6_sock_release(flowlabel); -+ txopt_put(opt_to_free); - if (!err) - return len; - /* -diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c -index d1ded37..0ce9da9 100644 ---- a/net/l2tp/l2tp_ip6.c -+++ b/net/l2tp/l2tp_ip6.c -@@ -486,6 +486,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) - DECLARE_SOCKADDR(struct sockaddr_l2tpip6 *, lsa, msg->msg_name); - struct in6_addr *daddr, *final_p, final; - struct ipv6_pinfo *np = inet6_sk(sk); -+ struct ipv6_txoptions *opt_to_free = NULL; - struct ipv6_txoptions *opt = NULL; - struct ip6_flowlabel *flowlabel = NULL; - struct dst_entry *dst = NULL; -@@ -575,8 +576,10 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) - opt = NULL; - } - -- if (opt == NULL) -- opt = np->opt; -+ if (!opt) { -+ opt = txopt_get(np); -+ opt_to_free = opt; -+ } - if (flowlabel) - opt = fl6_merge_options(&opt_space, flowlabel, opt); - opt = ipv6_fixup_options(&opt_space, opt); -@@ -631,6 +634,7 @@ done: - dst_release(dst); - out: - fl6_sock_release(flowlabel); -+ txopt_put(opt_to_free); - - return err < 0 ? err : len; - -diff --git a/net/openvswitch/dp_notify.c b/net/openvswitch/dp_notify.c -index a7a80a6..653d073 100644 ---- a/net/openvswitch/dp_notify.c -+++ b/net/openvswitch/dp_notify.c -@@ -58,7 +58,7 @@ void ovs_dp_notify_wq(struct work_struct *work) - struct hlist_node *n; - - hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node) { -- if (vport->ops->type != OVS_VPORT_TYPE_NETDEV) -+ if (vport->ops->type == OVS_VPORT_TYPE_INTERNAL) - continue; - - if (!(vport->dev->priv_flags & IFF_OVS_DATAPATH)) -diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c -index f7e8dcc..ac14c48 100644 ---- a/net/openvswitch/vport-netdev.c -+++ b/net/openvswitch/vport-netdev.c -@@ -180,9 +180,13 @@ void ovs_netdev_tunnel_destroy(struct vport *vport) - if (vport->dev->priv_flags & IFF_OVS_DATAPATH) - ovs_netdev_detach_dev(vport); - -- /* Early release so we can unregister the device */ -+ /* We can be invoked by both explicit vport deletion and -+ * underlying netdev deregistration; delete the link only -+ * if it's not already shutting down. -+ */ -+ if (vport->dev->reg_state == NETREG_REGISTERED) -+ rtnl_delete_link(vport->dev); - dev_put(vport->dev); -- rtnl_delete_link(vport->dev); - vport->dev = NULL; - rtnl_unlock(); - -diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c -index 27b2898..4695a36 100644 ---- a/net/packet/af_packet.c -+++ b/net/packet/af_packet.c -@@ -1741,6 +1741,20 @@ static void fanout_release(struct sock *sk) - kfree_rcu(po->rollover, rcu); - } - -+static bool packet_extra_vlan_len_allowed(const struct net_device *dev, -+ struct sk_buff *skb) -+{ -+ /* Earlier code assumed this would be a VLAN pkt, double-check -+ * this now that we have the actual packet in hand. We can only -+ * do this check on Ethernet devices. -+ */ -+ if (unlikely(dev->type != ARPHRD_ETHER)) -+ return false; -+ -+ skb_reset_mac_header(skb); -+ return likely(eth_hdr(skb)->h_proto == htons(ETH_P_8021Q)); -+} -+ - static const struct proto_ops packet_ops; - - static const struct proto_ops packet_ops_spkt; -@@ -1902,18 +1916,10 @@ retry: - goto retry; - } - -- if (len > (dev->mtu + dev->hard_header_len + extra_len)) { -- /* Earlier code assumed this would be a VLAN pkt, -- * double-check this now that we have the actual -- * packet in hand. -- */ -- struct ethhdr *ehdr; -- skb_reset_mac_header(skb); -- ehdr = eth_hdr(skb); -- if (ehdr->h_proto != htons(ETH_P_8021Q)) { -- err = -EMSGSIZE; -- goto out_unlock; -- } -+ if (len > (dev->mtu + dev->hard_header_len + extra_len) && -+ !packet_extra_vlan_len_allowed(dev, skb)) { -+ err = -EMSGSIZE; -+ goto out_unlock; - } - - skb->protocol = proto; -@@ -2332,6 +2338,15 @@ static bool ll_header_truncated(const struct net_device *dev, int len) - return false; - } - -+static void tpacket_set_protocol(const struct net_device *dev, -+ struct sk_buff *skb) -+{ -+ if (dev->type == ARPHRD_ETHER) { -+ skb_reset_mac_header(skb); -+ skb->protocol = eth_hdr(skb)->h_proto; -+ } -+} -+ - static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, - void *frame, struct net_device *dev, int size_max, - __be16 proto, unsigned char *addr, int hlen) -@@ -2368,8 +2383,6 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, - skb_reserve(skb, hlen); - skb_reset_network_header(skb); - -- if (!packet_use_direct_xmit(po)) -- skb_probe_transport_header(skb, 0); - if (unlikely(po->tp_tx_has_off)) { - int off_min, off_max, off; - off_min = po->tp_hdrlen - sizeof(struct sockaddr_ll); -@@ -2415,6 +2428,8 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, - dev->hard_header_len); - if (unlikely(err)) - return err; -+ if (!skb->protocol) -+ tpacket_set_protocol(dev, skb); - - data += dev->hard_header_len; - to_write -= dev->hard_header_len; -@@ -2449,6 +2464,8 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, - len = ((to_write > len_max) ? len_max : to_write); - } - -+ skb_probe_transport_header(skb, 0); -+ - return tp_len; - } - -@@ -2493,12 +2510,13 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) - if (unlikely(!(dev->flags & IFF_UP))) - goto out_put; - -- reserve = dev->hard_header_len + VLAN_HLEN; -+ if (po->sk.sk_socket->type == SOCK_RAW) -+ reserve = dev->hard_header_len; - size_max = po->tx_ring.frame_size - - (po->tp_hdrlen - sizeof(struct sockaddr_ll)); - -- if (size_max > dev->mtu + reserve) -- size_max = dev->mtu + reserve; -+ if (size_max > dev->mtu + reserve + VLAN_HLEN) -+ size_max = dev->mtu + reserve + VLAN_HLEN; - - do { - ph = packet_current_frame(po, &po->tx_ring, -@@ -2525,18 +2543,10 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) - tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto, - addr, hlen); - if (likely(tp_len >= 0) && -- tp_len > dev->mtu + dev->hard_header_len) { -- struct ethhdr *ehdr; -- /* Earlier code assumed this would be a VLAN pkt, -- * double-check this now that we have the actual -- * packet in hand. -- */ -+ tp_len > dev->mtu + reserve && -+ !packet_extra_vlan_len_allowed(dev, skb)) -+ tp_len = -EMSGSIZE; - -- skb_reset_mac_header(skb); -- ehdr = eth_hdr(skb); -- if (ehdr->h_proto != htons(ETH_P_8021Q)) -- tp_len = -EMSGSIZE; -- } - if (unlikely(tp_len < 0)) { - if (po->tp_loss) { - __packet_set_status(po, ph, -@@ -2757,18 +2767,10 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) - - sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); - -- if (!gso_type && (len > dev->mtu + reserve + extra_len)) { -- /* Earlier code assumed this would be a VLAN pkt, -- * double-check this now that we have the actual -- * packet in hand. -- */ -- struct ethhdr *ehdr; -- skb_reset_mac_header(skb); -- ehdr = eth_hdr(skb); -- if (ehdr->h_proto != htons(ETH_P_8021Q)) { -- err = -EMSGSIZE; -- goto out_free; -- } -+ if (!gso_type && (len > dev->mtu + reserve + extra_len) && -+ !packet_extra_vlan_len_allowed(dev, skb)) { -+ err = -EMSGSIZE; -+ goto out_free; - } - - skb->protocol = proto; -@@ -2799,8 +2801,8 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) - len += vnet_hdr_len; - } - -- if (!packet_use_direct_xmit(po)) -- skb_probe_transport_header(skb, reserve); -+ skb_probe_transport_header(skb, reserve); -+ - if (unlikely(extra_len == 4)) - skb->no_fcs = 1; - -diff --git a/net/rds/connection.c b/net/rds/connection.c -index 49adeef..9b2de5e 100644 ---- a/net/rds/connection.c -+++ b/net/rds/connection.c -@@ -190,12 +190,6 @@ new_conn: - } - } - -- if (trans == NULL) { -- kmem_cache_free(rds_conn_slab, conn); -- conn = ERR_PTR(-ENODEV); -- goto out; -- } -- - conn->c_trans = trans; - - ret = trans->conn_alloc(conn, gfp); -diff --git a/net/rds/send.c b/net/rds/send.c -index 4df61a5..859de6f 100644 ---- a/net/rds/send.c -+++ b/net/rds/send.c -@@ -1009,11 +1009,13 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) - release_sock(sk); - } - -- /* racing with another thread binding seems ok here */ -+ lock_sock(sk); - if (daddr == 0 || rs->rs_bound_addr == 0) { -+ release_sock(sk); - ret = -ENOTCONN; /* XXX not a great errno */ - goto out; - } -+ release_sock(sk); - - if (payload_len > rds_sk_sndbuf(rs)) { - ret = -EMSGSIZE; -diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c -index f43c8f3..7ec667d 100644 ---- a/net/sched/sch_api.c -+++ b/net/sched/sch_api.c -@@ -253,7 +253,8 @@ int qdisc_set_default(const char *name) - } - - /* We know handle. Find qdisc among all qdisc's attached to device -- (root qdisc, all its children, children of children etc.) -+ * (root qdisc, all its children, children of children etc.) -+ * Note: caller either uses rtnl or rcu_read_lock() - */ - - static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle) -@@ -264,7 +265,7 @@ static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle) - root->handle == handle) - return root; - -- list_for_each_entry(q, &root->list, list) { -+ list_for_each_entry_rcu(q, &root->list, list) { - if (q->handle == handle) - return q; - } -@@ -277,15 +278,18 @@ void qdisc_list_add(struct Qdisc *q) - struct Qdisc *root = qdisc_dev(q)->qdisc; - - WARN_ON_ONCE(root == &noop_qdisc); -- list_add_tail(&q->list, &root->list); -+ ASSERT_RTNL(); -+ list_add_tail_rcu(&q->list, &root->list); - } - } - EXPORT_SYMBOL(qdisc_list_add); - - void qdisc_list_del(struct Qdisc *q) - { -- if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) -- list_del(&q->list); -+ if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) { -+ ASSERT_RTNL(); -+ list_del_rcu(&q->list); -+ } - } - EXPORT_SYMBOL(qdisc_list_del); - -@@ -750,14 +754,18 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n) - if (n == 0) - return; - drops = max_t(int, n, 0); -+ rcu_read_lock(); - while ((parentid = sch->parent)) { - if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS)) -- return; -+ break; - -+ if (sch->flags & TCQ_F_NOPARENT) -+ break; -+ /* TODO: perform the search on a per txq basis */ - sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid)); - if (sch == NULL) { -- WARN_ON(parentid != TC_H_ROOT); -- return; -+ WARN_ON_ONCE(parentid != TC_H_ROOT); -+ break; - } - cops = sch->ops->cl_ops; - if (cops->qlen_notify) { -@@ -768,6 +776,7 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n) - sch->q.qlen -= n; - __qdisc_qstats_drop(sch, drops); - } -+ rcu_read_unlock(); - } - EXPORT_SYMBOL(qdisc_tree_decrease_qlen); - -@@ -941,7 +950,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, - } - lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock); - if (!netif_is_multiqueue(dev)) -- sch->flags |= TCQ_F_ONETXQUEUE; -+ sch->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; - } - - sch->handle = handle; -diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c -index cb5d4ad..e82a1ad 100644 ---- a/net/sched/sch_generic.c -+++ b/net/sched/sch_generic.c -@@ -737,7 +737,7 @@ static void attach_one_default_qdisc(struct net_device *dev, - return; - } - if (!netif_is_multiqueue(dev)) -- qdisc->flags |= TCQ_F_ONETXQUEUE; -+ qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; - dev_queue->qdisc_sleeping = qdisc; - } - -diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c -index f3cbaec..3e82f04 100644 ---- a/net/sched/sch_mq.c -+++ b/net/sched/sch_mq.c -@@ -63,7 +63,7 @@ static int mq_init(struct Qdisc *sch, struct nlattr *opt) - if (qdisc == NULL) - goto err; - priv->qdiscs[ntx] = qdisc; -- qdisc->flags |= TCQ_F_ONETXQUEUE; -+ qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; - } - - sch->flags |= TCQ_F_MQROOT; -@@ -156,7 +156,7 @@ static int mq_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new, - - *old = dev_graft_qdisc(dev_queue, new); - if (new) -- new->flags |= TCQ_F_ONETXQUEUE; -+ new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; - if (dev->flags & IFF_UP) - dev_activate(dev); - return 0; -diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c -index 3811a74..ad70ecf 100644 ---- a/net/sched/sch_mqprio.c -+++ b/net/sched/sch_mqprio.c -@@ -132,7 +132,7 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) - goto err; - } - priv->qdiscs[i] = qdisc; -- qdisc->flags |= TCQ_F_ONETXQUEUE; -+ qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; - } - - /* If the mqprio options indicate that hardware should own -@@ -209,7 +209,7 @@ static int mqprio_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new, - *old = dev_graft_qdisc(dev_queue, new); - - if (new) -- new->flags |= TCQ_F_ONETXQUEUE; -+ new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; - - if (dev->flags & IFF_UP) - dev_activate(dev); -diff --git a/net/sctp/auth.c b/net/sctp/auth.c -index 4f15b7d..1543e39 100644 ---- a/net/sctp/auth.c -+++ b/net/sctp/auth.c -@@ -809,8 +809,8 @@ int sctp_auth_ep_set_hmacs(struct sctp_endpoint *ep, - if (!has_sha1) - return -EINVAL; - -- memcpy(ep->auth_hmacs_list->hmac_ids, &hmacs->shmac_idents[0], -- hmacs->shmac_num_idents * sizeof(__u16)); -+ for (i = 0; i < hmacs->shmac_num_idents; i++) -+ ep->auth_hmacs_list->hmac_ids[i] = htons(hmacs->shmac_idents[i]); - ep->auth_hmacs_list->param_hdr.length = htons(sizeof(sctp_paramhdr_t) + - hmacs->shmac_num_idents * sizeof(__u16)); - return 0; -diff --git a/net/sctp/socket.c b/net/sctp/socket.c -index 17bef01..3ec88be 100644 ---- a/net/sctp/socket.c -+++ b/net/sctp/socket.c -@@ -7375,6 +7375,13 @@ struct proto sctp_prot = { - - #if IS_ENABLED(CONFIG_IPV6) - -+#include <net/transp_v6.h> -+static void sctp_v6_destroy_sock(struct sock *sk) -+{ -+ sctp_destroy_sock(sk); -+ inet6_destroy_sock(sk); -+} -+ - struct proto sctpv6_prot = { - .name = "SCTPv6", - .owner = THIS_MODULE, -@@ -7384,7 +7391,7 @@ struct proto sctpv6_prot = { - .accept = sctp_accept, - .ioctl = sctp_ioctl, - .init = sctp_init_sock, -- .destroy = sctp_destroy_sock, -+ .destroy = sctp_v6_destroy_sock, - .shutdown = sctp_shutdown, - .setsockopt = sctp_setsockopt, - .getsockopt = sctp_getsockopt, -diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c -index cd7c5f1..86f2e7c 100644 ---- a/net/tipc/udp_media.c -+++ b/net/tipc/udp_media.c -@@ -159,8 +159,11 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb, - struct sk_buff *clone; - struct rtable *rt; - -- if (skb_headroom(skb) < UDP_MIN_HEADROOM) -- pskb_expand_head(skb, UDP_MIN_HEADROOM, 0, GFP_ATOMIC); -+ if (skb_headroom(skb) < UDP_MIN_HEADROOM) { -+ err = pskb_expand_head(skb, UDP_MIN_HEADROOM, 0, GFP_ATOMIC); -+ if (err) -+ goto tx_error; -+ } - - clone = skb_clone(skb, GFP_ATOMIC); - skb_set_inner_protocol(clone, htons(ETH_P_TIPC)); -diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c -index 94f6582..128b098 100644 ---- a/net/unix/af_unix.c -+++ b/net/unix/af_unix.c -@@ -326,6 +326,118 @@ found: - return s; - } - -+/* Support code for asymmetrically connected dgram sockets -+ * -+ * If a datagram socket is connected to a socket not itself connected -+ * to the first socket (eg, /dev/log), clients may only enqueue more -+ * messages if the present receive queue of the server socket is not -+ * "too large". This means there's a second writeability condition -+ * poll and sendmsg need to test. The dgram recv code will do a wake -+ * up on the peer_wait wait queue of a socket upon reception of a -+ * datagram which needs to be propagated to sleeping would-be writers -+ * since these might not have sent anything so far. This can't be -+ * accomplished via poll_wait because the lifetime of the server -+ * socket might be less than that of its clients if these break their -+ * association with it or if the server socket is closed while clients -+ * are still connected to it and there's no way to inform "a polling -+ * implementation" that it should let go of a certain wait queue -+ * -+ * In order to propagate a wake up, a wait_queue_t of the client -+ * socket is enqueued on the peer_wait queue of the server socket -+ * whose wake function does a wake_up on the ordinary client socket -+ * wait queue. This connection is established whenever a write (or -+ * poll for write) hit the flow control condition and broken when the -+ * association to the server socket is dissolved or after a wake up -+ * was relayed. -+ */ -+ -+static int unix_dgram_peer_wake_relay(wait_queue_t *q, unsigned mode, int flags, -+ void *key) -+{ -+ struct unix_sock *u; -+ wait_queue_head_t *u_sleep; -+ -+ u = container_of(q, struct unix_sock, peer_wake); -+ -+ __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait, -+ q); -+ u->peer_wake.private = NULL; -+ -+ /* relaying can only happen while the wq still exists */ -+ u_sleep = sk_sleep(&u->sk); -+ if (u_sleep) -+ wake_up_interruptible_poll(u_sleep, key); -+ -+ return 0; -+} -+ -+static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other) -+{ -+ struct unix_sock *u, *u_other; -+ int rc; -+ -+ u = unix_sk(sk); -+ u_other = unix_sk(other); -+ rc = 0; -+ spin_lock(&u_other->peer_wait.lock); -+ -+ if (!u->peer_wake.private) { -+ u->peer_wake.private = other; -+ __add_wait_queue(&u_other->peer_wait, &u->peer_wake); -+ -+ rc = 1; -+ } -+ -+ spin_unlock(&u_other->peer_wait.lock); -+ return rc; -+} -+ -+static void unix_dgram_peer_wake_disconnect(struct sock *sk, -+ struct sock *other) -+{ -+ struct unix_sock *u, *u_other; -+ -+ u = unix_sk(sk); -+ u_other = unix_sk(other); -+ spin_lock(&u_other->peer_wait.lock); -+ -+ if (u->peer_wake.private == other) { -+ __remove_wait_queue(&u_other->peer_wait, &u->peer_wake); -+ u->peer_wake.private = NULL; -+ } -+ -+ spin_unlock(&u_other->peer_wait.lock); -+} -+ -+static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk, -+ struct sock *other) -+{ -+ unix_dgram_peer_wake_disconnect(sk, other); -+ wake_up_interruptible_poll(sk_sleep(sk), -+ POLLOUT | -+ POLLWRNORM | -+ POLLWRBAND); -+} -+ -+/* preconditions: -+ * - unix_peer(sk) == other -+ * - association is stable -+ */ -+static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other) -+{ -+ int connected; -+ -+ connected = unix_dgram_peer_wake_connect(sk, other); -+ -+ if (unix_recvq_full(other)) -+ return 1; -+ -+ if (connected) -+ unix_dgram_peer_wake_disconnect(sk, other); -+ -+ return 0; -+} -+ - static inline int unix_writable(struct sock *sk) - { - return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf; -@@ -430,6 +542,8 @@ static void unix_release_sock(struct sock *sk, int embrion) - skpair->sk_state_change(skpair); - sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP); - } -+ -+ unix_dgram_peer_wake_disconnect(sk, skpair); - sock_put(skpair); /* It may now die */ - unix_peer(sk) = NULL; - } -@@ -440,6 +554,7 @@ static void unix_release_sock(struct sock *sk, int embrion) - if (state == TCP_LISTEN) - unix_release_sock(skb->sk, 1); - /* passed fds are erased in the kfree_skb hook */ -+ UNIXCB(skb).consumed = skb->len; - kfree_skb(skb); - } - -@@ -664,6 +779,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern) - INIT_LIST_HEAD(&u->link); - mutex_init(&u->readlock); /* single task reading lock */ - init_waitqueue_head(&u->peer_wait); -+ init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay); - unix_insert_socket(unix_sockets_unbound(sk), sk); - out: - if (sk == NULL) -@@ -1031,6 +1147,8 @@ restart: - if (unix_peer(sk)) { - struct sock *old_peer = unix_peer(sk); - unix_peer(sk) = other; -+ unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer); -+ - unix_state_double_unlock(sk, other); - - if (other != old_peer) -@@ -1432,6 +1550,14 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen - return err; - } - -+static bool unix_passcred_enabled(const struct socket *sock, -+ const struct sock *other) -+{ -+ return test_bit(SOCK_PASSCRED, &sock->flags) || -+ !other->sk_socket || -+ test_bit(SOCK_PASSCRED, &other->sk_socket->flags); -+} -+ - /* - * Some apps rely on write() giving SCM_CREDENTIALS - * We include credentials if source or destination socket -@@ -1442,14 +1568,41 @@ static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock, - { - if (UNIXCB(skb).pid) - return; -- if (test_bit(SOCK_PASSCRED, &sock->flags) || -- !other->sk_socket || -- test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) { -+ if (unix_passcred_enabled(sock, other)) { - UNIXCB(skb).pid = get_pid(task_tgid(current)); - current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid); - } - } - -+static int maybe_init_creds(struct scm_cookie *scm, -+ struct socket *socket, -+ const struct sock *other) -+{ -+ int err; -+ struct msghdr msg = { .msg_controllen = 0 }; -+ -+ err = scm_send(socket, &msg, scm, false); -+ if (err) -+ return err; -+ -+ if (unix_passcred_enabled(socket, other)) { -+ scm->pid = get_pid(task_tgid(current)); -+ current_uid_gid(&scm->creds.uid, &scm->creds.gid); -+ } -+ return err; -+} -+ -+static bool unix_skb_scm_eq(struct sk_buff *skb, -+ struct scm_cookie *scm) -+{ -+ const struct unix_skb_parms *u = &UNIXCB(skb); -+ -+ return u->pid == scm->pid && -+ uid_eq(u->uid, scm->creds.uid) && -+ gid_eq(u->gid, scm->creds.gid) && -+ unix_secdata_eq(scm, skb); -+} -+ - /* - * Send AF_UNIX data. - */ -@@ -1470,6 +1623,7 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, - struct scm_cookie scm; - int max_level; - int data_len = 0; -+ int sk_locked; - - wait_for_unix_gc(); - err = scm_send(sock, msg, &scm, false); -@@ -1548,12 +1702,14 @@ restart: - goto out_free; - } - -+ sk_locked = 0; - unix_state_lock(other); -+restart_locked: - err = -EPERM; - if (!unix_may_send(sk, other)) - goto out_unlock; - -- if (sock_flag(other, SOCK_DEAD)) { -+ if (unlikely(sock_flag(other, SOCK_DEAD))) { - /* - * Check with 1003.1g - what should - * datagram error -@@ -1561,10 +1717,14 @@ restart: - unix_state_unlock(other); - sock_put(other); - -+ if (!sk_locked) -+ unix_state_lock(sk); -+ - err = 0; -- unix_state_lock(sk); - if (unix_peer(sk) == other) { - unix_peer(sk) = NULL; -+ unix_dgram_peer_wake_disconnect_wakeup(sk, other); -+ - unix_state_unlock(sk); - - unix_dgram_disconnected(sk, other); -@@ -1590,21 +1750,38 @@ restart: - goto out_unlock; - } - -- if (unix_peer(other) != sk && unix_recvq_full(other)) { -- if (!timeo) { -- err = -EAGAIN; -- goto out_unlock; -+ if (unlikely(unix_peer(other) != sk && unix_recvq_full(other))) { -+ if (timeo) { -+ timeo = unix_wait_for_peer(other, timeo); -+ -+ err = sock_intr_errno(timeo); -+ if (signal_pending(current)) -+ goto out_free; -+ -+ goto restart; - } - -- timeo = unix_wait_for_peer(other, timeo); -+ if (!sk_locked) { -+ unix_state_unlock(other); -+ unix_state_double_lock(sk, other); -+ } - -- err = sock_intr_errno(timeo); -- if (signal_pending(current)) -- goto out_free; -+ if (unix_peer(sk) != other || -+ unix_dgram_peer_wake_me(sk, other)) { -+ err = -EAGAIN; -+ sk_locked = 1; -+ goto out_unlock; -+ } - -- goto restart; -+ if (!sk_locked) { -+ sk_locked = 1; -+ goto restart_locked; -+ } - } - -+ if (unlikely(sk_locked)) -+ unix_state_unlock(sk); -+ - if (sock_flag(other, SOCK_RCVTSTAMP)) - __net_timestamp(skb); - maybe_add_creds(skb, sock, other); -@@ -1618,6 +1795,8 @@ restart: - return len; - - out_unlock: -+ if (sk_locked) -+ unix_state_unlock(sk); - unix_state_unlock(other); - out_free: - kfree_skb(skb); -@@ -1739,8 +1918,10 @@ out_err: - static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page, - int offset, size_t size, int flags) - { -- int err = 0; -- bool send_sigpipe = true; -+ int err; -+ bool send_sigpipe = false; -+ bool init_scm = true; -+ struct scm_cookie scm; - struct sock *other, *sk = socket->sk; - struct sk_buff *skb, *newskb = NULL, *tail = NULL; - -@@ -1758,7 +1939,7 @@ alloc_skb: - newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT, - &err, 0); - if (!newskb) -- return err; -+ goto err; - } - - /* we must acquire readlock as we modify already present -@@ -1767,12 +1948,12 @@ alloc_skb: - err = mutex_lock_interruptible(&unix_sk(other)->readlock); - if (err) { - err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS; -- send_sigpipe = false; - goto err; - } - - if (sk->sk_shutdown & SEND_SHUTDOWN) { - err = -EPIPE; -+ send_sigpipe = true; - goto err_unlock; - } - -@@ -1781,23 +1962,34 @@ alloc_skb: - if (sock_flag(other, SOCK_DEAD) || - other->sk_shutdown & RCV_SHUTDOWN) { - err = -EPIPE; -+ send_sigpipe = true; - goto err_state_unlock; - } - -+ if (init_scm) { -+ err = maybe_init_creds(&scm, socket, other); -+ if (err) -+ goto err_state_unlock; -+ init_scm = false; -+ } -+ - skb = skb_peek_tail(&other->sk_receive_queue); - if (tail && tail == skb) { - skb = newskb; -- } else if (!skb) { -- if (newskb) -+ } else if (!skb || !unix_skb_scm_eq(skb, &scm)) { -+ if (newskb) { - skb = newskb; -- else -+ } else { -+ tail = skb; - goto alloc_skb; -+ } - } else if (newskb) { - /* this is fast path, we don't necessarily need to - * call to kfree_skb even though with newskb == NULL - * this - does no harm - */ - consume_skb(newskb); -+ newskb = NULL; - } - - if (skb_append_pagefrags(skb, page, offset, size)) { -@@ -1810,14 +2002,20 @@ alloc_skb: - skb->truesize += size; - atomic_add(size, &sk->sk_wmem_alloc); - -- if (newskb) -+ if (newskb) { -+ err = unix_scm_to_skb(&scm, skb, false); -+ if (err) -+ goto err_state_unlock; -+ spin_lock(&other->sk_receive_queue.lock); - __skb_queue_tail(&other->sk_receive_queue, newskb); -+ spin_unlock(&other->sk_receive_queue.lock); -+ } - - unix_state_unlock(other); - mutex_unlock(&unix_sk(other)->readlock); - - other->sk_data_ready(other); -- -+ scm_destroy(&scm); - return size; - - err_state_unlock: -@@ -1828,6 +2026,8 @@ err: - kfree_skb(newskb); - if (send_sigpipe && !(flags & MSG_NOSIGNAL)) - send_sig(SIGPIPE, current, 0); -+ if (!init_scm) -+ scm_destroy(&scm); - return err; - } - -@@ -2071,6 +2271,7 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state) - - do { - int chunk; -+ bool drop_skb; - struct sk_buff *skb, *last; - - unix_state_lock(sk); -@@ -2130,10 +2331,7 @@ unlock: - - if (check_creds) { - /* Never glue messages from different writers */ -- if ((UNIXCB(skb).pid != scm.pid) || -- !uid_eq(UNIXCB(skb).uid, scm.creds.uid) || -- !gid_eq(UNIXCB(skb).gid, scm.creds.gid) || -- !unix_secdata_eq(&scm, skb)) -+ if (!unix_skb_scm_eq(skb, &scm)) - break; - } else if (test_bit(SOCK_PASSCRED, &sock->flags)) { - /* Copy credentials */ -@@ -2151,7 +2349,11 @@ unlock: - } - - chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size); -+ skb_get(skb); - chunk = state->recv_actor(skb, skip, chunk, state); -+ drop_skb = !unix_skb_len(skb); -+ /* skb is only safe to use if !drop_skb */ -+ consume_skb(skb); - if (chunk < 0) { - if (copied == 0) - copied = -EFAULT; -@@ -2160,6 +2362,18 @@ unlock: - copied += chunk; - size -= chunk; - -+ if (drop_skb) { -+ /* the skb was touched by a concurrent reader; -+ * we should not expect anything from this skb -+ * anymore and assume it invalid - we can be -+ * sure it was dropped from the socket queue -+ * -+ * let's report a short read -+ */ -+ err = 0; -+ break; -+ } -+ - /* Mark read part of skb as used */ - if (!(flags & MSG_PEEK)) { - UNIXCB(skb).consumed += chunk; -@@ -2453,14 +2667,16 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock, - return mask; - - writable = unix_writable(sk); -- other = unix_peer_get(sk); -- if (other) { -- if (unix_peer(other) != sk) { -- sock_poll_wait(file, &unix_sk(other)->peer_wait, wait); -- if (unix_recvq_full(other)) -- writable = 0; -- } -- sock_put(other); -+ if (writable) { -+ unix_state_lock(sk); -+ -+ other = unix_peer(sk); -+ if (other && unix_peer(other) != sk && -+ unix_recvq_full(other) && -+ unix_dgram_peer_wake_me(sk, other)) -+ writable = 0; -+ -+ unix_state_unlock(sk); - } - - if (writable) -diff --git a/sound/pci/Kconfig b/sound/pci/Kconfig -index edfc1b8..656ce39 100644 ---- a/sound/pci/Kconfig -+++ b/sound/pci/Kconfig -@@ -25,7 +25,7 @@ config SND_ALS300 - select SND_PCM - select SND_AC97_CODEC - select SND_OPL3_LIB -- select ZONE_DMA -+ depends on ZONE_DMA - help - Say 'Y' or 'M' to include support for Avance Logic ALS300/ALS300+ - -@@ -50,7 +50,7 @@ config SND_ALI5451 - tristate "ALi M5451 PCI Audio Controller" - select SND_MPU401_UART - select SND_AC97_CODEC -- select ZONE_DMA -+ depends on ZONE_DMA - help - Say Y here to include support for the integrated AC97 sound - device on motherboards using the ALi M5451 Audio Controller -@@ -155,7 +155,7 @@ config SND_AZT3328 - select SND_PCM - select SND_RAWMIDI - select SND_AC97_CODEC -- select ZONE_DMA -+ depends on ZONE_DMA - help - Say Y here to include support for Aztech AZF3328 (PCI168) - soundcards. -@@ -463,7 +463,7 @@ config SND_EMU10K1 - select SND_HWDEP - select SND_RAWMIDI - select SND_AC97_CODEC -- select ZONE_DMA -+ depends on ZONE_DMA - help - Say Y to include support for Sound Blaster PCI 512, Live!, - Audigy and E-mu APS (partially supported) soundcards. -@@ -479,7 +479,7 @@ config SND_EMU10K1X - tristate "Emu10k1X (Dell OEM Version)" - select SND_AC97_CODEC - select SND_RAWMIDI -- select ZONE_DMA -+ depends on ZONE_DMA - help - Say Y here to include support for the Dell OEM version of the - Sound Blaster Live!. -@@ -513,7 +513,7 @@ config SND_ES1938 - select SND_OPL3_LIB - select SND_MPU401_UART - select SND_AC97_CODEC -- select ZONE_DMA -+ depends on ZONE_DMA - help - Say Y here to include support for soundcards based on ESS Solo-1 - (ES1938, ES1946, ES1969) chips. -@@ -525,7 +525,7 @@ config SND_ES1968 - tristate "ESS ES1968/1978 (Maestro-1/2/2E)" - select SND_MPU401_UART - select SND_AC97_CODEC -- select ZONE_DMA -+ depends on ZONE_DMA - help - Say Y here to include support for soundcards based on ESS Maestro - 1/2/2E chips. -@@ -612,7 +612,7 @@ config SND_ICE1712 - select SND_MPU401_UART - select SND_AC97_CODEC - select BITREVERSE -- select ZONE_DMA -+ depends on ZONE_DMA - help - Say Y here to include support for soundcards based on the - ICE1712 (Envy24) chip. -@@ -700,7 +700,7 @@ config SND_LX6464ES - config SND_MAESTRO3 - tristate "ESS Allegro/Maestro3" - select SND_AC97_CODEC -- select ZONE_DMA -+ depends on ZONE_DMA - help - Say Y here to include support for soundcards based on ESS Maestro 3 - (Allegro) chips. -@@ -806,7 +806,7 @@ config SND_SIS7019 - tristate "SiS 7019 Audio Accelerator" - depends on X86_32 - select SND_AC97_CODEC -- select ZONE_DMA -+ depends on ZONE_DMA - help - Say Y here to include support for the SiS 7019 Audio Accelerator. - -@@ -818,7 +818,7 @@ config SND_SONICVIBES - select SND_OPL3_LIB - select SND_MPU401_UART - select SND_AC97_CODEC -- select ZONE_DMA -+ depends on ZONE_DMA - help - Say Y here to include support for soundcards based on the S3 - SonicVibes chip. -@@ -830,7 +830,7 @@ config SND_TRIDENT - tristate "Trident 4D-Wave DX/NX; SiS 7018" - select SND_MPU401_UART - select SND_AC97_CODEC -- select ZONE_DMA -+ depends on ZONE_DMA - help - Say Y here to include support for soundcards based on Trident - 4D-Wave DX/NX or SiS 7018 chips. -diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c -index acbfbe08..f22f5c4 100644 ---- a/sound/pci/hda/patch_hdmi.c -+++ b/sound/pci/hda/patch_hdmi.c -@@ -50,8 +50,9 @@ MODULE_PARM_DESC(static_hdmi_pcm, "Don't restrict PCM parameters per ELD info"); - #define is_haswell(codec) ((codec)->core.vendor_id == 0x80862807) - #define is_broadwell(codec) ((codec)->core.vendor_id == 0x80862808) - #define is_skylake(codec) ((codec)->core.vendor_id == 0x80862809) -+#define is_broxton(codec) ((codec)->core.vendor_id == 0x8086280a) - #define is_haswell_plus(codec) (is_haswell(codec) || is_broadwell(codec) \ -- || is_skylake(codec)) -+ || is_skylake(codec) || is_broxton(codec)) - - #define is_valleyview(codec) ((codec)->core.vendor_id == 0x80862882) - #define is_cherryview(codec) ((codec)->core.vendor_id == 0x80862883) -diff --git a/tools/net/Makefile b/tools/net/Makefile -index ee577ea..ddf8880 100644 ---- a/tools/net/Makefile -+++ b/tools/net/Makefile -@@ -4,6 +4,9 @@ CC = gcc - LEX = flex - YACC = bison - -+CFLAGS += -Wall -O2 -+CFLAGS += -D__EXPORTED_HEADERS__ -I../../include/uapi -I../../include -+ - %.yacc.c: %.y - $(YACC) -o $@ -d $< - -@@ -12,15 +15,13 @@ YACC = bison - - all : bpf_jit_disasm bpf_dbg bpf_asm - --bpf_jit_disasm : CFLAGS = -Wall -O2 -DPACKAGE='bpf_jit_disasm' -+bpf_jit_disasm : CFLAGS += -DPACKAGE='bpf_jit_disasm' - bpf_jit_disasm : LDLIBS = -lopcodes -lbfd -ldl - bpf_jit_disasm : bpf_jit_disasm.o - --bpf_dbg : CFLAGS = -Wall -O2 - bpf_dbg : LDLIBS = -lreadline - bpf_dbg : bpf_dbg.o - --bpf_asm : CFLAGS = -Wall -O2 -I. - bpf_asm : LDLIBS = - bpf_asm : bpf_asm.o bpf_exp.yacc.o bpf_exp.lex.o - bpf_exp.lex.o : bpf_exp.yacc.c diff --git a/4.3.3/4420_grsecurity-3.1-4.3.3-201512162141.patch b/4.3.3/4420_grsecurity-3.1-4.3.3-201512222129.patch index 4b7bff5..2c1d2ad 100644 --- a/4.3.3/4420_grsecurity-3.1-4.3.3-201512162141.patch +++ b/4.3.3/4420_grsecurity-3.1-4.3.3-201512222129.patch @@ -313,7 +313,7 @@ index 13f888a..250729b 100644 A typical pattern in a Kbuild file looks like this: diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt -index 22a4b68..8c70743 100644 +index 22a4b68..0ec4c2a 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1246,6 +1246,13 @@ bytes respectively. Such letter suffixes can also be entirely omitted. @@ -341,7 +341,7 @@ index 22a4b68..8c70743 100644 nosmap [X86] Disable SMAP (Supervisor Mode Access Prevention) even if it is supported by processor. -@@ -2677,6 +2688,30 @@ bytes respectively. Such letter suffixes can also be entirely omitted. +@@ -2677,6 +2688,35 @@ bytes respectively. Such letter suffixes can also be entirely omitted. the specified number of seconds. This is to be used if your oopses keep scrolling off the screen. @@ -366,6 +366,11 @@ index 22a4b68..8c70743 100644 + from the first 4GB of memory as the bootmem allocator + passes the memory pages to the buddy allocator. + ++ pax_size_overflow_report_only ++ Enables rate-limited logging of size_overflow plugin ++ violations while disabling killing of the violating ++ task. ++ + pax_weakuderef [X86-64] enables the weaker but faster form of UDEREF + when the processor supports PCID. + @@ -3811,7 +3816,7 @@ index 845769e..4278fd7 100644 atomic64_set(&mm->context.id, asid); } diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c -index 0d629b8..01867c8 100644 +index 0d629b8..f13ad33 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -25,6 +25,7 @@ @@ -3859,7 +3864,7 @@ index 0d629b8..01867c8 100644 #endif +#ifdef CONFIG_PAX_PAGEEXEC -+ if (fsr & FSR_LNX_PF) { ++ if ((tsk->mm->pax_flags & MF_PAX_PAGEEXEC) && (fsr & FSR_LNX_PF)) { + pax_report_fault(regs, (void *)regs->ARM_pc, (void *)regs->ARM_sp); + do_group_exit(SIGKILL); + } @@ -32731,7 +32736,7 @@ index 903ec1e..41b4708 100644 } diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c -index eef44d9..b0fb164 100644 +index eef44d9..79b0e58 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -14,6 +14,8 @@ @@ -33203,7 +33208,7 @@ index eef44d9..b0fb164 100644 + +#ifdef CONFIG_PAX_SEGMEXEC + if (mm->pax_flags & MF_PAX_SEGMEXEC) { -+ if (!(error_code & (PF_PROT | PF_WRITE)) && (ip + SEGMEXEC_TASK_SIZE == address)) ++ if (!(error_code & (PF_PROT | PF_WRITE)) && (ip + SEGMEXEC_TASK_SIZE == address)) + return true; + return false; + } @@ -36996,6 +37001,28 @@ index ad3f276..bef6d50 100644 return ERR_PTR(-EINVAL); nr_pages += end - start; +diff --git a/block/blk-core.c b/block/blk-core.c +index 18e92a6..1834d7c 100644 +--- a/block/blk-core.c ++++ b/block/blk-core.c +@@ -1616,8 +1616,6 @@ static void blk_queue_bio(struct request_queue *q, struct bio *bio) + struct request *req; + unsigned int request_count = 0; + +- blk_queue_split(q, &bio, q->bio_split); +- + /* + * low level driver can indicate that it wants pages above a + * certain limit bounced to low memory (ie for highmem, or even +@@ -1625,6 +1623,8 @@ static void blk_queue_bio(struct request_queue *q, struct bio *bio) + */ + blk_queue_bounce(q, &bio); + ++ blk_queue_split(q, &bio, q->bio_split); ++ + if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) { + bio->bi_error = -EIO; + bio_endio(bio); diff --git a/block/blk-iopoll.c b/block/blk-iopoll.c index 0736729..2ec3b48 100644 --- a/block/blk-iopoll.c @@ -75725,6 +75752,32 @@ index f70119f..b7d2bb4 100644 /* for init */ int __init btrfs_delayed_inode_init(void); +diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c +index 6a98bdd..fed3da6 100644 +--- a/fs/btrfs/extent_map.c ++++ b/fs/btrfs/extent_map.c +@@ -235,7 +235,9 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em) + em->start = merge->start; + em->orig_start = merge->orig_start; + em->len += merge->len; +- em->block_len += merge->block_len; ++ if (em->block_start != EXTENT_MAP_HOLE && ++ em->block_start != EXTENT_MAP_INLINE) ++ em->block_len += merge->block_len; + em->block_start = merge->block_start; + em->mod_len = (em->mod_len + em->mod_start) - merge->mod_start; + em->mod_start = merge->mod_start; +@@ -252,7 +254,9 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em) + merge = rb_entry(rb, struct extent_map, rb_node); + if (rb && mergable_maps(em, merge)) { + em->len += merge->len; +- em->block_len += merge->block_len; ++ if (em->block_start != EXTENT_MAP_HOLE && ++ em->block_start != EXTENT_MAP_INLINE) ++ em->block_len += merge->block_len; + rb_erase(&merge->rb_node, &tree->map); + RB_CLEAR_NODE(&merge->rb_node); + em->mod_len = (merge->mod_start + merge->mod_len) - em->mod_start; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 396e3d5..e752d29 100644 --- a/fs/btrfs/inode.c @@ -77174,7 +77227,7 @@ index e4141f2..d8263e8 100644 i += packet_length_size; if (copy_to_user(&buf[i], msg_ctx->msg, msg_ctx->msg_size)) diff --git a/fs/exec.c b/fs/exec.c -index b06623a..895c666 100644 +index b06623a..1c50b96 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -56,8 +56,20 @@ @@ -77670,7 +77723,7 @@ index b06623a..895c666 100644 out: if (bprm->mm) { acct_arg_size(bprm, 0); -@@ -1749,3 +1924,313 @@ COMPAT_SYSCALL_DEFINE5(execveat, int, fd, +@@ -1749,3 +1924,319 @@ COMPAT_SYSCALL_DEFINE5(execveat, int, fd, argv, envp, flags); } #endif @@ -77976,11 +78029,17 @@ index b06623a..895c666 100644 + +#ifdef CONFIG_PAX_SIZE_OVERFLOW + ++static DEFINE_RATELIMIT_STATE(size_overflow_ratelimit, 15 * HZ, 3); ++extern bool pax_size_overflow_report_only; ++ +void __nocapture(1, 3, 4) __used report_size_overflow(const char *file, unsigned int line, const char *func, const char *ssa_name) +{ -+ printk(KERN_EMERG "PAX: size overflow detected in function %s %s:%u %s", func, file, line, ssa_name); -+ dump_stack(); -+ do_group_exit(SIGKILL); ++ if (!pax_size_overflow_report_only || __ratelimit(&size_overflow_ratelimit)) { ++ printk(KERN_EMERG "PAX: size overflow detected in function %s %s:%u %s", func, file, line, ssa_name); ++ dump_stack(); ++ } ++ if (!pax_size_overflow_report_only) ++ do_group_exit(SIGKILL); +} +EXPORT_SYMBOL(report_size_overflow); +#endif @@ -82413,7 +82472,7 @@ index eed2050..fb443f2 100644 static struct pid * get_children_pid(struct inode *inode, struct pid *pid_prev, loff_t pos) diff --git a/fs/proc/base.c b/fs/proc/base.c -index 29595af..6ab6000 100644 +index 29595af..aeaaf2e 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -113,6 +113,14 @@ struct pid_entry { @@ -82794,7 +82853,15 @@ index 29595af..6ab6000 100644 if (!dir_emit_dots(file, ctx)) goto out; -@@ -2519,7 +2645,7 @@ static int do_io_accounting(struct task_struct *task, struct seq_file *m, int wh +@@ -2484,6 +2610,7 @@ static ssize_t proc_coredump_filter_write(struct file *file, + mm = get_task_mm(task); + if (!mm) + goto out_no_mm; ++ ret = 0; + + for (i = 0, mask = 1; i < MMF_DUMP_FILTER_BITS; i++, mask <<= 1) { + if (val & mask) +@@ -2519,7 +2646,7 @@ static int do_io_accounting(struct task_struct *task, struct seq_file *m, int wh if (result) return result; @@ -82803,7 +82870,7 @@ index 29595af..6ab6000 100644 result = -EACCES; goto out_unlock; } -@@ -2738,7 +2864,7 @@ static const struct pid_entry tgid_base_stuff[] = { +@@ -2738,7 +2865,7 @@ static const struct pid_entry tgid_base_stuff[] = { REG("autogroup", S_IRUGO|S_IWUSR, proc_pid_sched_autogroup_operations), #endif REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations), @@ -82812,7 +82879,7 @@ index 29595af..6ab6000 100644 ONE("syscall", S_IRUSR, proc_pid_syscall), #endif REG("cmdline", S_IRUGO, proc_pid_cmdline_ops), -@@ -2763,10 +2889,10 @@ static const struct pid_entry tgid_base_stuff[] = { +@@ -2763,10 +2890,10 @@ static const struct pid_entry tgid_base_stuff[] = { #ifdef CONFIG_SECURITY DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), #endif @@ -82825,7 +82892,7 @@ index 29595af..6ab6000 100644 ONE("stack", S_IRUSR, proc_pid_stack), #endif #ifdef CONFIG_SCHED_INFO -@@ -2800,6 +2926,9 @@ static const struct pid_entry tgid_base_stuff[] = { +@@ -2800,6 +2927,9 @@ static const struct pid_entry tgid_base_stuff[] = { #ifdef CONFIG_HARDWALL ONE("hardwall", S_IRUGO, proc_pid_hardwall), #endif @@ -82835,7 +82902,7 @@ index 29595af..6ab6000 100644 #ifdef CONFIG_USER_NS REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations), REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations), -@@ -2932,7 +3061,14 @@ static int proc_pid_instantiate(struct inode *dir, +@@ -2932,7 +3062,14 @@ static int proc_pid_instantiate(struct inode *dir, if (!inode) goto out; @@ -82850,7 +82917,7 @@ index 29595af..6ab6000 100644 inode->i_op = &proc_tgid_base_inode_operations; inode->i_fop = &proc_tgid_base_operations; inode->i_flags|=S_IMMUTABLE; -@@ -2970,7 +3106,11 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsign +@@ -2970,7 +3107,11 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsign if (!task) goto out; @@ -82862,7 +82929,7 @@ index 29595af..6ab6000 100644 put_task_struct(task); out: return ERR_PTR(result); -@@ -3084,7 +3224,7 @@ static const struct pid_entry tid_base_stuff[] = { +@@ -3084,7 +3225,7 @@ static const struct pid_entry tid_base_stuff[] = { REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), #endif REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations), @@ -82871,7 +82938,7 @@ index 29595af..6ab6000 100644 ONE("syscall", S_IRUSR, proc_pid_syscall), #endif REG("cmdline", S_IRUGO, proc_pid_cmdline_ops), -@@ -3111,10 +3251,10 @@ static const struct pid_entry tid_base_stuff[] = { +@@ -3111,10 +3252,10 @@ static const struct pid_entry tid_base_stuff[] = { #ifdef CONFIG_SECURITY DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), #endif @@ -84997,6 +85064,28 @@ index 8e2010d..95549ab 100644 #endif /* DEBUG */ /* +diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c +index be43248..6bb4442 100644 +--- a/fs/xfs/libxfs/xfs_da_btree.c ++++ b/fs/xfs/libxfs/xfs_da_btree.c +@@ -2007,6 +2007,7 @@ xfs_da_grow_inode_int( + struct xfs_inode *dp = args->dp; + int w = args->whichfork; + xfs_rfsblock_t nblks = dp->i_d.di_nblocks; ++ xfs_rfsblock_t nblocks; + struct xfs_bmbt_irec map, *mapp; + int nmap, error, got, i, mapi; + +@@ -2075,7 +2076,8 @@ xfs_da_grow_inode_int( + } + + /* account for newly allocated blocks in reserved blocks total */ +- args->total -= dp->i_d.di_nblocks - nblks; ++ nblocks = dp->i_d.di_nblocks - nblks; ++ args->total -= nblocks; + + out_free_map: + if (mapp != &map) diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c index a989a9c..db30c9a 100644 --- a/fs/xfs/xfs_dir2_readdir.c @@ -105589,7 +105678,7 @@ index b32ad7d..05f6420 100644 next_state = Reset; return 0; diff --git a/init/main.c b/init/main.c -index 9e64d70..141e0b4 100644 +index 9e64d70..2f40cd9 100644 --- a/init/main.c +++ b/init/main.c @@ -97,6 +97,8 @@ extern void radix_tree_init(void); @@ -105601,7 +105690,7 @@ index 9e64d70..141e0b4 100644 /* * Debug helper: via this flag we know that we are in 'early bootup code' * where only the boot processor is running with IRQ disabled. This means -@@ -158,6 +160,37 @@ static int __init set_reset_devices(char *str) +@@ -158,6 +160,48 @@ static int __init set_reset_devices(char *str) __setup("reset_devices", set_reset_devices); @@ -105636,10 +105725,21 @@ index 9e64d70..141e0b4 100644 +__setup("pax_softmode=", setup_pax_softmode); +#endif + ++#ifdef CONFIG_PAX_SIZE_OVERFLOW ++bool pax_size_overflow_report_only __read_only; ++ ++static int __init setup_pax_size_overflow_report_only(char *str) ++{ ++ pax_size_overflow_report_only = true; ++ return 0; ++} ++early_param("pax_size_overflow_report_only", setup_pax_size_overflow_report_only); ++#endif ++ static const char *argv_init[MAX_INIT_ARGS+2] = { "init", NULL, }; const char *envp_init[MAX_INIT_ENVS+2] = { "HOME=/", "TERM=linux", NULL, }; static const char *panic_later, *panic_param; -@@ -731,7 +764,7 @@ static bool __init_or_module initcall_blacklisted(initcall_t fn) +@@ -731,7 +775,7 @@ static bool __init_or_module initcall_blacklisted(initcall_t fn) struct blacklist_entry *entry; char *fn_name; @@ -105648,7 +105748,7 @@ index 9e64d70..141e0b4 100644 if (!fn_name) return false; -@@ -783,7 +816,7 @@ int __init_or_module do_one_initcall(initcall_t fn) +@@ -783,7 +827,7 @@ int __init_or_module do_one_initcall(initcall_t fn) { int count = preempt_count(); int ret; @@ -105657,7 +105757,7 @@ index 9e64d70..141e0b4 100644 if (initcall_blacklisted(fn)) return -EPERM; -@@ -793,18 +826,17 @@ int __init_or_module do_one_initcall(initcall_t fn) +@@ -793,18 +837,17 @@ int __init_or_module do_one_initcall(initcall_t fn) else ret = fn(); @@ -105680,7 +105780,7 @@ index 9e64d70..141e0b4 100644 return ret; } -@@ -909,8 +941,8 @@ static int run_init_process(const char *init_filename) +@@ -909,8 +952,8 @@ static int run_init_process(const char *init_filename) { argv_init[0] = init_filename; return do_execve(getname_kernel(init_filename), @@ -105691,7 +105791,7 @@ index 9e64d70..141e0b4 100644 } static int try_to_run_init_process(const char *init_filename) -@@ -927,6 +959,10 @@ static int try_to_run_init_process(const char *init_filename) +@@ -927,6 +970,10 @@ static int try_to_run_init_process(const char *init_filename) return ret; } @@ -105702,7 +105802,7 @@ index 9e64d70..141e0b4 100644 static noinline void __init kernel_init_freeable(void); static int __ref kernel_init(void *unused) -@@ -951,6 +987,11 @@ static int __ref kernel_init(void *unused) +@@ -951,6 +998,11 @@ static int __ref kernel_init(void *unused) ramdisk_execute_command, ret); } @@ -105714,7 +105814,7 @@ index 9e64d70..141e0b4 100644 /* * We try each of these until one succeeds. * -@@ -1008,7 +1049,7 @@ static noinline void __init kernel_init_freeable(void) +@@ -1008,7 +1060,7 @@ static noinline void __init kernel_init_freeable(void) do_basic_setup(); /* Open the /dev/console on the rootfs, this should never fail */ @@ -105723,7 +105823,7 @@ index 9e64d70..141e0b4 100644 pr_err("Warning: unable to open an initial console.\n"); (void) sys_dup(0); -@@ -1021,11 +1062,13 @@ static noinline void __init kernel_init_freeable(void) +@@ -1021,11 +1073,13 @@ static noinline void __init kernel_init_freeable(void) if (!ramdisk_execute_command) ramdisk_execute_command = "/init"; @@ -109683,10 +109783,45 @@ index 99513e1..0caa643 100644 } diff --git a/kernel/ptrace.c b/kernel/ptrace.c -index 787320d..9e9535d 100644 +index 787320d..9873654 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c -@@ -219,6 +219,13 @@ static int ptrace_has_cap(struct user_namespace *ns, unsigned int mode) +@@ -207,18 +207,45 @@ static int ptrace_check_attach(struct task_struct *child, bool ignore_state) + return ret; + } + +-static int ptrace_has_cap(struct user_namespace *ns, unsigned int mode) ++static bool ptrace_has_cap(const struct cred *tcred, unsigned int mode) + { ++ struct user_namespace *tns = tcred->user_ns; ++ struct user_namespace *curns = current_cred()->user_ns; ++ ++ /* When a root-owned process enters a user namespace created by a ++ * malicious user, the user shouldn't be able to execute code under ++ * uid 0 by attaching to the root-owned process via ptrace. ++ * Therefore, similar to the capable_wrt_inode_uidgid() check, ++ * verify that all the uids and gids of the target process are ++ * mapped into the current namespace. ++ * No fsuid/fsgid check because __ptrace_may_access doesn't do it ++ * either. ++ */ ++ if (!kuid_has_mapping(curns, tcred->euid) || ++ !kuid_has_mapping(curns, tcred->suid) || ++ !kuid_has_mapping(curns, tcred->uid) || ++ !kgid_has_mapping(curns, tcred->egid) || ++ !kgid_has_mapping(curns, tcred->sgid) || ++ !kgid_has_mapping(curns, tcred->gid)) ++ return false; ++ + if (mode & PTRACE_MODE_NOAUDIT) +- return has_ns_capability_noaudit(current, ns, CAP_SYS_PTRACE); ++ return has_ns_capability_noaudit(current, tns, CAP_SYS_PTRACE); + else +- return has_ns_capability(current, ns, CAP_SYS_PTRACE); ++ return has_ns_capability(current, tns, CAP_SYS_PTRACE); + } + + /* Returns 0 on success, -errno on denial. */ static int __ptrace_may_access(struct task_struct *task, unsigned int mode) { const struct cred *cred = current_cred(), *tcred; @@ -109700,7 +109835,7 @@ index 787320d..9e9535d 100644 /* May we inspect the given task? * This check is used both for attaching with ptrace -@@ -233,13 +240,28 @@ static int __ptrace_may_access(struct task_struct *task, unsigned int mode) +@@ -233,15 +260,30 @@ static int __ptrace_may_access(struct task_struct *task, unsigned int mode) if (same_thread_group(task, current)) return 0; rcu_read_lock(); @@ -109733,9 +109868,21 @@ index 787320d..9e9535d 100644 + gid_eq(caller_gid, tcred->sgid) && + gid_eq(caller_gid, tcred->gid)) goto ok; - if (ptrace_has_cap(tcred->user_ns, mode)) +- if (ptrace_has_cap(tcred->user_ns, mode)) ++ if (ptrace_has_cap(tcred, mode)) goto ok; -@@ -306,7 +328,7 @@ static int ptrace_attach(struct task_struct *task, long request, + rcu_read_unlock(); + return -EPERM; +@@ -252,7 +294,7 @@ ok: + dumpable = get_dumpable(task->mm); + rcu_read_lock(); + if (dumpable != SUID_DUMP_USER && +- !ptrace_has_cap(__task_cred(task)->user_ns, mode)) { ++ !ptrace_has_cap(__task_cred(task), mode)) { + rcu_read_unlock(); + return -EPERM; + } +@@ -306,7 +348,7 @@ static int ptrace_attach(struct task_struct *task, long request, goto out; task_lock(task); @@ -109744,7 +109891,7 @@ index 787320d..9e9535d 100644 task_unlock(task); if (retval) goto unlock_creds; -@@ -321,7 +343,7 @@ static int ptrace_attach(struct task_struct *task, long request, +@@ -321,7 +363,7 @@ static int ptrace_attach(struct task_struct *task, long request, if (seize) flags |= PT_SEIZED; rcu_read_lock(); @@ -109753,7 +109900,7 @@ index 787320d..9e9535d 100644 flags |= PT_PTRACE_CAP; rcu_read_unlock(); task->ptrace = flags; -@@ -514,7 +536,7 @@ int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst +@@ -514,7 +556,7 @@ int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst break; return -EIO; } @@ -109762,7 +109909,7 @@ index 787320d..9e9535d 100644 return -EFAULT; copied += retval; src += retval; -@@ -815,7 +837,7 @@ int ptrace_request(struct task_struct *child, long request, +@@ -815,7 +857,7 @@ int ptrace_request(struct task_struct *child, long request, bool seized = child->ptrace & PT_SEIZED; int ret = -EIO; siginfo_t siginfo, *si; @@ -109771,7 +109918,7 @@ index 787320d..9e9535d 100644 unsigned long __user *datalp = datavp; unsigned long flags; -@@ -1061,14 +1083,21 @@ SYSCALL_DEFINE4(ptrace, long, request, long, pid, unsigned long, addr, +@@ -1061,14 +1103,21 @@ SYSCALL_DEFINE4(ptrace, long, request, long, pid, unsigned long, addr, goto out; } @@ -109794,7 +109941,7 @@ index 787320d..9e9535d 100644 goto out_put_task_struct; } -@@ -1096,7 +1125,7 @@ int generic_ptrace_peekdata(struct task_struct *tsk, unsigned long addr, +@@ -1096,7 +1145,7 @@ int generic_ptrace_peekdata(struct task_struct *tsk, unsigned long addr, copied = access_process_vm(tsk, addr, &tmp, sizeof(tmp), 0); if (copied != sizeof(tmp)) return -EIO; @@ -109803,7 +109950,7 @@ index 787320d..9e9535d 100644 } int generic_ptrace_pokedata(struct task_struct *tsk, unsigned long addr, -@@ -1189,7 +1218,7 @@ int compat_ptrace_request(struct task_struct *child, compat_long_t request, +@@ -1189,7 +1238,7 @@ int compat_ptrace_request(struct task_struct *child, compat_long_t request, } COMPAT_SYSCALL_DEFINE4(ptrace, compat_long_t, request, compat_long_t, pid, @@ -109812,7 +109959,7 @@ index 787320d..9e9535d 100644 { struct task_struct *child; long ret; -@@ -1205,14 +1234,21 @@ COMPAT_SYSCALL_DEFINE4(ptrace, compat_long_t, request, compat_long_t, pid, +@@ -1205,14 +1254,21 @@ COMPAT_SYSCALL_DEFINE4(ptrace, compat_long_t, request, compat_long_t, pid, goto out; } @@ -120164,7 +120311,7 @@ index 8a1741b..20d20e7 100644 if (!err) err = put_user(SCM_RIGHTS, &cm->cmsg_type); diff --git a/net/core/skbuff.c b/net/core/skbuff.c -index fab4599..e553f88 100644 +index fab4599..e488a92 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2103,7 +2103,7 @@ EXPORT_SYMBOL(__skb_checksum); @@ -120193,8 +120340,18 @@ index fab4599..e553f88 100644 NULL); } +@@ -3643,7 +3645,8 @@ static void __skb_complete_tx_timestamp(struct sk_buff *skb, + serr->ee.ee_info = tstype; + if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) { + serr->ee.ee_data = skb_shinfo(skb)->tskey; +- if (sk->sk_protocol == IPPROTO_TCP) ++ if (sk->sk_protocol == IPPROTO_TCP && ++ sk->sk_type == SOCK_STREAM) + serr->ee.ee_data -= sk->sk_tskey; + } + diff --git a/net/core/sock.c b/net/core/sock.c -index 3307c02..08b1281 100644 +index 3307c02..3a9bfdc 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -441,7 +441,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) @@ -120233,7 +120390,17 @@ index 3307c02..08b1281 100644 goto discard_and_relse; } -@@ -908,6 +908,7 @@ set_rcvbuf: +@@ -862,7 +862,8 @@ set_rcvbuf: + + if (val & SOF_TIMESTAMPING_OPT_ID && + !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) { +- if (sk->sk_protocol == IPPROTO_TCP) { ++ if (sk->sk_protocol == IPPROTO_TCP && ++ sk->sk_type == SOCK_STREAM) { + if (sk->sk_state != TCP_ESTABLISHED) { + ret = -EINVAL; + break; +@@ -908,6 +909,7 @@ set_rcvbuf: } break; @@ -120241,7 +120408,7 @@ index 3307c02..08b1281 100644 case SO_ATTACH_BPF: ret = -EINVAL; if (optlen == sizeof(u32)) { -@@ -920,7 +921,7 @@ set_rcvbuf: +@@ -920,7 +922,7 @@ set_rcvbuf: ret = sk_attach_bpf(ufd, sk); } break; @@ -120250,7 +120417,7 @@ index 3307c02..08b1281 100644 case SO_DETACH_FILTER: ret = sk_detach_filter(sk); break; -@@ -1022,12 +1023,12 @@ int sock_getsockopt(struct socket *sock, int level, int optname, +@@ -1022,12 +1024,12 @@ int sock_getsockopt(struct socket *sock, int level, int optname, struct timeval tm; } v; @@ -120266,7 +120433,7 @@ index 3307c02..08b1281 100644 return -EINVAL; memset(&v, 0, sizeof(v)); -@@ -1165,11 +1166,11 @@ int sock_getsockopt(struct socket *sock, int level, int optname, +@@ -1165,11 +1167,11 @@ int sock_getsockopt(struct socket *sock, int level, int optname, case SO_PEERNAME: { @@ -120280,7 +120447,7 @@ index 3307c02..08b1281 100644 return -EINVAL; if (copy_to_user(optval, address, len)) return -EFAULT; -@@ -1257,7 +1258,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, +@@ -1257,7 +1259,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, if (len > lv) len = lv; @@ -120289,7 +120456,7 @@ index 3307c02..08b1281 100644 return -EFAULT; lenout: if (put_user(len, optlen)) -@@ -1550,7 +1551,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) +@@ -1550,7 +1552,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) newsk->sk_err = 0; newsk->sk_priority = 0; newsk->sk_incoming_cpu = raw_smp_processor_id(); @@ -120298,7 +120465,7 @@ index 3307c02..08b1281 100644 /* * Before updating sk_refcnt, we must commit prior changes to memory * (Documentation/RCU/rculist_nulls.txt for details) -@@ -2359,7 +2360,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) +@@ -2359,7 +2361,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) */ smp_wmb(); atomic_set(&sk->sk_refcnt, 1); @@ -120307,7 +120474,7 @@ index 3307c02..08b1281 100644 } EXPORT_SYMBOL(sock_init_data); -@@ -2487,6 +2488,7 @@ void sock_enable_timestamp(struct sock *sk, int flag) +@@ -2487,6 +2489,7 @@ void sock_enable_timestamp(struct sock *sk, int flag) int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len, int level, int type) { @@ -120315,7 +120482,7 @@ index 3307c02..08b1281 100644 struct sock_exterr_skb *serr; struct sk_buff *skb; int copied, err; -@@ -2508,7 +2510,8 @@ int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len, +@@ -2508,7 +2511,8 @@ int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len, sock_recv_timestamp(msg, sk, skb); serr = SKB_EXT_ERR(skb); @@ -121854,7 +122021,7 @@ index c10a9ee..c621a01 100644 return -ENOMEM; } diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c -index 3939dd2..ea4fbed 100644 +index 3939dd2..7372e9a 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -178,7 +178,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { @@ -121985,7 +122152,25 @@ index 3939dd2..ea4fbed 100644 struct net *net = ctl->extra2; struct ipv6_stable_secret *secret = ctl->data; -@@ -5397,7 +5410,7 @@ int addrconf_sysctl_ignore_routes_with_linkdown(struct ctl_table *ctl, +@@ -5343,13 +5356,10 @@ static int addrconf_sysctl_stable_secret(struct ctl_table *ctl, int write, + goto out; + } + +- if (!write) { +- err = snprintf(str, sizeof(str), "%pI6", +- &secret->secret); +- if (err >= sizeof(str)) { +- err = -EIO; +- goto out; +- } ++ err = snprintf(str, sizeof(str), "%pI6", &secret->secret); ++ if (err >= sizeof(str)) { ++ err = -EIO; ++ goto out; + } + + err = proc_dostring(&lctl, write, buffer, lenp, ppos); +@@ -5397,7 +5407,7 @@ int addrconf_sysctl_ignore_routes_with_linkdown(struct ctl_table *ctl, int *valp = ctl->data; int val = *valp; loff_t pos = *ppos; |