Merge tag 'net-6.4-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net

Pull networking fixes from Paolo Abeni:
 "Including fixes from bluetooth and bpf.

  Current release - regressions:

   - net: fix skb leak in __skb_tstamp_tx()

   - eth: mtk_eth_soc: fix QoS on DSA MAC on non MTK_NETSYS_V2 SoCs

  Current release - new code bugs:

   - handshake:
      - fix sock->file allocation
      - fix handshake_dup() ref counting

   - bluetooth:
      - fix potential double free caused by hci_conn_unlink
      - fix UAF in hci_conn_hash_flush

  Previous releases - regressions:

   - core: fix stack overflow when LRO is disabled for virtual
     interfaces

   - tls: fix strparser rx issues

   - bpf:
      - fix many sockmap/TCP related issues
      - fix a memory leak in the LRU and LRU_PERCPU hash maps
      - init the offload table earlier

   - eth: mlx5e:
      - do as little as possible in napi poll when budget is 0
      - fix using eswitch mapping in nic mode
      - fix deadlock in tc route query code

  Previous releases - always broken:

   - udplite: fix NULL pointer dereference in __sk_mem_raise_allocated()

   - raw: fix output xfrm lookup wrt protocol

   - smc: reset connection when trying to use SMCRv2 fails

   - phy: mscc: enable VSC8501/2 RGMII RX clock

   - eth: octeontx2-pf: fix TSOv6 offload

   - eth: cdc_ncm: deal with too low values of dwNtbOutMaxSize"

* tag 'net-6.4-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (79 commits)
  udplite: Fix NULL pointer dereference in __sk_mem_raise_allocated().
  net: phy: mscc: enable VSC8501/2 RGMII RX clock
  net: phy: mscc: remove unnecessary phydev locking
  net: phy: mscc: add support for VSC8501
  net: phy: mscc: add VSC8502 to MODULE_DEVICE_TABLE
  net/handshake: Enable the SNI extension to work properly
  net/handshake: Unpin sock->file if a handshake is cancelled
  net/handshake: handshake_genl_notify() shouldn't ignore @flags
  net/handshake: Fix uninitialized local variable
  net/handshake: Fix handshake_dup() ref counting
  net/handshake: Remove unneeded check from handshake_dup()
  ipv6: Fix out-of-bounds access in ipv6_find_tlv()
  net: ethernet: mtk_eth_soc: fix QoS on DSA MAC on non MTK_NETSYS_V2 SoCs
  docs: netdev: document the existence of the mail bot
  net: fix skb leak in __skb_tstamp_tx()
  r8169: Use a raw_spinlock_t for the register locks.
  page_pool: fix inconsistency for page_pool_ring_[un]lock()
  bpf, sockmap: Test progs verifier error with latest clang
  bpf, sockmap: Test FIONREAD returns correct bytes in rx buffer with drops
  bpf, sockmap: Test FIONREAD returns correct bytes in rx buffer
  ...
Linus Torvalds 2023-05-25 10:55:26 -07:00
commit 50fb587e6a
88 changed files with 1489 additions and 793 deletions

View file

@@ -68,6 +68,9 @@ attribute-sets:
         type: nest
         nested-attributes: x509
         multi-attr: true
+      -
+        name: peername
+        type: string
   -
     name: done
     attributes:
@@ -105,6 +108,7 @@ operations:
             - auth-mode
            - peer-identity
             - certificate
+            - peername
       -
         name: done
         doc: Handler reports handshake completion

View file

@@ -53,6 +53,7 @@ fills in a structure that contains the parameters of the request:
        struct socket   *ta_sock;
        tls_done_func_t ta_done;
        void            *ta_data;
+       const char      *ta_peername;
        unsigned int    ta_timeout_ms;
        key_serial_t    ta_keyring;
        key_serial_t    ta_my_cert;
@@ -71,6 +72,10 @@ instantiated a struct file in sock->file.
 has completed. Further explanation of this function is in the "Handshake
 Completion" sesction below.
 
+The consumer can provide a NUL-terminated hostname in the @ta_peername
+field that is sent as part of ClientHello. If no peername is provided,
+the DNS hostname associated with the server's IP address is used instead.
+
 The consumer can fill in the @ta_timeout_ms field to force the servicing
 handshake agent to exit after a number of milliseconds. This enables the
 socket to be fully closed once both the kernel and the handshake agent
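
As a rough illustration of the request API this document describes, an in-kernel consumer might fill in these fields as below; the callback, hostname, and timeout are invented for the example, and the hedge applies to the exact tls_client_hello_x509() signature::

  /* Hypothetical consumer: request a TLS client handshake on @sock,
   * overriding the SNI peername via the new @ta_peername field.
   */
  #include <net/handshake.h>

  static void my_handshake_done(void *data, int status, key_serial_t peerid)
  {
  	/* @status is 0 on success or a negative errno */
  }

  static int my_start_tls(struct socket *sock)
  {
  	struct tls_handshake_args args = {
  		.ta_sock	= sock,
  		.ta_done	= my_handshake_done,
  		.ta_data	= sock,
  		.ta_peername	= "server.example.com", /* sent in ClientHello */
  		.ta_timeout_ms	= 3000,
  	};

  	return tls_client_hello_x509(&args, GFP_KERNEL);
  }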

View file

@@ -127,13 +127,32 @@ the value of ``Message-ID`` to the URL above.
 Updating patch status
 ~~~~~~~~~~~~~~~~~~~~~
 
-It may be tempting to help the maintainers and update the state of your
-own patches when you post a new version or spot a bug. Please **do not**
-do that.
-Interfering with the patch status on patchwork will only cause confusion. Leave
-it to the maintainer to figure out what is the most recent and current
-version that should be applied. If there is any doubt, the maintainer
-will reply and ask what should be done.
+Contributors and reviewers do not have the permissions to update patch
+state directly in patchwork. Patchwork doesn't expose much information
+about the history of the state of patches, therefore having multiple
+people update the state leads to confusion.
+
+Instead of delegating patchwork permissions netdev uses a simple mail
+bot which looks for special commands/lines within the emails sent to
+the mailing list. For example to mark a series as Changes Requested
+one needs to send the following line anywhere in the email thread::
+
+  pw-bot: changes-requested
+
+As a result the bot will set the entire series to Changes Requested.
+This may be useful when author discovers a bug in their own series
+and wants to prevent it from getting applied.
+
+The use of the bot is entirely optional, if in doubt ignore its existence
+completely. Maintainers will classify and update the state of the patches
+themselves. No email should ever be sent to the list with the main purpose
+of communicating with the bot, the bot commands should be seen as metadata.
+
+The use of the bot is restricted to authors of the patches (the ``From:``
+header on patch submission and command must match!), maintainers themselves
+and a handful of senior reviewers. Bot records its activity here:
+
+  https://patchwork.hopto.org/pw-bot.html
 
 Review timelines
 ~~~~~~~~~~~~~~~~

View file

@@ -8153,6 +8153,7 @@ F:	include/linux/spi/spi-fsl-dspi.h
 FREESCALE ENETC ETHERNET DRIVERS
 M:	Claudiu Manoil <claudiu.manoil@nxp.com>
+M:	Vladimir Oltean <vladimir.oltean@nxp.com>
 L:	netdev@vger.kernel.org
 S:	Maintained
 F:	drivers/net/ethernet/freescale/enetc/

View file

@@ -1319,17 +1319,17 @@ static void nxp_serdev_remove(struct serdev_device *serdev)
 	hci_free_dev(hdev);
 }
 
-static struct btnxpuart_data w8987_data = {
+static struct btnxpuart_data w8987_data __maybe_unused = {
 	.helper_fw_name = NULL,
 	.fw_name = FIRMWARE_W8987,
 };
 
-static struct btnxpuart_data w8997_data = {
+static struct btnxpuart_data w8997_data __maybe_unused = {
 	.helper_fw_name = FIRMWARE_HELPER,
 	.fw_name = FIRMWARE_W8997,
 };
 
-static const struct of_device_id nxpuart_of_match_table[] = {
+static const struct of_device_id nxpuart_of_match_table[] __maybe_unused = {
 	{ .compatible = "nxp,88w8987-bt", .data = &w8987_data },
 	{ .compatible = "nxp,88w8997-bt", .data = &w8997_data },
 	{ }

View file

@@ -3947,7 +3947,11 @@ static int bond_slave_netdev_event(unsigned long event,
 		unblock_netpoll_tx();
 		break;
 	case NETDEV_FEAT_CHANGE:
-		bond_compute_features(bond);
+		if (!bond->notifier_ctx) {
+			bond->notifier_ctx = true;
+			bond_compute_features(bond);
+			bond->notifier_ctx = false;
+		}
 		break;
 	case NETDEV_RESEND_IGMP:
 		/* Propagate to master device */
@@ -6342,6 +6346,8 @@ static int bond_init(struct net_device *bond_dev)
 	if (!bond->wq)
 		return -ENOMEM;
 
+	bond->notifier_ctx = false;
+
 	spin_lock_init(&bond->stats_lock);
 	netdev_lockdep_set_classes(bond_dev);
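
The bonding and team hunks in this pull use the same re-entrancy guard. A standalone sketch of the pattern, in plain C with invented names, shows why the flag stops the stack overflow when the feature recomputation re-raises the same event:

  #include <stdbool.h>
  #include <stdio.h>

  struct master {
  	bool notifier_ctx;		/* plays bond->notifier_ctx */
  };

  static void compute_features(struct master *m);

  static void on_feat_change(struct master *m)
  {
  	if (!m->notifier_ctx) {		/* ignore events we generated ourselves */
  		m->notifier_ctx = true;
  		compute_features(m);
  		m->notifier_ctx = false;
  	}
  }

  static void compute_features(struct master *m)
  {
  	puts("recomputing features");
  	on_feat_change(m);		/* re-notification no longer recurses */
  }

  int main(void)
  {
  	struct master m = { .notifier_ctx = false };

  	on_feat_change(&m);		/* prints once instead of overflowing */
  	return 0;
  }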

View file

@@ -195,6 +195,7 @@ static int tc589_probe(struct pcmcia_device *link)
 {
 	struct el3_private *lp;
 	struct net_device *dev;
+	int ret;
 
 	dev_dbg(&link->dev, "3c589_attach()\n");
 
@@ -218,7 +219,15 @@ static int tc589_probe(struct pcmcia_device *link)
 
 	dev->ethtool_ops = &netdev_ethtool_ops;
 
-	return tc589_config(link);
+	ret = tc589_config(link);
+	if (ret)
+		goto err_free_netdev;
+
+	return 0;
+
+err_free_netdev:
+	free_netdev(dev);
+	return ret;
 }
 
 static void tc589_detach(struct pcmcia_device *link)

View file

@@ -3834,6 +3834,11 @@ static int fec_enet_txq_xmit_frame(struct fec_enet_private *fep,
 	index = fec_enet_get_bd_index(last_bdp, &txq->bd);
 	txq->tx_skbuff[index] = NULL;
 
+	/* Make sure the updates to rest of the descriptor are performed before
+	 * transferring ownership.
+	 */
+	dma_wmb();
+
 	/* Send it on its way.  Tell FEC it's ready, interrupt when done,
 	 * it's the last BD of the frame, and to put the CRC on the end.
 	 */
@@ -3843,8 +3848,14 @@ static int fec_enet_txq_xmit_frame(struct fec_enet_private *fep,
 
 	/* If this was the last BD in the ring, start at the beginning again. */
 	bdp = fec_enet_get_nextdesc(last_bdp, &txq->bd);
 
+	/* Make sure the update to bdp are performed before txq->bd.cur. */
+	dma_wmb();
+
 	txq->bd.cur = bdp;
 
+	/* Trigger transmission start */
+	writel(0, txq->bd.reg_desc_active);
+
 	return 0;
 }
 
@@ -3873,12 +3884,6 @@ static int fec_enet_xdp_xmit(struct net_device *dev,
 		sent_frames++;
 	}
 
-	/* Make sure the update to bdp and tx_skbuff are performed. */
-	wmb();
-
-	/* Trigger transmission start */
-	writel(0, txq->bd.reg_desc_active);
-
 	__netif_tx_unlock(nq);
 
 	return sent_frames;
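
The dma_wmb() calls above enforce a general descriptor-ring rule: every field of a descriptor must be visible before the write that hands ownership to the device. A userspace analog of that ordering, using a C11 release store in place of dma_wmb() and with invented types, looks like this:

  #include <stdatomic.h>
  #include <stdint.h>

  struct desc {
  	uint32_t buf_addr;
  	uint16_t len;
  	_Atomic uint16_t status;	/* the OWN bit lives here */
  };

  #define DESC_OWN 0x8000

  static void publish_desc(struct desc *d, uint32_t addr, uint16_t len)
  {
  	d->buf_addr = addr;
  	d->len = len;
  	/* release ordering plays the role of dma_wmb(): the stores above
  	 * cannot be reordered past the ownership hand-off below
  	 */
  	atomic_store_explicit(&d->status, DESC_OWN, memory_order_release);
  }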

View file

@@ -652,9 +652,7 @@ static void otx2_sqe_add_ext(struct otx2_nic *pfvf, struct otx2_snd_queue *sq,
 			htons(ext->lso_sb - skb_network_offset(skb));
 	} else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) {
 		ext->lso_format = pfvf->hw.lso_tsov6_idx;
-
-		ipv6_hdr(skb)->payload_len =
-			htons(ext->lso_sb - skb_network_offset(skb));
+		ipv6_hdr(skb)->payload_len = htons(tcp_hdrlen(skb));
 	} else if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) {
 		__be16 l3_proto = vlan_get_protocol(skb);
 		struct udphdr *udph = udp_hdr(skb);

View file

@@ -3269,18 +3269,14 @@ static int mtk_open(struct net_device *dev)
 			eth->dsa_meta[i] = md_dst;
 		}
 	} else {
-		/* Hardware special tag parsing needs to be disabled if at least
-		 * one MAC does not use DSA.
+		/* Hardware DSA untagging and VLAN RX offloading need to be
+		 * disabled if at least one MAC does not use DSA.
 		 */
 		u32 val = mtk_r32(eth, MTK_CDMP_IG_CTRL);
 
 		val &= ~MTK_CDMP_STAG_EN;
 		mtk_w32(eth, val, MTK_CDMP_IG_CTRL);
 
-		val = mtk_r32(eth, MTK_CDMQ_IG_CTRL);
-		val &= ~MTK_CDMQ_STAG_EN;
-		mtk_w32(eth, val, MTK_CDMQ_IG_CTRL);
-
 		mtk_w32(eth, 0, MTK_CDMP_EG_CTRL);
 	}

View file

@@ -1920,9 +1920,10 @@ static void mlx5_cmd_err_trace(struct mlx5_core_dev *dev, u16 opcode, u16 op_mod
 static void cmd_status_log(struct mlx5_core_dev *dev, u16 opcode, u8 status,
 			   u32 syndrome, int err)
 {
+	const char *namep = mlx5_command_str(opcode);
 	struct mlx5_cmd_stats *stats;
 
-	if (!err)
+	if (!err || !(strcmp(namep, "unknown command opcode")))
 		return;
 
 	stats = &dev->cmd.stats[opcode];

View file

@@ -175,6 +175,8 @@ static bool mlx5e_ptp_poll_ts_cq(struct mlx5e_cq *cq, int budget)
 	/* ensure cq space is freed before enabling more cqes */
 	wmb();
 
+	mlx5e_txqsq_wake(&ptpsq->txqsq);
+
 	return work_done == budget;
 }

View file

@@ -1369,11 +1369,13 @@ static void mlx5e_invalidate_encap(struct mlx5e_priv *priv,
 	struct mlx5e_tc_flow *flow;
 
 	list_for_each_entry(flow, encap_flows, tmp_list) {
-		struct mlx5_flow_attr *attr = flow->attr;
 		struct mlx5_esw_flow_attr *esw_attr;
+		struct mlx5_flow_attr *attr;
 
 		if (!mlx5e_is_offloaded_flow(flow))
 			continue;
+
+		attr = mlx5e_tc_get_encap_attr(flow);
 		esw_attr = attr->esw_attr;
 
 		if (flow_flag_test(flow, SLOW))

View file

@@ -193,6 +193,8 @@ static inline u16 mlx5e_txqsq_get_next_pi(struct mlx5e_txqsq *sq, u16 size)
 	return pi;
 }
 
+void mlx5e_txqsq_wake(struct mlx5e_txqsq *sq);
+
 static inline u16 mlx5e_shampo_get_cqe_header_index(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
 {
 	return be16_to_cpu(cqe->shampo.header_entry_index) & (rq->mpwqe.shampo->hd_per_wq - 1);

View file

@@ -1665,11 +1665,9 @@ bool mlx5e_tc_is_vf_tunnel(struct net_device *out_dev, struct net_device *route_
 int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *route_dev, u16 *vport)
 {
 	struct mlx5e_priv *out_priv, *route_priv;
-	struct mlx5_devcom *devcom = NULL;
 	struct mlx5_core_dev *route_mdev;
 	struct mlx5_eswitch *esw;
 	u16 vhca_id;
-	int err;
 
 	out_priv = netdev_priv(out_dev);
 	esw = out_priv->mdev->priv.eswitch;
@@ -1678,6 +1676,9 @@ int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *ro
 	vhca_id = MLX5_CAP_GEN(route_mdev, vhca_id);
 
 	if (mlx5_lag_is_active(out_priv->mdev)) {
+		struct mlx5_devcom *devcom;
+		int err;
+
 		/* In lag case we may get devices from different eswitch instances.
 		 * If we failed to get vport num, it means, mostly, that we on the wrong
 		 * eswitch.
@@ -1686,16 +1687,16 @@ int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *ro
 		if (err != -ENOENT)
 			return err;
 
+		rcu_read_lock();
 		devcom = out_priv->mdev->priv.devcom;
-		esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
-		if (!esw)
-			return -ENODEV;
+		esw = mlx5_devcom_get_peer_data_rcu(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+		err = esw ? mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport) : -ENODEV;
+		rcu_read_unlock();
+
+		return err;
 	}
 
-	err = mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport);
-	if (devcom)
-		mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
-
-	return err;
+	return mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport);
 }
 
 static int
@@ -5301,6 +5302,8 @@ int mlx5e_tc_esw_init(struct mlx5_rep_uplink_priv *uplink_priv)
 		goto err_action_counter;
 	}
 
+	mlx5_esw_offloads_devcom_init(esw);
+
 	return 0;
 
 err_action_counter:
@@ -5329,7 +5332,7 @@ void mlx5e_tc_esw_cleanup(struct mlx5_rep_uplink_priv *uplink_priv)
 	priv = netdev_priv(rpriv->netdev);
 	esw = priv->mdev->priv.eswitch;
-	mlx5e_tc_clean_fdb_peer_flows(esw);
+	mlx5_esw_offloads_devcom_cleanup(esw);
 
 	mlx5e_tc_tun_cleanup(uplink_priv->encap);
@@ -5643,22 +5646,43 @@ bool mlx5e_tc_update_skb_nic(struct mlx5_cqe64 *cqe, struct sk_buff *skb)
 			       0, NULL);
 }
 
+static struct mapping_ctx *
+mlx5e_get_priv_obj_mapping(struct mlx5e_priv *priv)
+{
+	struct mlx5e_tc_table *tc;
+	struct mlx5_eswitch *esw;
+	struct mapping_ctx *ctx;
+
+	if (is_mdev_switchdev_mode(priv->mdev)) {
+		esw = priv->mdev->priv.eswitch;
+		ctx = esw->offloads.reg_c0_obj_pool;
+	} else {
+		tc = mlx5e_fs_get_tc(priv->fs);
+		ctx = tc->mapping;
+	}
+
+	return ctx;
+}
+
 int mlx5e_tc_action_miss_mapping_get(struct mlx5e_priv *priv, struct mlx5_flow_attr *attr,
 				     u64 act_miss_cookie, u32 *act_miss_mapping)
 {
-	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
 	struct mlx5_mapped_obj mapped_obj = {};
+	struct mlx5_eswitch *esw;
 	struct mapping_ctx *ctx;
 	int err;
 
-	ctx = esw->offloads.reg_c0_obj_pool;
-
+	ctx = mlx5e_get_priv_obj_mapping(priv);
 	mapped_obj.type = MLX5_MAPPED_OBJ_ACT_MISS;
 	mapped_obj.act_miss_cookie = act_miss_cookie;
 	err = mapping_add(ctx, &mapped_obj, act_miss_mapping);
 	if (err)
 		return err;
 
+	if (!is_mdev_switchdev_mode(priv->mdev))
+		return 0;
+
+	esw = priv->mdev->priv.eswitch;
 	attr->act_id_restore_rule = esw_add_restore_rule(esw, *act_miss_mapping);
 	if (IS_ERR(attr->act_id_restore_rule))
 		goto err_rule;
@@ -5673,10 +5697,9 @@ err_rule:
 void mlx5e_tc_action_miss_mapping_put(struct mlx5e_priv *priv, struct mlx5_flow_attr *attr,
 				      u32 act_miss_mapping)
 {
-	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
-	struct mapping_ctx *ctx;
+	struct mapping_ctx *ctx = mlx5e_get_priv_obj_mapping(priv);
 
-	ctx = esw->offloads.reg_c0_obj_pool;
-	mlx5_del_flow_rules(attr->act_id_restore_rule);
+	if (is_mdev_switchdev_mode(priv->mdev))
+		mlx5_del_flow_rules(attr->act_id_restore_rule);
 	mapping_remove(ctx, act_miss_mapping);
 }

View file

@@ -762,6 +762,17 @@ static void mlx5e_tx_wi_consume_fifo_skbs(struct mlx5e_txqsq *sq, struct mlx5e_t
 	}
 }
 
+void mlx5e_txqsq_wake(struct mlx5e_txqsq *sq)
+{
+	if (netif_tx_queue_stopped(sq->txq) &&
+	    mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, sq->stop_room) &&
+	    mlx5e_ptpsq_fifo_has_room(sq) &&
+	    !test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) {
+		netif_tx_wake_queue(sq->txq);
+		sq->stats->wake++;
+	}
+}
+
 bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
 {
 	struct mlx5e_sq_stats *stats;
@@ -861,13 +872,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
 
 	netdev_tx_completed_queue(sq->txq, npkts, nbytes);
 
-	if (netif_tx_queue_stopped(sq->txq) &&
-	    mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, sq->stop_room) &&
-	    mlx5e_ptpsq_fifo_has_room(sq) &&
-	    !test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) {
-		netif_tx_wake_queue(sq->txq);
-		stats->wake++;
-	}
+	mlx5e_txqsq_wake(sq);
 
 	return (i == MLX5E_TX_CQ_POLL_BUDGET);
 }

View file

@@ -161,20 +161,22 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
 		}
 	}
 
+	/* budget=0 means we may be in IRQ context, do as little as possible */
+	if (unlikely(!budget))
+		goto out;
+
 	busy |= mlx5e_poll_xdpsq_cq(&c->xdpsq.cq);
 
 	if (c->xdp)
 		busy |= mlx5e_poll_xdpsq_cq(&c->rq_xdpsq.cq);
 
-	if (likely(budget)) { /* budget=0 means: don't poll rx rings */
-		if (xsk_open)
-			work_done = mlx5e_poll_rx_cq(&xskrq->cq, budget);
+	if (xsk_open)
+		work_done = mlx5e_poll_rx_cq(&xskrq->cq, budget);
 
-		if (likely(budget - work_done))
-			work_done += mlx5e_poll_rx_cq(&rq->cq, budget - work_done);
+	if (likely(budget - work_done))
+		work_done += mlx5e_poll_rx_cq(&rq->cq, budget - work_done);
 
-		busy |= work_done == budget;
-	}
+	busy |= work_done == budget;
 
 	mlx5e_poll_ico_cq(&c->icosq.cq);
 	if (mlx5e_poll_ico_cq(&c->async_icosq.cq))
View file

@@ -1104,7 +1104,7 @@ void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev)
 	struct mlx5_eq_table *table = dev->priv.eq_table;
 
 	mutex_lock(&table->lock); /* sync with create/destroy_async_eq */
-	mlx5_irq_table_destroy(dev);
+	mlx5_irq_table_free_irqs(dev);
 	mutex_unlock(&table->lock);
 }

View file

@@ -342,6 +342,7 @@ struct mlx5_eswitch {
 		u32             large_group_num;
 	}  params;
 	struct blocking_notifier_head n_head;
+	bool paired[MLX5_MAX_PORTS];
 };
 
 void esw_offloads_disable(struct mlx5_eswitch *esw);
@@ -369,6 +370,8 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs);
 void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf);
 void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw);
 void mlx5_eswitch_disable(struct mlx5_eswitch *esw);
+void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw);
+void mlx5_esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw);
 int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
 			       u16 vport, const u8 *mac);
 int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw,
@@ -767,6 +770,8 @@ static inline void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) {}
 static inline int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) { return 0; }
 static inline void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf) {}
 static inline void mlx5_eswitch_disable(struct mlx5_eswitch *esw) {}
+static inline void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw) {}
+static inline void mlx5_esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw) {}
 static inline bool mlx5_eswitch_is_funcs_handler(struct mlx5_core_dev *dev) { return false; }
 static inline
 int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, u16 vport, int link_state) { return 0; }

View file

@@ -2742,6 +2742,9 @@ static int mlx5_esw_offloads_devcom_event(int event,
 		    mlx5_eswitch_vport_match_metadata_enabled(peer_esw))
 			break;
 
+		if (esw->paired[mlx5_get_dev_index(peer_esw->dev)])
+			break;
+
 		err = mlx5_esw_offloads_set_ns_peer(esw, peer_esw, true);
 		if (err)
 			goto err_out;
@@ -2753,14 +2756,18 @@ static int mlx5_esw_offloads_devcom_event(int event,
 		if (err)
 			goto err_pair;
 
+		esw->paired[mlx5_get_dev_index(peer_esw->dev)] = true;
+		peer_esw->paired[mlx5_get_dev_index(esw->dev)] = true;
 		mlx5_devcom_set_paired(devcom, MLX5_DEVCOM_ESW_OFFLOADS, true);
 		break;
 
 	case ESW_OFFLOADS_DEVCOM_UNPAIR:
-		if (!mlx5_devcom_is_paired(devcom, MLX5_DEVCOM_ESW_OFFLOADS))
+		if (!esw->paired[mlx5_get_dev_index(peer_esw->dev)])
 			break;
 
 		mlx5_devcom_set_paired(devcom, MLX5_DEVCOM_ESW_OFFLOADS, false);
+		esw->paired[mlx5_get_dev_index(peer_esw->dev)] = false;
+		peer_esw->paired[mlx5_get_dev_index(esw->dev)] = false;
 		mlx5_esw_offloads_unpair(peer_esw);
 		mlx5_esw_offloads_unpair(esw);
 		mlx5_esw_offloads_set_ns_peer(esw, peer_esw, false);
@@ -2779,7 +2786,7 @@ err_out:
 	return err;
 }
 
-static void esw_offloads_devcom_init(struct mlx5_eswitch *esw)
+void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw)
 {
 	struct mlx5_devcom *devcom = esw->dev->priv.devcom;
 
@@ -2802,7 +2809,7 @@ static void esw_offloads_devcom_init(struct mlx5_eswitch *esw)
 					 ESW_OFFLOADS_DEVCOM_PAIR, esw);
 }
 
-static void esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw)
+void mlx5_esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw)
 {
 	struct mlx5_devcom *devcom = esw->dev->priv.devcom;
 
@@ -3250,8 +3257,6 @@ int esw_offloads_enable(struct mlx5_eswitch *esw)
 	if (err)
 		goto err_vports;
 
-	esw_offloads_devcom_init(esw);
-
 	return 0;
 
 err_vports:
@@ -3292,7 +3297,6 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw,
 
 void esw_offloads_disable(struct mlx5_eswitch *esw)
 {
-	esw_offloads_devcom_cleanup(esw);
 	mlx5_eswitch_disable_pf_vf_vports(esw);
 	esw_offloads_unload_rep(esw, MLX5_VPORT_UPLINK);
 	esw_set_passing_vport_metadata(esw, false);

View file

@@ -3,6 +3,7 @@
 
 #include <linux/mlx5/vport.h>
 #include "lib/devcom.h"
+#include "mlx5_core.h"
 
 static LIST_HEAD(devcom_list);
 
@@ -13,7 +14,7 @@ static LIST_HEAD(devcom_list);
 
 struct mlx5_devcom_component {
 	struct {
-		void *data;
+		void __rcu *data;
 	} device[MLX5_DEVCOM_PORTS_SUPPORTED];
 
 	mlx5_devcom_event_handler_t handler;
@@ -77,6 +78,7 @@ struct mlx5_devcom *mlx5_devcom_register_device(struct mlx5_core_dev *dev)
 	if (MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_DEVCOM_PORTS_SUPPORTED)
 		return NULL;
 
+	mlx5_dev_list_lock();
 	sguid0 = mlx5_query_nic_system_image_guid(dev);
 	list_for_each_entry(iter, &devcom_list, list) {
 		struct mlx5_core_dev *tmp_dev = NULL;
@@ -102,8 +104,10 @@ struct mlx5_devcom *mlx5_devcom_register_device(struct mlx5_core_dev *dev)
 
 	if (!priv) {
 		priv = mlx5_devcom_list_alloc();
-		if (!priv)
-			return ERR_PTR(-ENOMEM);
+		if (!priv) {
+			devcom = ERR_PTR(-ENOMEM);
+			goto out;
+		}
 
 		idx = 0;
 		new_priv = true;
@@ -112,13 +116,16 @@ struct mlx5_devcom *mlx5_devcom_register_device(struct mlx5_core_dev *dev)
 	priv->devs[idx] = dev;
 	devcom = mlx5_devcom_alloc(priv, idx);
 	if (!devcom) {
-		kfree(priv);
-		return ERR_PTR(-ENOMEM);
+		if (new_priv)
+			kfree(priv);
+		devcom = ERR_PTR(-ENOMEM);
+		goto out;
 	}
 
 	if (new_priv)
 		list_add(&priv->list, &devcom_list);
-
+out:
+	mlx5_dev_list_unlock();
 	return devcom;
 }
 
@@ -131,6 +138,7 @@ void mlx5_devcom_unregister_device(struct mlx5_devcom *devcom)
 	if (IS_ERR_OR_NULL(devcom))
 		return;
 
+	mlx5_dev_list_lock();
 	priv = devcom->priv;
 	priv->devs[devcom->idx] = NULL;
 
@@ -141,10 +149,12 @@ void mlx5_devcom_unregister_device(struct mlx5_devcom *devcom)
 			break;
 
 	if (i != MLX5_DEVCOM_PORTS_SUPPORTED)
-		return;
+		goto out;
 
 	list_del(&priv->list);
 	kfree(priv);
+out:
+	mlx5_dev_list_unlock();
 }
 
 void mlx5_devcom_register_component(struct mlx5_devcom *devcom,
@@ -162,7 +172,7 @@ void mlx5_devcom_register_component(struct mlx5_devcom *devcom,
 	comp = &devcom->priv->components[id];
 	down_write(&comp->sem);
 	comp->handler = handler;
-	comp->device[devcom->idx].data = data;
+	rcu_assign_pointer(comp->device[devcom->idx].data, data);
 	up_write(&comp->sem);
 }
 
@@ -176,8 +186,9 @@ void mlx5_devcom_unregister_component(struct mlx5_devcom *devcom,
 
 	comp = &devcom->priv->components[id];
 	down_write(&comp->sem);
-	comp->device[devcom->idx].data = NULL;
+	RCU_INIT_POINTER(comp->device[devcom->idx].data, NULL);
 	up_write(&comp->sem);
+	synchronize_rcu();
 }
 
 int mlx5_devcom_send_event(struct mlx5_devcom *devcom,
@@ -193,12 +204,15 @@ int mlx5_devcom_send_event(struct mlx5_devcom *devcom,
 
 	comp = &devcom->priv->components[id];
 	down_write(&comp->sem);
-	for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++)
-		if (i != devcom->idx && comp->device[i].data) {
-			err = comp->handler(event, comp->device[i].data,
-					    event_data);
+	for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++) {
+		void *data = rcu_dereference_protected(comp->device[i].data,
+						       lockdep_is_held(&comp->sem));
+
+		if (i != devcom->idx && data) {
+			err = comp->handler(event, data, event_data);
 			break;
 		}
+	}
 
 	up_write(&comp->sem);
 	return err;
@@ -213,7 +227,7 @@ void mlx5_devcom_set_paired(struct mlx5_devcom *devcom,
 	comp = &devcom->priv->components[id];
 	WARN_ON(!rwsem_is_locked(&comp->sem));
 
-	comp->paired = paired;
+	WRITE_ONCE(comp->paired, paired);
 }
 
 bool mlx5_devcom_is_paired(struct mlx5_devcom *devcom,
@@ -222,7 +236,7 @@ bool mlx5_devcom_is_paired(struct mlx5_devcom *devcom,
 	if (IS_ERR_OR_NULL(devcom))
 		return false;
 
-	return devcom->priv->components[id].paired;
+	return READ_ONCE(devcom->priv->components[id].paired);
 }
 
 void *mlx5_devcom_get_peer_data(struct mlx5_devcom *devcom,
@@ -236,7 +250,7 @@ void *mlx5_devcom_get_peer_data(struct mlx5_devcom *devcom,
 
 	comp = &devcom->priv->components[id];
 	down_read(&comp->sem);
-	if (!comp->paired) {
+	if (!READ_ONCE(comp->paired)) {
 		up_read(&comp->sem);
 		return NULL;
 	}
@@ -245,7 +259,29 @@ void *mlx5_devcom_get_peer_data(struct mlx5_devcom *devcom,
 		if (i != devcom->idx)
 			break;
 
-	return comp->device[i].data;
+	return rcu_dereference_protected(comp->device[i].data, lockdep_is_held(&comp->sem));
+}
+
+void *mlx5_devcom_get_peer_data_rcu(struct mlx5_devcom *devcom, enum mlx5_devcom_components id)
+{
+	struct mlx5_devcom_component *comp;
+	int i;
+
+	if (IS_ERR_OR_NULL(devcom))
+		return NULL;
+
+	for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++)
+		if (i != devcom->idx)
+			break;
+	comp = &devcom->priv->components[id];
+	/* This can change concurrently, however 'data' pointer will remain
+	 * valid for the duration of RCU read section.
+	 */
+	if (!READ_ONCE(comp->paired))
+		return NULL;
+
+	return rcu_dereference(comp->device[i].data);
 }
 
 void mlx5_devcom_release_peer_data(struct mlx5_devcom *devcom,
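
The devcom change follows the standard RCU publish/read shape. A userspace analog using C11 atomics (illustrative only, not the kernel API): the writer publishes a fully built object with a release store, as rcu_assign_pointer() does, and the reader picks it up with an acquire load, as rcu_dereference() does inside a read-side critical section:

  #include <stdatomic.h>
  #include <stdio.h>

  struct peer {
  	int vport;
  };

  static _Atomic(struct peer *) peer_data;	/* plays comp->device[i].data */

  static void publish(struct peer *p)
  {
  	/* like rcu_assign_pointer(): fields first, then release-store */
  	atomic_store_explicit(&peer_data, p, memory_order_release);
  }

  static int query_vport(void)
  {
  	/* like rcu_dereference(): acquire-load, then use while protected */
  	struct peer *p = atomic_load_explicit(&peer_data, memory_order_acquire);

  	return p ? p->vport : -1;
  }

  int main(void)
  {
  	static struct peer p = { .vport = 7 };

  	publish(&p);
  	printf("vport=%d\n", query_vport());
  	return 0;
  }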

View file

@@ -41,6 +41,7 @@ bool mlx5_devcom_is_paired(struct mlx5_devcom *devcom,
 
 void *mlx5_devcom_get_peer_data(struct mlx5_devcom *devcom,
 				enum mlx5_devcom_components id);
+void *mlx5_devcom_get_peer_data_rcu(struct mlx5_devcom *devcom, enum mlx5_devcom_components id);
 void mlx5_devcom_release_peer_data(struct mlx5_devcom *devcom,
 				   enum mlx5_devcom_components id);

View file

@@ -1049,7 +1049,7 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
 
 	dev->dm = mlx5_dm_create(dev);
 	if (IS_ERR(dev->dm))
-		mlx5_core_warn(dev, "Failed to init device memory%d\n", err);
+		mlx5_core_warn(dev, "Failed to init device memory %ld\n", PTR_ERR(dev->dm));
 
 	dev->tracer = mlx5_fw_tracer_create(dev);
 	dev->hv_vhca = mlx5_hv_vhca_create(dev);

View file

@@ -15,6 +15,7 @@ int mlx5_irq_table_init(struct mlx5_core_dev *dev);
 void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev);
 int mlx5_irq_table_create(struct mlx5_core_dev *dev);
 void mlx5_irq_table_destroy(struct mlx5_core_dev *dev);
+void mlx5_irq_table_free_irqs(struct mlx5_core_dev *dev);
 int mlx5_irq_table_get_num_comp(struct mlx5_irq_table *table);
 int mlx5_irq_table_get_sfs_vec(struct mlx5_irq_table *table);
 struct mlx5_irq_table *mlx5_irq_table_get(struct mlx5_core_dev *dev);

View file

@@ -32,6 +32,7 @@ struct mlx5_irq {
 	struct mlx5_irq_pool *pool;
 	int refcount;
 	struct msi_map map;
+	u32 pool_index;
 };
 
 struct mlx5_irq_table {
@@ -132,7 +133,7 @@ static void irq_release(struct mlx5_irq *irq)
 	struct cpu_rmap *rmap;
 #endif
 
-	xa_erase(&pool->irqs, irq->map.index);
+	xa_erase(&pool->irqs, irq->pool_index);
 	/* free_irq requires that affinity_hint and rmap will be cleared before
 	 * calling it. To satisfy this requirement, we call
 	 * irq_cpu_rmap_remove() to remove the notifier
@@ -276,11 +277,11 @@ struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i,
 	}
 	irq->pool = pool;
 	irq->refcount = 1;
-	irq->map.index = i;
-	err = xa_err(xa_store(&pool->irqs, irq->map.index, irq, GFP_KERNEL));
+	irq->pool_index = i;
+	err = xa_err(xa_store(&pool->irqs, irq->pool_index, irq, GFP_KERNEL));
 	if (err) {
 		mlx5_core_err(dev, "Failed to alloc xa entry for irq(%u). err = %d\n",
-			      irq->map.index, err);
+			      irq->pool_index, err);
 		goto err_xa;
 	}
 	return irq;
@@ -567,7 +568,7 @@ int mlx5_irqs_request_vectors(struct mlx5_core_dev *dev, u16 *cpus, int nirqs,
 	struct mlx5_irq *irq;
 	int i;
 
-	af_desc.is_managed = 1;
+	af_desc.is_managed = false;
 	for (i = 0; i < nirqs; i++) {
 		cpumask_set_cpu(cpus[i], &af_desc.mask);
 		irq = mlx5_irq_request(dev, i + 1, &af_desc, rmap);
@@ -691,6 +692,24 @@ static void irq_pools_destroy(struct mlx5_irq_table *table)
 	irq_pool_free(table->pcif_pool);
 }
 
+static void mlx5_irq_pool_free_irqs(struct mlx5_irq_pool *pool)
+{
+	struct mlx5_irq *irq;
+	unsigned long index;
+
+	xa_for_each(&pool->irqs, index, irq)
+		free_irq(irq->map.virq, &irq->nh);
+}
+
+static void mlx5_irq_pools_free_irqs(struct mlx5_irq_table *table)
+{
+	if (table->sf_ctrl_pool) {
+		mlx5_irq_pool_free_irqs(table->sf_comp_pool);
+		mlx5_irq_pool_free_irqs(table->sf_ctrl_pool);
+	}
+	mlx5_irq_pool_free_irqs(table->pcif_pool);
+}
+
 /* irq_table API */
 
 int mlx5_irq_table_init(struct mlx5_core_dev *dev)
@@ -774,6 +793,17 @@ void mlx5_irq_table_destroy(struct mlx5_core_dev *dev)
 	pci_free_irq_vectors(dev->pdev);
 }
 
+void mlx5_irq_table_free_irqs(struct mlx5_core_dev *dev)
+{
+	struct mlx5_irq_table *table = dev->priv.irq_table;
+
+	if (mlx5_core_is_sf(dev))
+		return;
+
+	mlx5_irq_pools_free_irqs(table);
+	pci_free_irq_vectors(dev->pdev);
+}
+
 int mlx5_irq_table_get_sfs_vec(struct mlx5_irq_table *table)
 {
 	if (table->sf_comp_pool)

View file

@@ -117,6 +117,8 @@ int mlx5dr_cmd_query_device(struct mlx5_core_dev *mdev,
 	caps->gvmi		= MLX5_CAP_GEN(mdev, vhca_id);
 	caps->flex_protocols	= MLX5_CAP_GEN(mdev, flex_parser_protocols);
 	caps->sw_format_ver	= MLX5_CAP_GEN(mdev, steering_format_version);
+	caps->roce_caps.fl_rc_qp_when_roce_disabled =
+		MLX5_CAP_GEN(mdev, fl_rc_qp_when_roce_disabled);
 
 	if (MLX5_CAP_GEN(mdev, roce)) {
 		err = dr_cmd_query_nic_vport_roce_en(mdev, 0, &roce_en);
@@ -124,7 +126,7 @@ int mlx5dr_cmd_query_device(struct mlx5_core_dev *mdev,
 			return err;
 
 		caps->roce_caps.roce_en = roce_en;
-		caps->roce_caps.fl_rc_qp_when_roce_disabled =
+		caps->roce_caps.fl_rc_qp_when_roce_disabled |=
 			MLX5_CAP_ROCE(mdev, fl_rc_qp_when_roce_disabled);
 		caps->roce_caps.fl_rc_qp_when_roce_enabled =
 			MLX5_CAP_ROCE(mdev, fl_rc_qp_when_roce_enabled);

View file

@@ -15,7 +15,8 @@ static u32 dr_ste_crc32_calc(const void *input_data, size_t length)
 {
 	u32 crc = crc32(0, input_data, length);
 
-	return (__force u32)htonl(crc);
+	return (__force u32)((crc >> 24) & 0xff) | ((crc << 8) & 0xff0000) |
+			    ((crc >> 8) & 0xff00) | ((crc << 24) & 0xff000000);
 }
 
 bool mlx5dr_ste_supp_ttl_cs_recalc(struct mlx5dr_cmd_caps *caps)
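
The replacement expression is an unconditional 32-bit byte swap, unlike htonl(), which is a no-op on big-endian hosts; that difference is the bug being fixed. A quick userspace check of the identity:

  #include <assert.h>
  #include <stdint.h>
  #include <stdio.h>

  /* Same bit manipulation as the new dr_ste_crc32_calc() return value */
  static uint32_t swab32(uint32_t crc)
  {
  	return ((crc >> 24) & 0xff) | ((crc << 8) & 0xff0000) |
  	       ((crc >> 8) & 0xff00) | ((crc << 24) & 0xff000000);
  }

  int main(void)
  {
  	assert(swab32(0xAABBCCDD) == 0xDDCCBBAA);	/* full byte reversal */
  	assert(swab32(swab32(0x12345678)) == 0x12345678); /* involution */
  	puts("byte-swap identity holds");
  	return 0;
  }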

View file

@@ -1039,6 +1039,16 @@ static int lan966x_reset_switch(struct lan966x *lan966x)
 
 	reset_control_reset(switch_reset);
 
+	/* Don't reinitialize the switch core, if it is already initialized. In
+	 * case it is initialized twice, some pointers inside the queue system
+	 * in HW will get corrupted and then after a while the queue system gets
+	 * full and no traffic is passing through the switch. The issue is seen
+	 * when loading and unloading the driver and sending traffic through the
+	 * switch.
+	 */
+	if (lan_rd(lan966x, SYS_RESET_CFG) & SYS_RESET_CFG_CORE_ENA)
+		return 0;
+
 	lan_wr(SYS_RESET_CFG_CORE_ENA_SET(0), lan966x, SYS_RESET_CFG);
 	lan_wr(SYS_RAM_INIT_RAM_INIT_SET(1), lan966x, SYS_RAM_INIT);
 	ret = readx_poll_timeout(lan966x_ram_init, lan966x,

View file

@@ -6138,6 +6138,7 @@ static int nv_probe(struct pci_dev *pci_dev, const struct pci_device_id *id)
 	return 0;
 
 out_error:
+	nv_mgmt_release_sema(dev);
 	if (phystate_orig)
 		writel(phystate|NVREG_ADAPTCTL_RUNNING, base + NvRegAdapterControl);
 out_freering:

View file

@@ -616,10 +616,10 @@ struct rtl8169_private {
 		struct work_struct work;
 	} wk;
 
-	spinlock_t config25_lock;
-	spinlock_t mac_ocp_lock;
+	raw_spinlock_t config25_lock;
+	raw_spinlock_t mac_ocp_lock;
 
-	spinlock_t cfg9346_usage_lock;
+	raw_spinlock_t cfg9346_usage_lock;
 	int cfg9346_usage_count;
 
 	unsigned supports_gmii:1;
@@ -671,20 +671,20 @@ static void rtl_lock_config_regs(struct rtl8169_private *tp)
 {
 	unsigned long flags;
 
-	spin_lock_irqsave(&tp->cfg9346_usage_lock, flags);
+	raw_spin_lock_irqsave(&tp->cfg9346_usage_lock, flags);
 	if (!--tp->cfg9346_usage_count)
 		RTL_W8(tp, Cfg9346, Cfg9346_Lock);
-	spin_unlock_irqrestore(&tp->cfg9346_usage_lock, flags);
+	raw_spin_unlock_irqrestore(&tp->cfg9346_usage_lock, flags);
 }
 
 static void rtl_unlock_config_regs(struct rtl8169_private *tp)
 {
 	unsigned long flags;
 
-	spin_lock_irqsave(&tp->cfg9346_usage_lock, flags);
+	raw_spin_lock_irqsave(&tp->cfg9346_usage_lock, flags);
 	if (!tp->cfg9346_usage_count++)
 		RTL_W8(tp, Cfg9346, Cfg9346_Unlock);
-	spin_unlock_irqrestore(&tp->cfg9346_usage_lock, flags);
+	raw_spin_unlock_irqrestore(&tp->cfg9346_usage_lock, flags);
 }
 
 static void rtl_pci_commit(struct rtl8169_private *tp)
@@ -698,10 +698,10 @@ static void rtl_mod_config2(struct rtl8169_private *tp, u8 clear, u8 set)
 	unsigned long flags;
 	u8 val;
 
-	spin_lock_irqsave(&tp->config25_lock, flags);
+	raw_spin_lock_irqsave(&tp->config25_lock, flags);
 	val = RTL_R8(tp, Config2);
 	RTL_W8(tp, Config2, (val & ~clear) | set);
-	spin_unlock_irqrestore(&tp->config25_lock, flags);
+	raw_spin_unlock_irqrestore(&tp->config25_lock, flags);
 }
 
 static void rtl_mod_config5(struct rtl8169_private *tp, u8 clear, u8 set)
@@ -709,10 +709,10 @@ static void rtl_mod_config5(struct rtl8169_private *tp, u8 clear, u8 set)
 	unsigned long flags;
 	u8 val;
 
-	spin_lock_irqsave(&tp->config25_lock, flags);
+	raw_spin_lock_irqsave(&tp->config25_lock, flags);
 	val = RTL_R8(tp, Config5);
 	RTL_W8(tp, Config5, (val & ~clear) | set);
-	spin_unlock_irqrestore(&tp->config25_lock, flags);
+	raw_spin_unlock_irqrestore(&tp->config25_lock, flags);
 }
 
 static bool rtl_is_8125(struct rtl8169_private *tp)
@@ -899,9 +899,9 @@ static void r8168_mac_ocp_write(struct rtl8169_private *tp, u32 reg, u32 data)
 {
 	unsigned long flags;
 
-	spin_lock_irqsave(&tp->mac_ocp_lock, flags);
+	raw_spin_lock_irqsave(&tp->mac_ocp_lock, flags);
 	__r8168_mac_ocp_write(tp, reg, data);
-	spin_unlock_irqrestore(&tp->mac_ocp_lock, flags);
+	raw_spin_unlock_irqrestore(&tp->mac_ocp_lock, flags);
 }
 
 static u16 __r8168_mac_ocp_read(struct rtl8169_private *tp, u32 reg)
@@ -919,9 +919,9 @@ static u16 r8168_mac_ocp_read(struct rtl8169_private *tp, u32 reg)
 	unsigned long flags;
 	u16 val;
 
-	spin_lock_irqsave(&tp->mac_ocp_lock, flags);
+	raw_spin_lock_irqsave(&tp->mac_ocp_lock, flags);
 	val = __r8168_mac_ocp_read(tp, reg);
-	spin_unlock_irqrestore(&tp->mac_ocp_lock, flags);
+	raw_spin_unlock_irqrestore(&tp->mac_ocp_lock, flags);
 
 	return val;
 }
@@ -932,10 +932,10 @@ static void r8168_mac_ocp_modify(struct rtl8169_private *tp, u32 reg, u16 mask,
 	unsigned long flags;
 	u16 data;
 
-	spin_lock_irqsave(&tp->mac_ocp_lock, flags);
+	raw_spin_lock_irqsave(&tp->mac_ocp_lock, flags);
 	data = __r8168_mac_ocp_read(tp, reg);
 	__r8168_mac_ocp_write(tp, reg, (data & ~mask) | set);
-	spin_unlock_irqrestore(&tp->mac_ocp_lock, flags);
+	raw_spin_unlock_irqrestore(&tp->mac_ocp_lock, flags);
 }
 
 /* Work around a hw issue with RTL8168g PHY, the quirk disables
@@ -1420,14 +1420,14 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts)
 			r8168_mac_ocp_modify(tp, 0xc0b6, BIT(0), 0);
 	}
 
-	spin_lock_irqsave(&tp->config25_lock, flags);
+	raw_spin_lock_irqsave(&tp->config25_lock, flags);
 	for (i = 0; i < tmp; i++) {
 		options = RTL_R8(tp, cfg[i].reg) & ~cfg[i].mask;
 		if (wolopts & cfg[i].opt)
 			options |= cfg[i].mask;
 		RTL_W8(tp, cfg[i].reg, options);
 	}
-	spin_unlock_irqrestore(&tp->config25_lock, flags);
+	raw_spin_unlock_irqrestore(&tp->config25_lock, flags);
 
 	switch (tp->mac_version) {
 	case RTL_GIGA_MAC_VER_02 ... RTL_GIGA_MAC_VER_06:
@@ -5179,9 +5179,9 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	tp->eee_adv = -1;
 	tp->ocp_base = OCP_STD_PHY_BASE;
 
-	spin_lock_init(&tp->cfg9346_usage_lock);
-	spin_lock_init(&tp->config25_lock);
-	spin_lock_init(&tp->mac_ocp_lock);
+	raw_spin_lock_init(&tp->cfg9346_usage_lock);
+	raw_spin_lock_init(&tp->config25_lock);
+	raw_spin_lock_init(&tp->mac_ocp_lock);
 
 	dev->tstats = devm_netdev_alloc_pcpu_stats(&pdev->dev,
 						   struct pcpu_sw_netstats);

View file

@@ -171,9 +171,14 @@ static int efx_devlink_info_nvram_partition(struct efx_nic *efx,
 
 	rc = efx_mcdi_nvram_metadata(efx, partition_type, NULL, version, NULL,
 				     0);
+
+	/* If the partition does not exist, that is not an error. */
+	if (rc == -ENOENT)
+		return 0;
+
 	if (rc) {
-		netif_err(efx, drv, efx->net_dev, "mcdi nvram %s: failed\n",
-			  version_name);
+		netif_err(efx, drv, efx->net_dev, "mcdi nvram %s: failed (rc=%d)\n",
+			  version_name, rc);
 		return rc;
 	}
 
@@ -187,36 +192,33 @@ static int efx_devlink_info_nvram_partition(struct efx_nic *efx,
 static int efx_devlink_info_stored_versions(struct efx_nic *efx,
 					    struct devlink_info_req *req)
 {
-	int rc;
+	int err;
 
-	rc = efx_devlink_info_nvram_partition(efx, req,
-					      NVRAM_PARTITION_TYPE_BUNDLE,
-					      DEVLINK_INFO_VERSION_GENERIC_FW_BUNDLE_ID);
-	if (rc)
-		return rc;
+	/* We do not care here about the specific error but just if an error
+	 * happened. The specific error will be reported inside the call
+	 * through system messages, and if any error happened in any call
+	 * below, we report it through extack.
+	 */
+	err = efx_devlink_info_nvram_partition(efx, req,
+					       NVRAM_PARTITION_TYPE_BUNDLE,
+					       DEVLINK_INFO_VERSION_GENERIC_FW_BUNDLE_ID);
 
-	rc = efx_devlink_info_nvram_partition(efx, req,
-					      NVRAM_PARTITION_TYPE_MC_FIRMWARE,
-					      DEVLINK_INFO_VERSION_GENERIC_FW_MGMT);
-	if (rc)
-		return rc;
+	err |= efx_devlink_info_nvram_partition(efx, req,
+						NVRAM_PARTITION_TYPE_MC_FIRMWARE,
+						DEVLINK_INFO_VERSION_GENERIC_FW_MGMT);
 
-	rc = efx_devlink_info_nvram_partition(efx, req,
-					      NVRAM_PARTITION_TYPE_SUC_FIRMWARE,
-					      EFX_DEVLINK_INFO_VERSION_FW_MGMT_SUC);
-	if (rc)
-		return rc;
+	err |= efx_devlink_info_nvram_partition(efx, req,
+						NVRAM_PARTITION_TYPE_SUC_FIRMWARE,
+						EFX_DEVLINK_INFO_VERSION_FW_MGMT_SUC);
 
-	rc = efx_devlink_info_nvram_partition(efx, req,
-					      NVRAM_PARTITION_TYPE_EXPANSION_ROM,
-					      EFX_DEVLINK_INFO_VERSION_FW_EXPROM);
-	if (rc)
-		return rc;
+	err |= efx_devlink_info_nvram_partition(efx, req,
+						NVRAM_PARTITION_TYPE_EXPANSION_ROM,
+						EFX_DEVLINK_INFO_VERSION_FW_EXPROM);
 
-	rc = efx_devlink_info_nvram_partition(efx, req,
-					      NVRAM_PARTITION_TYPE_EXPANSION_UEFI,
-					      EFX_DEVLINK_INFO_VERSION_FW_UEFI);
-	return rc;
+	err |= efx_devlink_info_nvram_partition(efx, req,
+						NVRAM_PARTITION_TYPE_EXPANSION_UEFI,
+						EFX_DEVLINK_INFO_VERSION_FW_UEFI);
+	return err;
 }
 
 #define EFX_VER_FLAG(_f) \
@@ -587,27 +589,20 @@ static int efx_devlink_info_get(struct devlink *devlink,
 {
 	struct efx_devlink *devlink_private = devlink_priv(devlink);
 	struct efx_nic *efx = devlink_private->efx;
-	int rc;
+	int err;
 
-	/* Several different MCDI commands are used. We report first error
-	 * through extack returning at that point. Specific error
-	 * information via system messages.
+	/* Several different MCDI commands are used. We report if errors
+	 * happened through extack. Specific error information via system
+	 * messages inside the calls.
 	 */
-	rc = efx_devlink_info_board_cfg(efx, req);
-	if (rc) {
-		NL_SET_ERR_MSG_MOD(extack, "Getting board info failed");
-		return rc;
-	}
-	rc = efx_devlink_info_stored_versions(efx, req);
-	if (rc) {
-		NL_SET_ERR_MSG_MOD(extack, "Getting stored versions failed");
-		return rc;
-	}
-	rc = efx_devlink_info_running_versions(efx, req);
-	if (rc) {
-		NL_SET_ERR_MSG_MOD(extack, "Getting running versions failed");
-		return rc;
-	}
+	err = efx_devlink_info_board_cfg(efx, req);
+
+	err |= efx_devlink_info_stored_versions(efx, req);
+
+	err |= efx_devlink_info_running_versions(efx, req);
+
+	if (err)
+		NL_SET_ERR_MSG_MOD(extack, "Errors when getting device info. Check system messages");
 
 	return 0;
 }

View file

@@ -179,6 +179,7 @@ enum rgmii_clock_delay {
 #define VSC8502_RGMII_CNTL		  20
 #define VSC8502_RGMII_RX_DELAY_MASK	  0x0070
 #define VSC8502_RGMII_TX_DELAY_MASK	  0x0007
+#define VSC8502_RGMII_RX_CLK_DISABLE	  0x0800
 
 #define MSCC_PHY_WOL_LOWER_MAC_ADDR	  21
 #define MSCC_PHY_WOL_MID_MAC_ADDR	  22
@@ -276,6 +277,7 @@ enum rgmii_clock_delay {
 /* Microsemi PHY ID's
  * Code assumes lowest nibble is 0
  */
+#define PHY_ID_VSC8501			  0x00070530
 #define PHY_ID_VSC8502			  0x00070630
 #define PHY_ID_VSC8504			  0x000704c0
 #define PHY_ID_VSC8514			  0x00070670

View file

@@ -519,16 +519,27 @@ out_unlock:
  * * 2.0 ns (which causes the data to be sampled at exactly half way between
  * clock transitions at 1000 Mbps) if delays should be enabled
  */
-static int vsc85xx_rgmii_set_skews(struct phy_device *phydev, u32 rgmii_cntl,
-				   u16 rgmii_rx_delay_mask,
-				   u16 rgmii_tx_delay_mask)
+static int vsc85xx_update_rgmii_cntl(struct phy_device *phydev, u32 rgmii_cntl,
+				     u16 rgmii_rx_delay_mask,
+				     u16 rgmii_tx_delay_mask)
 {
 	u16 rgmii_rx_delay_pos = ffs(rgmii_rx_delay_mask) - 1;
 	u16 rgmii_tx_delay_pos = ffs(rgmii_tx_delay_mask) - 1;
 	u16 reg_val = 0;
-	int rc;
+	u16 mask = 0;
+	int rc = 0;

-	mutex_lock(&phydev->lock);
+	/* For traffic to pass, the VSC8502 family needs the RX_CLK disable bit
+	 * to be unset for all PHY modes, so do that as part of the paged
+	 * register modification.
+	 * For some family members (like VSC8530/31/40/41) this bit is reserved
+	 * and read-only, and the RX clock is enabled by default.
+	 */
+	if (rgmii_cntl == VSC8502_RGMII_CNTL)
+		mask |= VSC8502_RGMII_RX_CLK_DISABLE;
+
+	if (phy_interface_is_rgmii(phydev))
+		mask |= rgmii_rx_delay_mask | rgmii_tx_delay_mask;

 	if (phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID ||
 	    phydev->interface == PHY_INTERFACE_MODE_RGMII_ID)
@@ -537,31 +548,20 @@ static int vsc85xx_update_rgmii_cntl(struct phy_device *phydev, u32 rgmii_cntl,
 	    phydev->interface == PHY_INTERFACE_MODE_RGMII_ID)
 		reg_val |= RGMII_CLK_DELAY_2_0_NS << rgmii_tx_delay_pos;

-	rc = phy_modify_paged(phydev, MSCC_PHY_PAGE_EXTENDED_2,
-			      rgmii_cntl,
-			      rgmii_rx_delay_mask | rgmii_tx_delay_mask,
-			      reg_val);
-
-	mutex_unlock(&phydev->lock);
+	if (mask)
+		rc = phy_modify_paged(phydev, MSCC_PHY_PAGE_EXTENDED_2,
+				      rgmii_cntl, mask, reg_val);

 	return rc;
 }

 static int vsc85xx_default_config(struct phy_device *phydev)
 {
-	int rc;
-
 	phydev->mdix_ctrl = ETH_TP_MDI_AUTO;

-	if (phy_interface_mode_is_rgmii(phydev->interface)) {
-		rc = vsc85xx_rgmii_set_skews(phydev, VSC8502_RGMII_CNTL,
-					     VSC8502_RGMII_RX_DELAY_MASK,
-					     VSC8502_RGMII_TX_DELAY_MASK);
-		if (rc)
-			return rc;
-	}
-
-	return 0;
+	return vsc85xx_update_rgmii_cntl(phydev, VSC8502_RGMII_CNTL,
+					 VSC8502_RGMII_RX_DELAY_MASK,
+					 VSC8502_RGMII_TX_DELAY_MASK);
 }

 static int vsc85xx_get_tunable(struct phy_device *phydev,
@@ -1758,13 +1758,11 @@ static int vsc8584_config_init(struct phy_device *phydev)
 	if (ret)
 		return ret;

-	if (phy_interface_is_rgmii(phydev)) {
-		ret = vsc85xx_rgmii_set_skews(phydev, VSC8572_RGMII_CNTL,
-					      VSC8572_RGMII_RX_DELAY_MASK,
-					      VSC8572_RGMII_TX_DELAY_MASK);
-		if (ret)
-			return ret;
-	}
+	ret = vsc85xx_update_rgmii_cntl(phydev, VSC8572_RGMII_CNTL,
+					VSC8572_RGMII_RX_DELAY_MASK,
+					VSC8572_RGMII_TX_DELAY_MASK);
+	if (ret)
+		return ret;

 	ret = genphy_soft_reset(phydev);
 	if (ret)
@@ -2316,6 +2314,30 @@ static int vsc85xx_probe(struct phy_device *phydev)
 /* Microsemi VSC85xx PHYs */
 static struct phy_driver vsc85xx_driver[] = {
+{
+	.phy_id		= PHY_ID_VSC8501,
+	.name		= "Microsemi GE VSC8501 SyncE",
+	.phy_id_mask	= 0xfffffff0,
+	/* PHY_BASIC_FEATURES */
+	.soft_reset	= &genphy_soft_reset,
+	.config_init	= &vsc85xx_config_init,
+	.config_aneg	= &vsc85xx_config_aneg,
+	.read_status	= &vsc85xx_read_status,
+	.handle_interrupt = vsc85xx_handle_interrupt,
+	.config_intr	= &vsc85xx_config_intr,
+	.suspend	= &genphy_suspend,
+	.resume		= &genphy_resume,
+	.probe		= &vsc85xx_probe,
+	.set_wol	= &vsc85xx_wol_set,
+	.get_wol	= &vsc85xx_wol_get,
+	.get_tunable	= &vsc85xx_get_tunable,
+	.set_tunable	= &vsc85xx_set_tunable,
+	.read_page	= &vsc85xx_phy_read_page,
+	.write_page	= &vsc85xx_phy_write_page,
+	.get_sset_count = &vsc85xx_get_sset_count,
+	.get_strings	= &vsc85xx_get_strings,
+	.get_stats	= &vsc85xx_get_stats,
+},
 {
 	.phy_id		= PHY_ID_VSC8502,
 	.name		= "Microsemi GE VSC8502 SyncE",
@@ -2656,6 +2678,8 @@ static struct phy_driver vsc85xx_driver[] = {
 module_phy_driver(vsc85xx_driver);

 static struct mdio_device_id __maybe_unused vsc85xx_tbl[] = {
+	{ PHY_ID_VSC8501, 0xfffffff0, },
+	{ PHY_ID_VSC8502, 0xfffffff0, },
 	{ PHY_ID_VSC8504, 0xfffffff0, },
 	{ PHY_ID_VSC8514, 0xfffffff0, },
 	{ PHY_ID_VSC8530, 0xfffffff0, },
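
The refactor above folds the RGMII delay programming and the RX-clock enable into one paged read-modify-write: VSC8502_RGMII_RX_CLK_DISABLE is added to the mask while reg_val leaves that bit zero, so the write clears it. A minimal userspace sketch of the mask/value semantics (the helper name here is illustrative, not the driver's API):

/* Editor's sketch of mask/value read-modify-write semantics, not driver
 * code. Any bit set in mask but clear in val ends up cleared in the
 * register, which is how VSC8502_RGMII_RX_CLK_DISABLE gets unset.
 */
#include <assert.h>
#include <stdint.h>

static uint16_t modify_bits(uint16_t reg, uint16_t mask, uint16_t val)
{
	return (reg & ~mask) | (val & mask);
}

int main(void)
{
	uint16_t rx_clk_disable = 0x0800;	/* VSC8502_RGMII_RX_CLK_DISABLE */
	uint16_t reg = 0x0800;			/* RX clock disabled at reset */

	/* mask includes the bit, val leaves it zero -> the bit is cleared */
	reg = modify_bits(reg, rx_clk_disable, 0);
	assert((reg & rx_clk_disable) == 0);
	return 0;
}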


@@ -1629,6 +1629,7 @@ static int team_init(struct net_device *dev)
 	team->dev = dev;
 	team_set_no_mode(team);
+	team->notifier_ctx = false;

 	team->pcpu_stats = netdev_alloc_pcpu_stats(struct team_pcpu_stats);
 	if (!team->pcpu_stats)
@@ -3022,7 +3023,11 @@ static int team_device_event(struct notifier_block *unused,
 		team_del_slave(port->team->dev, dev);
 		break;
 	case NETDEV_FEAT_CHANGE:
-		team_compute_features(port->team);
+		if (!port->team->notifier_ctx) {
+			port->team->notifier_ctx = true;
+			team_compute_features(port->team);
+			port->team->notifier_ctx = false;
+		}
 		break;
 	case NETDEV_PRECHANGEMTU:
 		/* Forbid to change mtu of underlaying device */
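
The guard above assumes team_compute_features() can itself raise a nested NETDEV_FEAT_CHANGE for the team device, which previously recursed back into the notifier. A self-contained sketch of the flag-based guard, with stand-in types rather than the kernel notifier machinery:

/* Editor's sketch of the reentrancy guard. The flag turns the nested
 * notification into a no-op instead of recursing.
 */
#include <stdbool.h>
#include <stdio.h>

struct ctx { bool notifier_ctx; int runs; };

static void feat_change(struct ctx *c);

static void compute_features(struct ctx *c)
{
	c->runs++;
	feat_change(c);	/* model: recomputing re-raises the event */
}

static void feat_change(struct ctx *c)
{
	if (c->notifier_ctx)
		return;		/* nested event: already recomputing */
	c->notifier_ctx = true;
	compute_features(c);
	c->notifier_ctx = false;
}

int main(void)
{
	struct ctx c = { false, 0 };

	feat_change(&c);
	printf("compute ran %d time(s)\n", c.runs);	/* prints 1 */
	return 0;
}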


@@ -181,9 +181,12 @@ static u32 cdc_ncm_check_tx_max(struct usbnet *dev, u32 new_tx)
 	else
 		min = ctx->max_datagram_size + ctx->max_ndp_size + sizeof(struct usb_cdc_ncm_nth32);

-	max = min_t(u32, CDC_NCM_NTB_MAX_SIZE_TX, le32_to_cpu(ctx->ncm_parm.dwNtbOutMaxSize));
-	if (max == 0)
+	if (le32_to_cpu(ctx->ncm_parm.dwNtbOutMaxSize) == 0)
 		max = CDC_NCM_NTB_MAX_SIZE_TX; /* dwNtbOutMaxSize not set */
+	else
+		max = clamp_t(u32, le32_to_cpu(ctx->ncm_parm.dwNtbOutMaxSize),
+			      USB_CDC_NCM_NTB_MIN_OUT_SIZE,
+			      CDC_NCM_NTB_MAX_SIZE_TX);

 	/* some devices set dwNtbOutMaxSize too low for the above default */
 	min = min(min, max);
@@ -1244,6 +1247,9 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign)
 	 * further.
 	 */
 	if (skb_out == NULL) {
+		/* If even the smallest allocation fails, abort. */
+		if (ctx->tx_curr_size == USB_CDC_NCM_NTB_MIN_OUT_SIZE)
+			goto alloc_failed;
 		ctx->tx_low_mem_max_cnt = min(ctx->tx_low_mem_max_cnt + 1,
 					      (unsigned)CDC_NCM_LOW_MEM_MAX_CNT);
 		ctx->tx_low_mem_val = ctx->tx_low_mem_max_cnt;
@@ -1262,13 +1268,8 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign)
 		skb_out = alloc_skb(ctx->tx_curr_size, GFP_ATOMIC);

 		/* No allocation possible so we will abort */
-		if (skb_out == NULL) {
-			if (skb != NULL) {
-				dev_kfree_skb_any(skb);
-				dev->net->stats.tx_dropped++;
-			}
-			goto exit_no_skb;
-		}
+		if (!skb_out)
+			goto alloc_failed;
 		ctx->tx_low_mem_val--;
 	}
 	if (ctx->is_ndp16) {
@@ -1461,6 +1462,11 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign)

 	return skb_out;

+alloc_failed:
+	if (skb) {
+		dev_kfree_skb_any(skb);
+		dev->net->stats.tx_dropped++;
+	}
 exit_no_skb:
 	/* Start timer, if there is a remaining non-empty skb */
 	if (ctx->tx_curr_skb != NULL && n > 0)
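
The effect of the new bounds logic: an unset dwNtbOutMaxSize keeps the driver default, while a nonzero value is clamped into [USB_CDC_NCM_NTB_MIN_OUT_SIZE, CDC_NCM_NTB_MAX_SIZE_TX] instead of being accepted verbatim. A compilable sketch with stand-in constants (the real values live in the USB CDC headers):

/* Editor's sketch of the clamp; constants are illustrative only. */
#include <stdint.h>
#include <stdio.h>

#define NTB_MIN_OUT_SIZE 2048u	/* stand-in for USB_CDC_NCM_NTB_MIN_OUT_SIZE */
#define NTB_MAX_SIZE_TX 32768u	/* stand-in for CDC_NCM_NTB_MAX_SIZE_TX */

static uint32_t clamp_u32(uint32_t v, uint32_t lo, uint32_t hi)
{
	return v < lo ? lo : (v > hi ? hi : v);
}

static uint32_t tx_max(uint32_t dwNtbOutMaxSize)
{
	if (dwNtbOutMaxSize == 0)	/* device left it unset */
		return NTB_MAX_SIZE_TX;
	return clamp_u32(dwNtbOutMaxSize, NTB_MIN_OUT_SIZE, NTB_MAX_SIZE_TX);
}

int main(void)
{
	printf("%u\n", tx_max(0));	/* 32768: default */
	printf("%u\n", tx_max(16));	/* 2048: too-low value raised to the floor */
	printf("%u\n", tx_max(65536));	/* 32768: capped */
	return 0;
}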


@@ -208,6 +208,7 @@ struct team {
 	bool queue_override_enabled;
 	struct list_head *qom_lists; /* array of queue override mapping lists */
 	bool port_mtu_change_allowed;
+	bool notifier_ctx;
 	struct {
 		unsigned int count;
 		unsigned int interval; /* in ms */


@@ -1705,7 +1705,9 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8	   rc[0x1];

 	u8	   uar_4k[0x1];
-	u8	   reserved_at_241[0x9];
+	u8	   reserved_at_241[0x7];
+	u8	   fl_rc_qp_when_roce_disabled[0x1];
+	u8	   regexp_params[0x1];
 	u8	   uar_sz[0x6];
 	u8	   port_selection_cap[0x1];
 	u8	   reserved_at_248[0x1];


@@ -1587,6 +1587,16 @@ static inline void skb_copy_hash(struct sk_buff *to, const struct sk_buff *from)
 	to->l4_hash = from->l4_hash;
 };

+static inline int skb_cmp_decrypted(const struct sk_buff *skb1,
+				    const struct sk_buff *skb2)
+{
+#ifdef CONFIG_TLS_DEVICE
+	return skb2->decrypted - skb1->decrypted;
+#else
+	return 0;
+#endif
+}
+
 static inline void skb_copy_decrypted(struct sk_buff *to,
 				      const struct sk_buff *from)
 {


@@ -71,7 +71,6 @@ struct sk_psock_link {
 };

 struct sk_psock_work_state {
-	struct sk_buff			*skb;
 	u32				len;
 	u32				off;
 };
@@ -105,7 +104,7 @@ struct sk_psock {
 	struct proto			*sk_proto;
 	struct mutex			work_mutex;
 	struct sk_psock_work_state	work_state;
-	struct work_struct		work;
+	struct delayed_work		work;
 	struct rcu_work			rwork;
 };


@@ -1327,7 +1327,7 @@ int hci_le_create_cis(struct hci_conn *conn);

 struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
 			      u8 role);
-int hci_conn_del(struct hci_conn *conn);
+void hci_conn_del(struct hci_conn *conn);
 void hci_conn_hash_flush(struct hci_dev *hdev);
 void hci_conn_check_pending(struct hci_dev *hdev);


@@ -221,6 +221,7 @@ struct bonding {
 	struct bond_up_slave __rcu *usable_slaves;
 	struct bond_up_slave __rcu *all_slaves;
 	bool force_primary;
+	bool notifier_ctx;
 	s32 slave_cnt; /* never change this value outside the attach/detach wrappers */
 	int (*recv_probe)(const struct sk_buff *, struct bonding *,
 			  struct slave *);


@@ -24,6 +24,7 @@ struct tls_handshake_args {
 	struct socket		*ta_sock;
 	tls_done_func_t		ta_done;
 	void			*ta_data;
+	const char		*ta_peername;
 	unsigned int		ta_timeout_ms;
 	key_serial_t		ta_keyring;
 	key_serial_t		ta_my_cert;


@@ -76,6 +76,7 @@ struct ipcm_cookie {
 	__be32			addr;
 	int			oif;
 	struct ip_options_rcu	*opt;
+	__u8			protocol;
 	__u8			ttl;
 	__s16			tos;
 	char			priority;
@@ -96,6 +97,7 @@ static inline void ipcm_init_sk(struct ipcm_cookie *ipcm,
 	ipcm->sockc.tsflags = inet->sk.sk_tsflags;
 	ipcm->oif = READ_ONCE(inet->sk.sk_bound_dev_if);
 	ipcm->addr = inet->inet_saddr;
+	ipcm->protocol = inet->inet_num;
 }

 #define IPCB(skb) ((struct inet_skb_parm*)((skb)->cb))


@@ -399,22 +399,4 @@ static inline void page_pool_nid_changed(struct page_pool *pool, int new_nid)
 		page_pool_update_nid(pool, new_nid);
 }

-static inline void page_pool_ring_lock(struct page_pool *pool)
-	__acquires(&pool->ring.producer_lock)
-{
-	if (in_softirq())
-		spin_lock(&pool->ring.producer_lock);
-	else
-		spin_lock_bh(&pool->ring.producer_lock);
-}
-
-static inline void page_pool_ring_unlock(struct page_pool *pool)
-	__releases(&pool->ring.producer_lock)
-{
-	if (in_softirq())
-		spin_unlock(&pool->ring.producer_lock);
-	else
-		spin_unlock_bh(&pool->ring.producer_lock);
-}
-
 #endif /* _NET_PAGE_POOL_H */


@@ -1470,6 +1470,8 @@ static inline void tcp_adjust_rcv_ssthresh(struct sock *sk)
 }

 void tcp_cleanup_rbuf(struct sock *sk, int copied);
+void __tcp_cleanup_rbuf(struct sock *sk, int copied);
+

 /* We provision sk_rcvbuf around 200% of sk_rcvlowat.
  * If 87.5 % (7/8) of the space has been consumed, we want to override
@@ -2326,6 +2328,14 @@ int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore);
 void tcp_bpf_clone(const struct sock *sk, struct sock *newsk);
 #endif /* CONFIG_BPF_SYSCALL */

+#ifdef CONFIG_INET
+void tcp_eat_skb(struct sock *sk, struct sk_buff *skb);
+#else
+static inline void tcp_eat_skb(struct sock *sk, struct sk_buff *skb)
+{
+}
+#endif
+
 int tcp_bpf_sendmsg_redir(struct sock *sk, bool ingress,
 			  struct sk_msg *msg, u32 bytes, int flags);
 #endif /* CONFIG_NET_SOCK_MSG */


@@ -126,6 +126,7 @@ struct tls_strparser {
 	u32 mark : 8;
 	u32 stopped : 1;
 	u32 copy_mode : 1;
+	u32 mixed_decrypted : 1;
 	u32 msg_ready : 1;

 	struct strp_msg stm;


@@ -44,6 +44,7 @@ enum {
 	HANDSHAKE_A_ACCEPT_AUTH_MODE,
 	HANDSHAKE_A_ACCEPT_PEER_IDENTITY,
 	HANDSHAKE_A_ACCEPT_CERTIFICATE,
+	HANDSHAKE_A_ACCEPT_PEERNAME,

 	__HANDSHAKE_A_ACCEPT_MAX,
 	HANDSHAKE_A_ACCEPT_MAX = (__HANDSHAKE_A_ACCEPT_MAX - 1)


@@ -163,6 +163,7 @@ struct in_addr {
 #define IP_MULTICAST_ALL		49
 #define IP_UNICAST_IF			50
 #define IP_LOCAL_PORT_RANGE		51
+#define IP_PROTOCOL			52

 #define MCAST_EXCLUDE	0
 #define MCAST_INCLUDE	1


@@ -1215,7 +1215,7 @@ static long htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value

 	ret = htab_lock_bucket(htab, b, hash, &flags);
 	if (ret)
-		return ret;
+		goto err_lock_bucket;

 	l_old = lookup_elem_raw(head, hash, key, key_size);

@@ -1236,6 +1236,7 @@ static long htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value
 err:
 	htab_unlock_bucket(htab, b, hash, flags);

+err_lock_bucket:
 	if (ret)
 		htab_lru_push_free(htab, l_new);
 	else if (l_old)
@@ -1338,7 +1339,7 @@ static long __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,

 	ret = htab_lock_bucket(htab, b, hash, &flags);
 	if (ret)
-		return ret;
+		goto err_lock_bucket;

 	l_old = lookup_elem_raw(head, hash, key, key_size);

@@ -1361,6 +1362,7 @@ static long __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
 	ret = 0;
 err:
 	htab_unlock_bucket(htab, b, hash, flags);
+err_lock_bucket:
 	if (l_new)
 		bpf_lru_push_free(&htab->lru, &l_new->lru_node);
 	return ret;
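
The pattern being fixed: htab_lock_bucket() can fail, and the old early return leaked the preallocated LRU node that must be pushed back to the free list. A stand-alone sketch of the goto-past-the-unlock unwind, with stand-in types:

/* Editor's sketch of the unwind: the early return leaked l_new (a node
 * already pulled off the LRU free list); jumping to a label after the
 * unlock keeps a single cleanup path. Stand-in code, not the kernel's.
 */
#include <stdio.h>
#include <stdlib.h>

struct elem { int v; };

static int lock_bucket(void) { return -16; /* model: lock unavailable */ }

static long update_elem(void)
{
	struct elem *l_new = malloc(sizeof(*l_new));	/* prealloc'd node */
	int ret;

	ret = lock_bucket();
	if (ret)
		goto err_lock_bucket;	/* was "return ret;", leaking l_new */

	/* ... insert, then unlock ... */
err_lock_bucket:
	if (ret)
		free(l_new);		/* push back to the free list */
	return ret;
}

int main(void)
{
	printf("%ld\n", update_elem());
	return 0;
}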


@@ -859,4 +859,4 @@ static int __init bpf_offload_init(void)
 	return rhashtable_init(&offdevs, &offdevs_params);
 }

-late_initcall(bpf_offload_init);
+core_initcall(bpf_offload_init);


@@ -17033,7 +17033,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
 				insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH,
 								insn->dst_reg,
 								shift);
-			insn_buf[cnt++] = BPF_ALU64_IMM(BPF_AND, insn->dst_reg,
+			insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
 							(1ULL << size * 8) - 1);
 		}
 	}
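
Why a 32-bit AND: for a 4-byte narrow load the mask is (1ULL << 32) - 1 = 0xffffffff, which does not fit the instruction's signed 32-bit immediate and is sign-extended to all-ones by a 64-bit ALU op, turning the AND into a no-op. A 32-bit ALU op uses the immediate as-is and zero-extends the destination. The two behaviors, demonstrated in plain C:

/* Editor's demonstration of the sign-extension hazard the fix avoids. */
#include <assert.h>
#include <stdint.h>

int main(void)
{
	int32_t imm = (int32_t)((1ULL << 32) - 1);	/* becomes -1 */
	uint64_t reg = 0x1234567890abcdefULL;

	/* BPF_ALU64_IMM(BPF_AND): imm sign-extended to 64 bits -> no-op */
	assert((reg & (uint64_t)(int64_t)imm) == reg);

	/* BPF_ALU32_IMM(BPF_AND): 32-bit op, result zero-extended */
	assert((uint64_t)((uint32_t)reg & (uint32_t)imm) == 0x90abcdefULL);
	return 0;
}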


@@ -1083,8 +1083,28 @@ static void hci_conn_unlink(struct hci_conn *conn)
 	if (!conn->parent) {
 		struct hci_link *link, *t;

-		list_for_each_entry_safe(link, t, &conn->link_list, list)
-			hci_conn_unlink(link->conn);
+		list_for_each_entry_safe(link, t, &conn->link_list, list) {
+			struct hci_conn *child = link->conn;
+
+			hci_conn_unlink(child);
+
+			/* If hdev is down it means
+			 * hci_dev_close_sync/hci_conn_hash_flush is in progress
+			 * and links don't need to be cleanup as all connections
+			 * would be cleanup.
+			 */
+			if (!test_bit(HCI_UP, &hdev->flags))
+				continue;
+
+			/* Due to race, SCO connection might be not established
+			 * yet at this point. Delete it now, otherwise it is
+			 * possible for it to be stuck and can't be deleted.
+			 */
+			if ((child->type == SCO_LINK ||
+			     child->type == ESCO_LINK) &&
+			    child->handle == HCI_CONN_HANDLE_UNSET)
+				hci_conn_del(child);
+		}

 		return;
 	}
@@ -1092,35 +1112,30 @@ static void hci_conn_unlink(struct hci_conn *conn)
 	if (!conn->link)
 		return;

-	hci_conn_put(conn->parent);
-	conn->parent = NULL;
-
 	list_del_rcu(&conn->link->list);
 	synchronize_rcu();

+	hci_conn_drop(conn->parent);
+	hci_conn_put(conn->parent);
+	conn->parent = NULL;
+
 	kfree(conn->link);
 	conn->link = NULL;
-
-	/* Due to race, SCO connection might be not established
-	 * yet at this point. Delete it now, otherwise it is
-	 * possible for it to be stuck and can't be deleted.
-	 */
-	if (conn->handle == HCI_CONN_HANDLE_UNSET)
-		hci_conn_del(conn);
 }

-int hci_conn_del(struct hci_conn *conn)
+void hci_conn_del(struct hci_conn *conn)
 {
 	struct hci_dev *hdev = conn->hdev;

 	BT_DBG("%s hcon %p handle %d", hdev->name, conn, conn->handle);

+	hci_conn_unlink(conn);
+
 	cancel_delayed_work_sync(&conn->disc_work);
 	cancel_delayed_work_sync(&conn->auto_accept_work);
 	cancel_delayed_work_sync(&conn->idle_work);

 	if (conn->type == ACL_LINK) {
-		hci_conn_unlink(conn);
 		/* Unacked frames */
 		hdev->acl_cnt += conn->sent;
 	} else if (conn->type == LE_LINK) {
@@ -1131,13 +1146,6 @@ void hci_conn_del(struct hci_conn *conn)
 		else
 			hdev->acl_cnt += conn->sent;
 	} else {
-		struct hci_conn *acl = conn->parent;
-
-		if (acl) {
-			hci_conn_unlink(conn);
-			hci_conn_drop(acl);
-		}
-
 		/* Unacked ISO frames */
 		if (conn->type == ISO_LINK) {
 			if (hdev->iso_pkts)
@@ -1160,8 +1168,6 @@ void hci_conn_del(struct hci_conn *conn)
 	 * rest of hci_conn_del.
 	 */
 	hci_conn_cleanup(conn);
-
-	return 0;
 }

 struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src, uint8_t src_type)
@@ -2462,22 +2468,21 @@ timer:
 /* Drop all connection on the device */
 void hci_conn_hash_flush(struct hci_dev *hdev)
 {
-	struct hci_conn_hash *h = &hdev->conn_hash;
-	struct hci_conn *c, *n;
+	struct list_head *head = &hdev->conn_hash.list;
+	struct hci_conn *conn;

 	BT_DBG("hdev %s", hdev->name);

-	list_for_each_entry_safe(c, n, &h->list, list) {
-		c->state = BT_CLOSED;
-
-		hci_disconn_cfm(c, HCI_ERROR_LOCAL_HOST_TERM);
-
-		/* Unlink before deleting otherwise it is possible that
-		 * hci_conn_del removes the link which may cause the list to
-		 * contain items already freed.
-		 */
-		hci_conn_unlink(c);
-		hci_conn_del(c);
+	/* We should not traverse the list here, because hci_conn_del
+	 * can remove extra links, which may cause the list traversal
+	 * to hit items that have already been released.
+	 */
+	while ((conn = list_first_entry_or_null(head,
+						struct hci_conn,
+						list)) != NULL) {
+		conn->state = BT_CLOSED;
+		hci_disconn_cfm(conn, HCI_ERROR_LOCAL_HOST_TERM);
+		hci_conn_del(conn);
 	}
 }
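
The flush loop stops using the "safe" iterator because it only caches one next pointer, and hci_conn_del() on a parent can delete child connections elsewhere in the list, including the cached node. Re-reading the head on every pass never dereferences freed memory. A toy model of the consume-from-head style (stand-in list, not the kernel's):

/* Editor's sketch: del() may free an extra node (a "child") besides the
 * one passed in, which would invalidate a cached next pointer.
 */
#include <stdio.h>
#include <stdlib.h>

struct node { struct node *next; };

static struct node *head;

static void del(struct node *n)
{
	if (n->next) {			/* model: also deletes a child */
		struct node *child = n->next;
		n->next = child->next;
		free(child);
	}
	head = n->next;
	free(n);
}

int main(void)
{
	for (int i = 0; i < 4; i++) {
		struct node *n = malloc(sizeof(*n));
		n->next = head;
		head = n;
	}
	while (head)	/* always re-read the head, never a cached next */
		del(head);
	puts("flushed");
	return 0;
}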


@@ -134,6 +134,29 @@ EXPORT_SYMBOL(page_pool_ethtool_stats_get);
 #define recycle_stat_add(pool, __stat, val)
 #endif

+static bool page_pool_producer_lock(struct page_pool *pool)
+	__acquires(&pool->ring.producer_lock)
+{
+	bool in_softirq = in_softirq();
+
+	if (in_softirq)
+		spin_lock(&pool->ring.producer_lock);
+	else
+		spin_lock_bh(&pool->ring.producer_lock);
+
+	return in_softirq;
+}
+
+static void page_pool_producer_unlock(struct page_pool *pool,
+				      bool in_softirq)
+	__releases(&pool->ring.producer_lock)
+{
+	if (in_softirq)
+		spin_unlock(&pool->ring.producer_lock);
+	else
+		spin_unlock_bh(&pool->ring.producer_lock);
+}
+
 static int page_pool_init(struct page_pool *pool,
 			  const struct page_pool_params *params)
 {
@@ -617,6 +640,7 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data,
 			     int count)
 {
 	int i, bulk_len = 0;
+	bool in_softirq;

 	for (i = 0; i < count; i++) {
 		struct page *page = virt_to_head_page(data[i]);
@@ -635,7 +659,7 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data,
 		return;

 	/* Bulk producer into ptr_ring page_pool cache */
-	page_pool_ring_lock(pool);
+	in_softirq = page_pool_producer_lock(pool);
 	for (i = 0; i < bulk_len; i++) {
 		if (__ptr_ring_produce(&pool->ring, data[i])) {
 			/* ring full */
@@ -644,7 +668,7 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data,
 		}
 	}
 	recycle_stat_add(pool, ring, i);
-	page_pool_ring_unlock(pool);
+	page_pool_producer_unlock(pool, in_softirq);

 	/* Hopefully all pages was return into ptr_ring */
 	if (likely(i == bulk_len))
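
Moving the helpers out of line also changes their shape: the softirq test now runs exactly once and its result is threaded through to the unlock, so the lock and unlock flavors can never disagree. A userspace sketch of the capture-and-pass pattern (pthread mutex standing in for the producer spinlock):

/* Editor's sketch: decide the context once, hand the answer back. */
#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

static bool in_softirq_stub(void) { return false; }	/* stand-in */

static bool producer_lock(void)
{
	bool in_softirq = in_softirq_stub();	/* decided exactly once */

	pthread_mutex_lock(&lock);	/* kernel: spin_lock vs spin_lock_bh */
	return in_softirq;
}

static void producer_unlock(bool in_softirq)
{
	(void)in_softirq;	/* kernel: picks the matching unlock flavor */
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	bool ctx = producer_lock();
	/* ... bulk-produce pages into the ring ... */
	producer_unlock(ctx);
	return 0;
}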


@@ -5224,8 +5224,10 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
 	} else {
 		skb = skb_clone(orig_skb, GFP_ATOMIC);

-		if (skb_orphan_frags_rx(skb, GFP_ATOMIC))
+		if (skb_orphan_frags_rx(skb, GFP_ATOMIC)) {
+			kfree_skb(skb);
 			return;
+		}
 	}
 	if (!skb)
 		return;


@@ -481,8 +481,6 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
 		msg_rx = sk_psock_peek_msg(psock);
 	}
 out:
-	if (psock->work_state.skb && copied > 0)
-		schedule_work(&psock->work);
 	return copied;
 }
 EXPORT_SYMBOL_GPL(sk_msg_recvmsg);
@@ -624,42 +622,33 @@ static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,

 static void sk_psock_skb_state(struct sk_psock *psock,
 			       struct sk_psock_work_state *state,
-			       struct sk_buff *skb,
 			       int len, int off)
 {
 	spin_lock_bh(&psock->ingress_lock);
 	if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
-		state->skb = skb;
 		state->len = len;
 		state->off = off;
-	} else {
-		sock_drop(psock->sk, skb);
 	}
 	spin_unlock_bh(&psock->ingress_lock);
 }

 static void sk_psock_backlog(struct work_struct *work)
 {
-	struct sk_psock *psock = container_of(work, struct sk_psock, work);
+	struct delayed_work *dwork = to_delayed_work(work);
+	struct sk_psock *psock = container_of(dwork, struct sk_psock, work);
 	struct sk_psock_work_state *state = &psock->work_state;
 	struct sk_buff *skb = NULL;
+	u32 len = 0, off = 0;
 	bool ingress;
-	u32 len, off;
 	int ret;

 	mutex_lock(&psock->work_mutex);
-	if (unlikely(state->skb)) {
-		spin_lock_bh(&psock->ingress_lock);
-		skb = state->skb;
+	if (unlikely(state->len)) {
 		len = state->len;
 		off = state->off;
-		state->skb = NULL;
-		spin_unlock_bh(&psock->ingress_lock);
 	}
-	if (skb)
-		goto start;

-	while ((skb = skb_dequeue(&psock->ingress_skb))) {
+	while ((skb = skb_peek(&psock->ingress_skb))) {
 		len = skb->len;
 		off = 0;
 		if (skb_bpf_strparser(skb)) {
@@ -668,7 +657,6 @@ static void sk_psock_backlog(struct work_struct *work)
 			off = stm->offset;
 			len = stm->full_len;
 		}
-start:
 		ingress = skb_bpf_ingress(skb);
 		skb_bpf_redirect_clear(skb);
 		do {
@@ -678,22 +666,28 @@ start:
 							  len, ingress);
 			if (ret <= 0) {
 				if (ret == -EAGAIN) {
-					sk_psock_skb_state(psock, state, skb,
-							   len, off);
+					sk_psock_skb_state(psock, state, len, off);
+
+					/* Delay slightly to prioritize any
+					 * other work that might be here.
+					 */
+					if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
+						schedule_delayed_work(&psock->work, 1);
 					goto end;
 				}
 				/* Hard errors break pipe and stop xmit. */
 				sk_psock_report_error(psock, ret ? -ret : EPIPE);
 				sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED);
-				sock_drop(psock->sk, skb);
 				goto end;
 			}
 			off += ret;
 			len -= ret;
 		} while (len);

-		if (!ingress)
+		skb = skb_dequeue(&psock->ingress_skb);
+		if (!ingress) {
 			kfree_skb(skb);
+		}
 	}
 end:
 	mutex_unlock(&psock->work_mutex);
@@ -734,7 +728,7 @@ struct sk_psock *sk_psock_init(struct sock *sk, int node)
 	INIT_LIST_HEAD(&psock->link);
 	spin_lock_init(&psock->link_lock);

-	INIT_WORK(&psock->work, sk_psock_backlog);
+	INIT_DELAYED_WORK(&psock->work, sk_psock_backlog);
 	mutex_init(&psock->work_mutex);
 	INIT_LIST_HEAD(&psock->ingress_msg);
 	spin_lock_init(&psock->ingress_lock);
@@ -786,11 +780,6 @@ static void __sk_psock_zap_ingress(struct sk_psock *psock)
 		skb_bpf_redirect_clear(skb);
 		sock_drop(psock->sk, skb);
 	}
-	kfree_skb(psock->work_state.skb);
-	/* We null the skb here to ensure that calls to sk_psock_backlog
-	 * do not pick up the free'd skb.
-	 */
-	psock->work_state.skb = NULL;
 	__sk_psock_purge_ingress_msg(psock);
 }

@@ -809,7 +798,6 @@ void sk_psock_stop(struct sk_psock *psock)
 	spin_lock_bh(&psock->ingress_lock);
 	sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED);
 	sk_psock_cork_free(psock);
-	__sk_psock_zap_ingress(psock);
 	spin_unlock_bh(&psock->ingress_lock);
 }

@@ -823,7 +811,8 @@ static void sk_psock_destroy(struct work_struct *work)
 	sk_psock_done_strp(psock);

-	cancel_work_sync(&psock->work);
+	cancel_delayed_work_sync(&psock->work);
+	__sk_psock_zap_ingress(psock);
 	mutex_destroy(&psock->work_mutex);

 	psock_progs_drop(&psock->progs);
@@ -938,7 +927,7 @@ static int sk_psock_skb_redirect(struct sk_psock *from, struct sk_buff *skb)
 	}

 	skb_queue_tail(&psock_other->ingress_skb, skb);
-	schedule_work(&psock_other->work);
+	schedule_delayed_work(&psock_other->work, 0);
 	spin_unlock_bh(&psock_other->ingress_lock);
 	return 0;
 }
@@ -990,10 +979,8 @@ static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb,
 			err = -EIO;
 			sk_other = psock->sk;
 			if (sock_flag(sk_other, SOCK_DEAD) ||
-			    !sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
-				skb_bpf_redirect_clear(skb);
+			    !sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
 				goto out_free;
-			}

 			skb_bpf_set_ingress(skb);
@@ -1018,22 +1005,23 @@ static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb,
 			spin_lock_bh(&psock->ingress_lock);
 			if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
 				skb_queue_tail(&psock->ingress_skb, skb);
-				schedule_work(&psock->work);
+				schedule_delayed_work(&psock->work, 0);
 				err = 0;
 			}
 			spin_unlock_bh(&psock->ingress_lock);
-			if (err < 0) {
-				skb_bpf_redirect_clear(skb);
+			if (err < 0)
 				goto out_free;
-			}
 		}
 		break;
 	case __SK_REDIRECT:
+		tcp_eat_skb(psock->sk, skb);
 		err = sk_psock_skb_redirect(psock, skb);
 		break;
 	case __SK_DROP:
 	default:
 out_free:
+		skb_bpf_redirect_clear(skb);
+		tcp_eat_skb(psock->sk, skb);
 		sock_drop(psock->sk, skb);
 	}

@@ -1049,7 +1037,7 @@ static void sk_psock_write_space(struct sock *sk)
 	psock = sk_psock(sk);
 	if (likely(psock)) {
 		if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
-			schedule_work(&psock->work);
+			schedule_delayed_work(&psock->work, 0);
 		write_space = psock->saved_write_space;
 	}
 	rcu_read_unlock();
@@ -1078,8 +1066,7 @@ static void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb)
 		skb_dst_drop(skb);
 		skb_bpf_redirect_clear(skb);
 		ret = bpf_prog_run_pin_on_cpu(prog, skb);
-		if (ret == SK_PASS)
-			skb_bpf_set_strparser(skb);
+		skb_bpf_set_strparser(skb);
 		ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb));
 		skb->sk = NULL;
 	}
@@ -1183,12 +1170,11 @@ static int sk_psock_verdict_recv(struct sock *sk, struct sk_buff *skb)
 	int ret = __SK_DROP;
 	int len = skb->len;

-	skb_get(skb);
-
 	rcu_read_lock();
 	psock = sk_psock(sk);
 	if (unlikely(!psock)) {
 		len = 0;
+		tcp_eat_skb(sk, skb);
 		sock_drop(sk, skb);
 		goto out;
 	}
@@ -1212,12 +1198,21 @@ out:
 static void sk_psock_verdict_data_ready(struct sock *sk)
 {
 	struct socket *sock = sk->sk_socket;
+	int copied;

 	trace_sk_data_ready(sk);

 	if (unlikely(!sock || !sock->ops || !sock->ops->read_skb))
 		return;
-	sock->ops->read_skb(sk, sk_psock_verdict_recv);
+	copied = sock->ops->read_skb(sk, sk_psock_verdict_recv);
+	if (copied >= 0) {
+		struct sk_psock *psock;
+
+		rcu_read_lock();
+		psock = sk_psock(sk);
+		psock->saved_data_ready(sk);
+		rcu_read_unlock();
+	}
 }

 void sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock)
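
The core of the rework: the backlog now peeks at the head of ingress_skb and only dequeues after the skb has been fully sent, so work_state needs just (len, off) and no stashed skb pointer can dangle. A stand-in model of peek-then-dequeue (plain array queue, not struct sk_buff):

/* Editor's sketch: the item stays queued while it is being sent; on a
 * partial send only offsets need saving, and nothing points at freed
 * memory.
 */
#include <stdio.h>

struct q { int items[4]; int head, tail; };

static int peek(struct q *q) { return q->head == q->tail ? -1 : q->items[q->head]; }
static void dequeue(struct q *q) { q->head++; }

static int send_item(int item) { return item; /* model: all bytes sent */ }

int main(void)
{
	struct q q = { { 3, 5, 7, 0 }, 0, 3 };
	int item;

	while ((item = peek(&q)) >= 0) {
		if (send_item(item) < item)
			break;		/* partial: still queued, save (len, off) */
		dequeue(&q);		/* fully sent: now safe to unlink */
	}
	printf("queue drained: %s\n", peek(&q) < 0 ? "yes" : "no");
	return 0;
}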


@@ -1644,9 +1644,10 @@ void sock_map_close(struct sock *sk, long timeout)
 		rcu_read_unlock();
 		sk_psock_stop(psock);
 		release_sock(sk);
-		cancel_work_sync(&psock->work);
+		cancel_delayed_work_sync(&psock->work);
 		sk_psock_put(sk, psock);
 	}

 	/* Make sure we do not recurse. This is a bug.
 	 * Leak the socket instead of crashing on a stack overflow.
 	 */


@@ -102,7 +102,7 @@ struct handshake_req_alloc_test_param handshake_req_alloc_params[] = {
 	{
 		.desc = "handshake_req_alloc excessive privsize",
 		.proto = &handshake_req_alloc_proto_6,
-		.gfp = GFP_KERNEL,
+		.gfp = GFP_KERNEL | __GFP_NOWARN,
 		.expect_success = false,
 	},
 	{
@@ -209,6 +209,7 @@ static void handshake_req_submit_test4(struct kunit *test)
 {
 	struct handshake_req *req, *result;
 	struct socket *sock;
+	struct file *filp;
 	int err;

 	/* Arrange */
@@ -218,9 +219,10 @@ static void handshake_req_submit_test4(struct kunit *test)
 	err = __sock_create(&init_net, PF_INET, SOCK_STREAM, IPPROTO_TCP,
 			    &sock, 1);
 	KUNIT_ASSERT_EQ(test, err, 0);
-	sock->file = sock_alloc_file(sock, O_NONBLOCK, NULL);
-	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, sock->file);
+	filp = sock_alloc_file(sock, O_NONBLOCK, NULL);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, filp);
 	KUNIT_ASSERT_NOT_NULL(test, sock->sk);
+	sock->file = filp;

 	err = handshake_req_submit(sock, req, GFP_KERNEL);
 	KUNIT_ASSERT_EQ(test, err, 0);
@@ -241,6 +243,7 @@ static void handshake_req_submit_test5(struct kunit *test)
 	struct handshake_req *req;
 	struct handshake_net *hn;
 	struct socket *sock;
+	struct file *filp;
 	struct net *net;
 	int saved, err;

@@ -251,9 +254,10 @@ static void handshake_req_submit_test5(struct kunit *test)
 	err = __sock_create(&init_net, PF_INET, SOCK_STREAM, IPPROTO_TCP,
 			    &sock, 1);
 	KUNIT_ASSERT_EQ(test, err, 0);
-	sock->file = sock_alloc_file(sock, O_NONBLOCK, NULL);
-	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, sock->file);
+	filp = sock_alloc_file(sock, O_NONBLOCK, NULL);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, filp);
 	KUNIT_ASSERT_NOT_NULL(test, sock->sk);
+	sock->file = filp;

 	net = sock_net(sock->sk);
 	hn = handshake_pernet(net);
@@ -276,6 +280,7 @@ static void handshake_req_submit_test6(struct kunit *test)
 {
 	struct handshake_req *req1, *req2;
 	struct socket *sock;
+	struct file *filp;
 	int err;

 	/* Arrange */
@@ -287,9 +292,10 @@ static void handshake_req_submit_test6(struct kunit *test)
 	err = __sock_create(&init_net, PF_INET, SOCK_STREAM, IPPROTO_TCP,
 			    &sock, 1);
 	KUNIT_ASSERT_EQ(test, err, 0);
-	sock->file = sock_alloc_file(sock, O_NONBLOCK, NULL);
-	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, sock->file);
+	filp = sock_alloc_file(sock, O_NONBLOCK, NULL);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, filp);
 	KUNIT_ASSERT_NOT_NULL(test, sock->sk);
+	sock->file = filp;

 	/* Act */
 	err = handshake_req_submit(sock, req1, GFP_KERNEL);
@@ -307,6 +313,7 @@ static void handshake_req_cancel_test1(struct kunit *test)
 {
 	struct handshake_req *req;
 	struct socket *sock;
+	struct file *filp;
 	bool result;
 	int err;

@@ -318,8 +325,9 @@ static void handshake_req_cancel_test1(struct kunit *test)
 			    &sock, 1);
 	KUNIT_ASSERT_EQ(test, err, 0);

-	sock->file = sock_alloc_file(sock, O_NONBLOCK, NULL);
-	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, sock->file);
+	filp = sock_alloc_file(sock, O_NONBLOCK, NULL);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, filp);
+	sock->file = filp;

 	err = handshake_req_submit(sock, req, GFP_KERNEL);
 	KUNIT_ASSERT_EQ(test, err, 0);
@@ -340,6 +348,7 @@ static void handshake_req_cancel_test2(struct kunit *test)
 	struct handshake_req *req, *next;
 	struct handshake_net *hn;
 	struct socket *sock;
+	struct file *filp;
 	struct net *net;
 	bool result;
 	int err;

@@ -352,8 +361,9 @@ static void handshake_req_cancel_test2(struct kunit *test)
 			    &sock, 1);
 	KUNIT_ASSERT_EQ(test, err, 0);

-	sock->file = sock_alloc_file(sock, O_NONBLOCK, NULL);
-	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, sock->file);
+	filp = sock_alloc_file(sock, O_NONBLOCK, NULL);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, filp);
+	sock->file = filp;

 	err = handshake_req_submit(sock, req, GFP_KERNEL);
 	KUNIT_ASSERT_EQ(test, err, 0);
@@ -380,6 +390,7 @@ static void handshake_req_cancel_test3(struct kunit *test)
 	struct handshake_req *req, *next;
 	struct handshake_net *hn;
 	struct socket *sock;
+	struct file *filp;
 	struct net *net;
 	bool result;
 	int err;

@@ -392,8 +403,9 @@ static void handshake_req_cancel_test3(struct kunit *test)
 			    &sock, 1);
 	KUNIT_ASSERT_EQ(test, err, 0);

-	sock->file = sock_alloc_file(sock, O_NONBLOCK, NULL);
-	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, sock->file);
+	filp = sock_alloc_file(sock, O_NONBLOCK, NULL);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, filp);
+	sock->file = filp;

 	err = handshake_req_submit(sock, req, GFP_KERNEL);
 	KUNIT_ASSERT_EQ(test, err, 0);
@@ -436,6 +448,7 @@ static void handshake_req_destroy_test1(struct kunit *test)
 {
 	struct handshake_req *req;
 	struct socket *sock;
+	struct file *filp;
 	int err;

 	/* Arrange */
@@ -448,8 +461,9 @@ static void handshake_req_destroy_test1(struct kunit *test)
 			    &sock, 1);
 	KUNIT_ASSERT_EQ(test, err, 0);

-	sock->file = sock_alloc_file(sock, O_NONBLOCK, NULL);
-	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, sock->file);
+	filp = sock_alloc_file(sock, O_NONBLOCK, NULL);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, filp);
+	sock->file = filp;

 	err = handshake_req_submit(sock, req, GFP_KERNEL);
 	KUNIT_ASSERT_EQ(test, err, 0);
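
All of these hunks apply the same rule: sock_alloc_file() consumes the socket on failure, so its return value has to be validated before being published through sock->file; otherwise the tests leave an error pointer behind. A minimal sketch of assign-after-validate with stand-in types:

/* Editor's sketch of the ordering, not kernel code. */
#include <stdio.h>

struct file;
struct socket { struct file *file; };

static struct file *sock_alloc_file_stub(struct socket *s)
{
	(void)s;
	return (struct file *)0;	/* model a failure: NULL/ERR_PTR */
}

int main(void)
{
	struct socket sock = { 0 };
	struct file *filp = sock_alloc_file_stub(&sock);

	if (!filp) {			/* validate first ... */
		puts("alloc failed; sock->file never saw the bad pointer");
		return 1;
	}
	sock.file = filp;		/* ... then publish */
	return 0;
}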


@@ -31,6 +31,7 @@ struct handshake_req {
 	struct list_head		hr_list;
 	struct rhash_head		hr_rhash;
 	unsigned long			hr_flags;
+	struct file			*hr_file;
 	const struct handshake_proto	*hr_proto;
 	struct sock			*hr_sk;
 	void				(*hr_odestruct)(struct sock *sk);


@@ -48,7 +48,7 @@ int handshake_genl_notify(struct net *net, const struct handshake_proto *proto,
 				proto->hp_handler_class))
 		return -ESRCH;

-	msg = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	msg = genlmsg_new(GENLMSG_DEFAULT_SIZE, flags);
 	if (!msg)
 		return -ENOMEM;

@@ -99,9 +99,6 @@ static int handshake_dup(struct socket *sock)
 	struct file *file;
 	int newfd;

-	if (!sock->file)
-		return -EBADF;
-
 	file = get_file(sock->file);
 	newfd = get_unused_fd_flags(O_CLOEXEC);
 	if (newfd < 0) {
@@ -142,15 +139,16 @@ int handshake_nl_accept_doit(struct sk_buff *skb, struct genl_info *info)
 		goto out_complete;
 	}
 	err = req->hr_proto->hp_accept(req, info, fd);
-	if (err)
+	if (err) {
+		fput(sock->file);
 		goto out_complete;
+	}

 	trace_handshake_cmd_accept(net, req, req->hr_sk, fd);
 	return 0;

 out_complete:
 	handshake_complete(req, -EIO, NULL);
-	fput(sock->file);
 out_status:
 	trace_handshake_cmd_accept_err(net, req, NULL, err);
 	return err;
@@ -159,8 +157,8 @@ out_status:
 int handshake_nl_done_doit(struct sk_buff *skb, struct genl_info *info)
 {
 	struct net *net = sock_net(skb->sk);
+	struct handshake_req *req = NULL;
 	struct socket *sock = NULL;
-	struct handshake_req *req;
 	int fd, status, err;

 	if (GENL_REQ_ATTR_CHECK(info, HANDSHAKE_A_DONE_SOCKFD))


@@ -239,6 +239,7 @@ int handshake_req_submit(struct socket *sock, struct handshake_req *req,
 	}
 	req->hr_odestruct = req->hr_sk->sk_destruct;
 	req->hr_sk->sk_destruct = handshake_sk_destruct;
+	req->hr_file = sock->file;

 	ret = -EOPNOTSUPP;
 	net = sock_net(req->hr_sk);
@@ -334,6 +335,9 @@ bool handshake_req_cancel(struct sock *sk)
 		return false;
 	}

+	/* Request accepted and waiting for DONE */
+	fput(req->hr_file);
+
 out_true:
 	trace_handshake_cancel(net, req, sk);


@@ -31,6 +31,7 @@ struct tls_handshake_req {
 	int			th_type;
 	unsigned int		th_timeout_ms;
 	int			th_auth_mode;
+	const char		*th_peername;
 	key_serial_t		th_keyring;
 	key_serial_t		th_certificate;
 	key_serial_t		th_privkey;
@@ -48,6 +49,7 @@ tls_handshake_req_init(struct handshake_req *req,
 	treq->th_timeout_ms = args->ta_timeout_ms;
 	treq->th_consumer_done = args->ta_done;
 	treq->th_consumer_data = args->ta_data;
+	treq->th_peername = args->ta_peername;
 	treq->th_keyring = args->ta_keyring;
 	treq->th_num_peerids = 0;
 	treq->th_certificate = TLS_NO_CERT;
@@ -214,6 +216,12 @@ static int tls_handshake_accept(struct handshake_req *req,
 	ret = nla_put_u32(msg, HANDSHAKE_A_ACCEPT_MESSAGE_TYPE, treq->th_type);
 	if (ret < 0)
 		goto out_cancel;
+	if (treq->th_peername) {
+		ret = nla_put_string(msg, HANDSHAKE_A_ACCEPT_PEERNAME,
+				     treq->th_peername);
+		if (ret < 0)
+			goto out_cancel;
+	}
 	if (treq->th_timeout_ms) {
 		ret = nla_put_u32(msg, HANDSHAKE_A_ACCEPT_TIMEOUT, treq->th_timeout_ms);
 		if (ret < 0)


@@ -317,7 +317,14 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc,
 			ipc->tos = val;
 			ipc->priority = rt_tos2priority(ipc->tos);
 			break;
-
+		case IP_PROTOCOL:
+			if (cmsg->cmsg_len != CMSG_LEN(sizeof(int)))
+				return -EINVAL;
+			val = *(int *)CMSG_DATA(cmsg);
+			if (val < 1 || val > 255)
+				return -EINVAL;
+			ipc->protocol = val;
+			break;
 		default:
 			return -EINVAL;
 		}
@@ -1761,6 +1768,9 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname,
 	case IP_LOCAL_PORT_RANGE:
 		val = inet->local_port_range.hi << 16 | inet->local_port_range.lo;
 		break;
+	case IP_PROTOCOL:
+		val = inet_sk(sk)->inet_num;
+		break;
 	default:
 		sockopt_release_sock(sk);
 		return -ENOPROTOOPT;
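
From userspace, the new ancillary data lets a sender on an IPPROTO_RAW (hdrincl) socket declare the real transport protocol of the hand-built header, which the kernel previously hardwired to 255 for the flow/xfrm lookup. A hypothetical sender, assuming IP_PROTOCOL is 52 as defined in this release (older libcs may not define it); needs CAP_NET_RAW and trims error handling:

/* Editor's sketch of the new uapi, not an authoritative example. */
#include <netinet/in.h>
#include <netinet/ip.h>
#include <string.h>
#include <sys/socket.h>

#ifndef IP_PROTOCOL
#define IP_PROTOCOL 52
#endif

int main(void)
{
	int fd = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);
	struct sockaddr_in dst = { .sin_family = AF_INET,
				   .sin_addr.s_addr = htonl(INADDR_LOOPBACK) };
	struct iphdr ip = {
		.version = 4, .ihl = 5, .ttl = 64,
		.protocol = IPPROTO_UDP,	/* what the packet really is */
		.tot_len = htons(sizeof(ip)),
		.daddr = htonl(INADDR_LOOPBACK),
	};
	struct iovec iov = { &ip, sizeof(ip) };
	char cbuf[CMSG_SPACE(sizeof(int))] = { 0 };
	struct msghdr msg = {
		.msg_name = &dst, .msg_namelen = sizeof(dst),
		.msg_iov = &iov, .msg_iovlen = 1,
		.msg_control = cbuf, .msg_controllen = sizeof(cbuf),
	};
	struct cmsghdr *cm = CMSG_FIRSTHDR(&msg);
	int proto = IPPROTO_UDP;	/* match the header for policy lookup */

	cm->cmsg_level = SOL_IP;
	cm->cmsg_type = IP_PROTOCOL;
	cm->cmsg_len = CMSG_LEN(sizeof(proto));
	memcpy(CMSG_DATA(cm), &proto, sizeof(proto));

	return sendmsg(fd, &msg, 0) < 0;
}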


@@ -532,6 +532,9 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	}

 	ipcm_init_sk(&ipc, inet);
+	/* Keep backward compat */
+	if (hdrincl)
+		ipc.protocol = IPPROTO_RAW;

 	if (msg->msg_controllen) {
 		err = ip_cmsg_send(sk, msg, &ipc, false);
@@ -599,7 +602,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)

 	flowi4_init_output(&fl4, ipc.oif, ipc.sockc.mark, tos,
 			   RT_SCOPE_UNIVERSE,
-			   hdrincl ? IPPROTO_RAW : sk->sk_protocol,
+			   hdrincl ? ipc.protocol : sk->sk_protocol,
 			   inet_sk_flowi_flags(sk) |
 			    (hdrincl ? FLOWI_FLAG_KNOWN_NH : 0),
 			   daddr, saddr, 0, 0, sk->sk_uid);


@@ -1571,7 +1571,7 @@ static int tcp_peek_sndq(struct sock *sk, struct msghdr *msg, int len)
  * calculation of whether or not we must ACK for the sake of
  * a window update.
  */
-static void __tcp_cleanup_rbuf(struct sock *sk, int copied)
+void __tcp_cleanup_rbuf(struct sock *sk, int copied)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	bool time_to_ack = false;
@@ -1773,7 +1773,6 @@ int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
 		WARN_ON_ONCE(!skb_set_owner_sk_safe(skb, sk));
 		tcp_flags = TCP_SKB_CB(skb)->tcp_flags;
 		used = recv_actor(sk, skb);
-		consume_skb(skb);
 		if (used < 0) {
 			if (!copied)
 				copied = used;
@@ -1787,14 +1786,6 @@ int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
 			break;
 		}
 	}
-	WRITE_ONCE(tp->copied_seq, seq);
-
-	tcp_rcv_space_adjust(sk);
-
-	/* Clean up data we have read: This will do ACK frames. */
-	if (copied > 0)
-		__tcp_cleanup_rbuf(sk, copied);
-
 	return copied;
 }
 EXPORT_SYMBOL(tcp_read_skb);


@@ -11,6 +11,24 @@
 #include <net/inet_common.h>
 #include <net/tls.h>

+void tcp_eat_skb(struct sock *sk, struct sk_buff *skb)
+{
+	struct tcp_sock *tcp;
+	int copied;
+
+	if (!skb || !skb->len || !sk_is_tcp(sk))
+		return;
+
+	if (skb_bpf_strparser(skb))
+		return;
+
+	tcp = tcp_sk(sk);
+	copied = tcp->copied_seq + skb->len;
+	WRITE_ONCE(tcp->copied_seq, copied);
+	tcp_rcv_space_adjust(sk);
+	__tcp_cleanup_rbuf(sk, skb->len);
+}
+
 static int bpf_tcp_ingress(struct sock *sk, struct sk_psock *psock,
 			   struct sk_msg *msg, u32 apply_bytes, int flags)
 {
@@ -174,14 +192,34 @@ static int tcp_msg_wait_data(struct sock *sk, struct sk_psock *psock,
 	return ret;
 }

+static bool is_next_msg_fin(struct sk_psock *psock)
+{
+	struct scatterlist *sge;
+	struct sk_msg *msg_rx;
+	int i;
+
+	msg_rx = sk_psock_peek_msg(psock);
+	i = msg_rx->sg.start;
+	sge = sk_msg_elem(msg_rx, i);
+	if (!sge->length) {
+		struct sk_buff *skb = msg_rx->skb;
+
+		if (skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
+			return true;
+	}
+	return false;
+}
+
 static int tcp_bpf_recvmsg_parser(struct sock *sk,
 				  struct msghdr *msg,
 				  size_t len,
 				  int flags,
 				  int *addr_len)
 {
+	struct tcp_sock *tcp = tcp_sk(sk);
+	u32 seq = tcp->copied_seq;
 	struct sk_psock *psock;
-	int copied;
+	int copied = 0;

 	if (unlikely(flags & MSG_ERRQUEUE))
 		return inet_recv_error(sk, msg, len, addr_len);
@@ -194,8 +232,43 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk,
 		return tcp_recvmsg(sk, msg, len, flags, addr_len);

 	lock_sock(sk);
+
+	/* We may have received data on the sk_receive_queue pre-accept and
+	 * then we can not use read_skb in this context because we haven't
+	 * assigned a sk_socket yet so have no link to the ops. The work-around
+	 * is to check the sk_receive_queue and in these cases read skbs off
+	 * queue again. The read_skb hook is not running at this point because
+	 * of lock_sock so we avoid having multiple runners in read_skb.
+	 */
+	if (unlikely(!skb_queue_empty(&sk->sk_receive_queue))) {
+		tcp_data_ready(sk);
+		/* This handles the ENOMEM errors if we both receive data
+		 * pre accept and are already under memory pressure. At least
+		 * let user know to retry.
+		 */
+		if (unlikely(!skb_queue_empty(&sk->sk_receive_queue))) {
+			copied = -EAGAIN;
+			goto out;
+		}
+	}
+
 msg_bytes_ready:
 	copied = sk_msg_recvmsg(sk, psock, msg, len, flags);
+	/* The typical case for EFAULT is the socket was gracefully
+	 * shutdown with a FIN pkt. So check here the other case is
+	 * some error on copy_page_to_iter which would be unexpected.
+	 * On fin return correct return code to zero.
+	 */
+	if (copied == -EFAULT) {
+		bool is_fin = is_next_msg_fin(psock);
+
+		if (is_fin) {
+			copied = 0;
+			seq++;
+			goto out;
+		}
+	}
+	seq += copied;
 	if (!copied) {
 		long timeo;
 		int data;
@@ -233,6 +306,10 @@ msg_bytes_ready:
 		copied = -EAGAIN;
 	}
 out:
+	WRITE_ONCE(tcp->copied_seq, seq);
+	tcp_rcv_space_adjust(sk);
+	if (copied > 0)
+		__tcp_cleanup_rbuf(sk, copied);
 	release_sock(sk);
 	sk_psock_put(sk, psock);
 	return copied;


@@ -1818,7 +1818,7 @@ EXPORT_SYMBOL(__skb_recv_udp);
 int udp_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
 {
 	struct sk_buff *skb;
-	int err, copied;
+	int err;

 try_again:
 	skb = skb_recv_udp(sk, MSG_DONTWAIT, &err);
@@ -1837,10 +1837,7 @@ try_again:
 	}

 	WARN_ON_ONCE(!skb_set_owner_sk_safe(skb, sk));
-	copied = recv_actor(sk, skb);
-	kfree_skb(skb);
-
-	return copied;
+	return recv_actor(sk, skb);
 }
 EXPORT_SYMBOL(udp_read_skb);


@@ -64,6 +64,8 @@ struct proto 	udplite_prot = {
 	.per_cpu_fw_alloc  = &udp_memory_per_cpu_fw_alloc,

 	.sysctl_mem	   = sysctl_udp_mem,
+	.sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min),
+	.sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min),
 	.obj_size	   = sizeof(struct udp_sock),
 	.h.udp_table	   = &udplite_table,
 };


@@ -143,6 +143,8 @@ int ipv6_find_tlv(const struct sk_buff *skb, int offset, int type)
 			optlen = 1;
 			break;
 		default:
+			if (len < 2)
+				goto bad;
 			optlen = nh[offset + 1] + 2;
 			if (optlen > len)
 				goto bad;
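
The guard closes an out-of-bounds read: with one stray byte left, len is 1 but the TLV length field sits at nh[offset + 1], one past the option area. A simplified stand-in parser showing the check (the real ipv6_find_tlv also handles PAD1/PADN and type matching):

/* Editor's sketch; returns -1 on exhausted or truncated options. */
#include <stdio.h>

static int scan_tlvs(const unsigned char *nh, int offset, int len)
{
	while (len > 0) {
		if (len < 2)		/* the new guard */
			return -1;	/* truncated TLV: bail out */
		int optlen = nh[offset + 1] + 2;
		if (optlen > len)
			return -1;
		offset += optlen;
		len -= optlen;
	}
	return -1;
}

int main(void)
{
	unsigned char opts[] = { 0x3b };	/* one stray byte, no length */

	printf("%d\n", scan_tlvs(opts, 0, sizeof(opts)));	/* -1, no OOB read */
	return 0;
}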


@@ -793,7 +793,8 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)

 		if (!proto)
 			proto = inet->inet_num;
-		else if (proto != inet->inet_num)
+		else if (proto != inet->inet_num &&
+			 inet->inet_num != IPPROTO_RAW)
 			return -EINVAL;

 		if (proto > 255)


@@ -60,6 +60,8 @@ struct proto udplitev6_prot = {
 	.per_cpu_fw_alloc  = &udp_memory_per_cpu_fw_alloc,

 	.sysctl_mem	   = sysctl_udp_mem,
+	.sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min),
+	.sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min),
 	.obj_size	   = sizeof(struct udp6_sock),
 	.h.udp_table	   = &udplite_table,
 };


@@ -324,9 +324,12 @@ bool sctp_transport_pl_recv(struct sctp_transport *t)
 		t->pl.probe_size += SCTP_PL_BIG_STEP;
 	} else if (t->pl.state == SCTP_PL_SEARCH) {
 		if (!t->pl.probe_high) {
-			t->pl.probe_size = min(t->pl.probe_size + SCTP_PL_BIG_STEP,
-					       SCTP_MAX_PLPMTU);
-			return false;
+			if (t->pl.probe_size < SCTP_MAX_PLPMTU) {
+				t->pl.probe_size = min(t->pl.probe_size + SCTP_PL_BIG_STEP,
+						       SCTP_MAX_PLPMTU);
+				return false;
+			}
+			t->pl.probe_high = SCTP_MAX_PLPMTU;
 		}
 		t->pl.probe_size += SCTP_PL_MIN_STEP;
 		if (t->pl.probe_size >= t->pl.probe_high) {
@@ -341,7 +344,7 @@ bool sctp_transport_pl_recv(struct sctp_transport *t)
 	} else if (t->pl.state == SCTP_PL_COMPLETE) {
 		/* Raise probe_size again after 30 * interval in Search Complete */
 		t->pl.state = SCTP_PL_SEARCH; /* Search Complete -> Search */
-		t->pl.probe_size += SCTP_PL_MIN_STEP;
+		t->pl.probe_size = min(t->pl.probe_size + SCTP_PL_MIN_STEP, SCTP_MAX_PLPMTU);
 	}

 	return t->pl.state == SCTP_PL_COMPLETE;
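
Worked numbers for the Search Complete branch, assuming the usual constants from the sctp headers (SCTP_MAX_PLPMTU 9000, SCTP_PL_MIN_STEP 4): without the min(), a probe_size already at the ceiling would step to 9004 and exceed the maximum MTU the prober is allowed to use:

/* Editor's arithmetic check; constants assumed, not quoted from source. */
#include <stdio.h>

#define SCTP_MAX_PLPMTU  9000
#define SCTP_PL_MIN_STEP 4

static int min_int(int a, int b) { return a < b ? a : b; }

int main(void)
{
	int probe_size = SCTP_MAX_PLPMTU;	/* already at the ceiling */

	/* old: probe_size += SCTP_PL_MIN_STEP;  -> 9004, over the max */
	probe_size = min_int(probe_size + SCTP_PL_MIN_STEP, SCTP_MAX_PLPMTU);
	printf("%d\n", probe_size);		/* stays 9000 */
	return 0;
}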


@@ -2000,8 +2000,10 @@ static int smc_listen_rdma_init(struct smc_sock *new_smc,
 		return rc;

 	/* create send buffer and rmb */
-	if (smc_buf_create(new_smc, false))
+	if (smc_buf_create(new_smc, false)) {
+		smc_conn_abort(new_smc, ini->first_contact_local);
 		return SMC_CLC_DECL_MEM;
+	}

 	return 0;
 }
@@ -2217,8 +2219,11 @@ static void smc_find_rdma_v2_device_serv(struct smc_sock *new_smc,
 	smcr_version = ini->smcr_version;
 	ini->smcr_version = SMC_V2;
 	rc = smc_listen_rdma_init(new_smc, ini);
-	if (!rc)
+	if (!rc) {
 		rc = smc_listen_rdma_reg(new_smc, ini->first_contact_local);
+		if (rc)
+			smc_conn_abort(new_smc, ini->first_contact_local);
+	}
 	if (!rc)
 		return;
 	ini->smcr_version = smcr_version;


@@ -127,6 +127,7 @@ static int smcr_lgr_conn_assign_link(struct smc_connection *conn, bool first)
 	int i, j;

 	/* do link balancing */
+	conn->lnk = NULL;	/* reset conn->lnk first */
 	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
 		struct smc_link *lnk = &conn->lgr->lnk[i];


@@ -167,6 +167,11 @@ static inline bool tls_strp_msg_ready(struct tls_sw_context_rx *ctx)
 	return ctx->strp.msg_ready;
 }

+static inline bool tls_strp_msg_mixed_decrypted(struct tls_sw_context_rx *ctx)
+{
+	return ctx->strp.mixed_decrypted;
+}
+
 #ifdef CONFIG_TLS_DEVICE
 int tls_device_init(void);
 void tls_device_cleanup(void);


@@ -1007,20 +1007,14 @@ int tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx)
 	struct tls_sw_context_rx *sw_ctx = tls_sw_ctx_rx(tls_ctx);
 	struct sk_buff *skb = tls_strp_msg(sw_ctx);
 	struct strp_msg *rxm = strp_msg(skb);
-	int is_decrypted = skb->decrypted;
-	int is_encrypted = !is_decrypted;
-	struct sk_buff *skb_iter;
-	int left;
-
-	left = rxm->full_len - skb->len;
-	/* Check if all the data is decrypted already */
-	skb_iter = skb_shinfo(skb)->frag_list;
-	while (skb_iter && left > 0) {
-		is_decrypted &= skb_iter->decrypted;
-		is_encrypted &= !skb_iter->decrypted;
-		left -= skb_iter->len;
-		skb_iter = skb_iter->next;
+	int is_decrypted, is_encrypted;
+
+	if (!tls_strp_msg_mixed_decrypted(sw_ctx)) {
+		is_decrypted = skb->decrypted;
+		is_encrypted = !is_decrypted;
+	} else {
+		is_decrypted = 0;
+		is_encrypted = 0;
 	}

 	trace_tls_device_decrypted(sk, tcp_sk(sk)->copied_seq - rxm->full_len,


@@ -29,34 +29,50 @@ static void tls_strp_anchor_free(struct tls_strparser *strp)
 	struct skb_shared_info *shinfo = skb_shinfo(strp->anchor);
 
 	DEBUG_NET_WARN_ON_ONCE(atomic_read(&shinfo->dataref) != 1);
-	shinfo->frag_list = NULL;
+	if (!strp->copy_mode)
+		shinfo->frag_list = NULL;
 	consume_skb(strp->anchor);
 	strp->anchor = NULL;
 }
 
+static struct sk_buff *
+tls_strp_skb_copy(struct tls_strparser *strp, struct sk_buff *in_skb,
+		  int offset, int len)
+{
+	struct sk_buff *skb;
+	int i, err;
+
+	skb = alloc_skb_with_frags(0, len, TLS_PAGE_ORDER,
+				   &err, strp->sk->sk_allocation);
+	if (!skb)
+		return NULL;
+
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+		WARN_ON_ONCE(skb_copy_bits(in_skb, offset,
+					   skb_frag_address(frag),
+					   skb_frag_size(frag)));
+		offset += skb_frag_size(frag);
+	}
+
+	skb->len = len;
+	skb->data_len = len;
+	skb_copy_header(skb, in_skb);
+	return skb;
+}
+
 /* Create a new skb with the contents of input copied to its page frags */
 static struct sk_buff *tls_strp_msg_make_copy(struct tls_strparser *strp)
 {
 	struct strp_msg *rxm;
 	struct sk_buff *skb;
-	int i, err, offset;
 
-	skb = alloc_skb_with_frags(0, strp->stm.full_len, TLS_PAGE_ORDER,
-				   &err, strp->sk->sk_allocation);
+	skb = tls_strp_skb_copy(strp, strp->anchor, strp->stm.offset,
+				strp->stm.full_len);
 	if (!skb)
 		return NULL;
 
-	offset = strp->stm.offset;
-	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
-		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
-
-		WARN_ON_ONCE(skb_copy_bits(strp->anchor, offset,
-					   skb_frag_address(frag),
-					   skb_frag_size(frag)));
-		offset += skb_frag_size(frag);
-	}
-
-	skb_copy_header(skb, strp->anchor);
-
 	rxm = strp_msg(skb);
 	rxm->offset = 0;
 	return skb;
@@ -180,22 +196,22 @@ static void tls_strp_flush_anchor_copy(struct tls_strparser *strp)
 	for (i = 0; i < shinfo->nr_frags; i++)
 		__skb_frag_unref(&shinfo->frags[i], false);
 	shinfo->nr_frags = 0;
+	if (strp->copy_mode) {
+		kfree_skb_list(shinfo->frag_list);
+		shinfo->frag_list = NULL;
+	}
 	strp->copy_mode = 0;
+	strp->mixed_decrypted = 0;
 }
 
-static int tls_strp_copyin(read_descriptor_t *desc, struct sk_buff *in_skb,
-			   unsigned int offset, size_t in_len)
+static int tls_strp_copyin_frag(struct tls_strparser *strp, struct sk_buff *skb,
+				struct sk_buff *in_skb, unsigned int offset,
+				size_t in_len)
 {
-	struct tls_strparser *strp = (struct tls_strparser *)desc->arg.data;
-	struct sk_buff *skb;
-	skb_frag_t *frag;
 	size_t len, chunk;
+	skb_frag_t *frag;
 	int sz;
 
-	if (strp->msg_ready)
-		return 0;
-
-	skb = strp->anchor;
 	frag = &skb_shinfo(skb)->frags[skb->len / PAGE_SIZE];
 
 	len = in_len;
@@ -208,19 +224,26 @@ static int tls_strp_copyin(read_descriptor_t *desc, struct sk_buff *in_skb,
 					   skb_frag_size(frag),
 					   chunk));
 
-		sz = tls_rx_msg_size(strp, strp->anchor);
-		if (sz < 0) {
-			desc->error = sz;
-			return 0;
-		}
-
-		/* We may have over-read, sz == 0 is guaranteed under-read */
-		if (sz > 0)
-			chunk = min_t(size_t, chunk, sz - skb->len);
-
 		skb->len += chunk;
 		skb->data_len += chunk;
 		skb_frag_size_add(frag, chunk);
+
+		sz = tls_rx_msg_size(strp, skb);
+		if (sz < 0)
+			return sz;
+
+		/* We may have over-read, sz == 0 is guaranteed under-read */
+		if (unlikely(sz && sz < skb->len)) {
+			int over = skb->len - sz;
+
+			WARN_ON_ONCE(over > chunk);
+			skb->len -= over;
+			skb->data_len -= over;
+			skb_frag_size_add(frag, -over);
+
+			chunk -= over;
+		}
+
 		frag++;
 		len -= chunk;
 		offset += chunk;
@@ -247,15 +270,99 @@ static int tls_strp_copyin(read_descriptor_t *desc, struct sk_buff *in_skb,
 		offset += chunk;
 	}
 
-	if (strp->stm.full_len == skb->len) {
+read_done:
+	return in_len - len;
+}
+
+static int tls_strp_copyin_skb(struct tls_strparser *strp, struct sk_buff *skb,
+			       struct sk_buff *in_skb, unsigned int offset,
+			       size_t in_len)
+{
+	struct sk_buff *nskb, *first, *last;
+	struct skb_shared_info *shinfo;
+	size_t chunk;
+	int sz;
+
+	if (strp->stm.full_len)
+		chunk = strp->stm.full_len - skb->len;
+	else
+		chunk = TLS_MAX_PAYLOAD_SIZE + PAGE_SIZE;
+	chunk = min(chunk, in_len);
+
+	nskb = tls_strp_skb_copy(strp, in_skb, offset, chunk);
+	if (!nskb)
+		return -ENOMEM;
+
+	shinfo = skb_shinfo(skb);
+	if (!shinfo->frag_list) {
+		shinfo->frag_list = nskb;
+		nskb->prev = nskb;
+	} else {
+		first = shinfo->frag_list;
+		last = first->prev;
+		last->next = nskb;
+		first->prev = nskb;
+	}
+
+	skb->len += chunk;
+	skb->data_len += chunk;
+
+	if (!strp->stm.full_len) {
+		sz = tls_rx_msg_size(strp, skb);
+		if (sz < 0)
+			return sz;
+
+		/* We may have over-read, sz == 0 is guaranteed under-read */
+		if (unlikely(sz && sz < skb->len)) {
+			int over = skb->len - sz;
+
+			WARN_ON_ONCE(over > chunk);
+			skb->len -= over;
+			skb->data_len -= over;
+			__pskb_trim(nskb, nskb->len - over);
+
+			chunk -= over;
+		}
+
+		strp->stm.full_len = sz;
+	}
+
+	return chunk;
+}
+
+static int tls_strp_copyin(read_descriptor_t *desc, struct sk_buff *in_skb,
+			   unsigned int offset, size_t in_len)
+{
+	struct tls_strparser *strp = (struct tls_strparser *)desc->arg.data;
+	struct sk_buff *skb;
+	int ret;
+
+	if (strp->msg_ready)
+		return 0;
+
+	skb = strp->anchor;
+	if (!skb->len)
+		skb_copy_decrypted(skb, in_skb);
+	else
+		strp->mixed_decrypted |= !!skb_cmp_decrypted(skb, in_skb);
+
+	if (IS_ENABLED(CONFIG_TLS_DEVICE) && strp->mixed_decrypted)
+		ret = tls_strp_copyin_skb(strp, skb, in_skb, offset, in_len);
+	else
+		ret = tls_strp_copyin_frag(strp, skb, in_skb, offset, in_len);
+	if (ret < 0) {
+		desc->error = ret;
+		ret = 0;
+	}
+
+	if (strp->stm.full_len && strp->stm.full_len == skb->len) {
 		desc->count = 0;
 		strp->msg_ready = 1;
 		tls_rx_msg_ready(strp);
 	}
 
-read_done:
-	return in_len - len;
+	return ret;
 }
 
 static int tls_strp_read_copyin(struct tls_strparser *strp)
@@ -315,15 +422,19 @@ static int tls_strp_read_copy(struct tls_strparser *strp, bool qshort)
 	return 0;
 }
 
-static bool tls_strp_check_no_dup(struct tls_strparser *strp)
+static bool tls_strp_check_queue_ok(struct tls_strparser *strp)
 {
 	unsigned int len = strp->stm.offset + strp->stm.full_len;
-	struct sk_buff *skb;
+	struct sk_buff *first, *skb;
 	u32 seq;
 
-	skb = skb_shinfo(strp->anchor)->frag_list;
-	seq = TCP_SKB_CB(skb)->seq;
+	first = skb_shinfo(strp->anchor)->frag_list;
+	skb = first;
+	seq = TCP_SKB_CB(first)->seq;
 
+	/* Make sure there's no duplicate data in the queue,
+	 * and the decrypted status matches.
+	 */
 	while (skb->len < len) {
 		seq += skb->len;
 		len -= skb->len;
@@ -331,6 +442,8 @@ static bool tls_strp_check_no_dup(struct tls_strparser *strp)
 		if (TCP_SKB_CB(skb)->seq != seq)
 			return false;
+		if (skb_cmp_decrypted(first, skb))
+			return false;
 	}
 
 	return true;
@@ -411,7 +524,7 @@ static int tls_strp_read_sock(struct tls_strparser *strp)
 		return tls_strp_read_copy(strp, true);
 	}
 
-	if (!tls_strp_check_no_dup(strp))
+	if (!tls_strp_check_queue_ok(strp))
 		return tls_strp_read_copy(strp, false);
 
 	strp->msg_ready = 1;

@@ -2304,10 +2304,14 @@ static void tls_data_ready(struct sock *sk)
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
 	struct sk_psock *psock;
+	gfp_t alloc_save;
 
 	trace_sk_data_ready(sk);
 
+	alloc_save = sk->sk_allocation;
+	sk->sk_allocation = GFP_ATOMIC;
 	tls_strp_data_ready(&ctx->strp);
+	sk->sk_allocation = alloc_save;
 
 	psock = sk_psock_get(sk);
 	if (psock) {

@@ -2553,7 +2553,7 @@ static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
 {
 	struct unix_sock *u = unix_sk(sk);
 	struct sk_buff *skb;
-	int err, copied;
+	int err;
 
 	mutex_lock(&u->iolock);
 	skb = skb_recv_datagram(sk, MSG_DONTWAIT, &err);
@@ -2561,10 +2561,7 @@ static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
 	if (!skb)
 		return err;
 
-	copied = recv_actor(sk, skb);
-	kfree_skb(skb);
-
-	return copied;
+	return recv_actor(sk, skb);
 }
 
 /*

@@ -1441,7 +1441,6 @@ int virtio_transport_read_skb(struct vsock_sock *vsk, skb_read_actor_t recv_actor)
 	struct sock *sk = sk_vsock(vsk);
 	struct sk_buff *skb;
 	int off = 0;
-	int copied;
 	int err;
 
 	spin_lock_bh(&vvs->rx_lock);
@@ -1454,9 +1453,7 @@ int virtio_transport_read_skb(struct vsock_sock *vsk, skb_read_actor_t recv_actor)
 	if (!skb)
 		return err;
 
-	copied = recv_actor(sk, skb);
-	kfree_skb(skb);
-
-	return copied;
+	return recv_actor(sk, skb);
 }
 EXPORT_SYMBOL_GPL(virtio_transport_read_skb);

@@ -498,7 +498,6 @@ int main(int argc, char **argv)
 					"Option -%c requires an argument.\n\n",
 					optopt);
 		case 'h':
-			__fallthrough;
 		default:
 			Usage();
 			return 0;

@@ -197,7 +197,7 @@ $(OUTPUT)/urandom_read: urandom_read.c urandom_read_aux.c $(OUTPUT)/liburandom_r
 $(OUTPUT)/sign-file: ../../../../scripts/sign-file.c
 	$(call msg,SIGN-FILE,,$@)
-	$(Q)$(CC) $(shell $(HOSTPKG_CONFIG)--cflags libcrypto 2> /dev/null) \
+	$(Q)$(CC) $(shell $(HOSTPKG_CONFIG) --cflags libcrypto 2> /dev/null) \
 	  $< -o $@ \
 	  $(shell $(HOSTPKG_CONFIG) --libs libcrypto 2> /dev/null || echo -lcrypto)

@@ -2,6 +2,7 @@
 // Copyright (c) 2020 Cloudflare
 #include <error.h>
 #include <netinet/tcp.h>
+#include <sys/epoll.h>
 
 #include "test_progs.h"
 #include "test_skmsg_load_helpers.skel.h"
@@ -9,8 +10,12 @@
 #include "test_sockmap_invalid_update.skel.h"
 #include "test_sockmap_skb_verdict_attach.skel.h"
 #include "test_sockmap_progs_query.skel.h"
+#include "test_sockmap_pass_prog.skel.h"
+#include "test_sockmap_drop_prog.skel.h"
 #include "bpf_iter_sockmap.skel.h"
 
+#include "sockmap_helpers.h"
+
 #define TCP_REPAIR		19	/* TCP sock is under repair right now */
 
 #define TCP_REPAIR_ON		1
@@ -350,6 +355,126 @@ out:
 	test_sockmap_progs_query__destroy(skel);
 }
 
+#define MAX_EVENTS 10
+static void test_sockmap_skb_verdict_shutdown(void)
+{
+	struct epoll_event ev, events[MAX_EVENTS];
+	int n, err, map, verdict, s, c1, p1;
+	struct test_sockmap_pass_prog *skel;
+	int epollfd;
+	int zero = 0;
+	char b;
+
+	skel = test_sockmap_pass_prog__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "open_and_load"))
+		return;
+
+	verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
+	map = bpf_map__fd(skel->maps.sock_map_rx);
+
+	err = bpf_prog_attach(verdict, map, BPF_SK_SKB_STREAM_VERDICT, 0);
+	if (!ASSERT_OK(err, "bpf_prog_attach"))
+		goto out;
+
+	s = socket_loopback(AF_INET, SOCK_STREAM);
+	if (s < 0)
+		goto out;
+	err = create_pair(s, AF_INET, SOCK_STREAM, &c1, &p1);
+	if (err < 0)
+		goto out;
+
+	err = bpf_map_update_elem(map, &zero, &c1, BPF_NOEXIST);
+	if (err < 0)
+		goto out_close;
+
+	shutdown(p1, SHUT_WR);
+
+	ev.events = EPOLLIN;
+	ev.data.fd = c1;
+
+	epollfd = epoll_create1(0);
+	if (!ASSERT_GT(epollfd, -1, "epoll_create(0)"))
+		goto out_close;
+	err = epoll_ctl(epollfd, EPOLL_CTL_ADD, c1, &ev);
+	if (!ASSERT_OK(err, "epoll_ctl(EPOLL_CTL_ADD)"))
+		goto out_close;
+	err = epoll_wait(epollfd, events, MAX_EVENTS, -1);
+	if (!ASSERT_EQ(err, 1, "epoll_wait(fd)"))
+		goto out_close;
+
+	n = recv(c1, &b, 1, SOCK_NONBLOCK);
+	ASSERT_EQ(n, 0, "recv_timeout(fin)");
+out_close:
+	close(c1);
+	close(p1);
+out:
+	test_sockmap_pass_prog__destroy(skel);
+}
+
+static void test_sockmap_skb_verdict_fionread(bool pass_prog)
+{
+	int expected, zero = 0, sent, recvd, avail;
+	int err, map, verdict, s, c0, c1, p0, p1;
+	struct test_sockmap_pass_prog *pass;
+	struct test_sockmap_drop_prog *drop;
+	char buf[256] = "0123456789";
+
+	if (pass_prog) {
+		pass = test_sockmap_pass_prog__open_and_load();
+		if (!ASSERT_OK_PTR(pass, "open_and_load"))
+			return;
+		verdict = bpf_program__fd(pass->progs.prog_skb_verdict);
+		map = bpf_map__fd(pass->maps.sock_map_rx);
+		expected = sizeof(buf);
+	} else {
+		drop = test_sockmap_drop_prog__open_and_load();
+		if (!ASSERT_OK_PTR(drop, "open_and_load"))
+			return;
+		verdict = bpf_program__fd(drop->progs.prog_skb_verdict);
+		map = bpf_map__fd(drop->maps.sock_map_rx);
+		/* On drop data is consumed immediately and copied_seq inc'd */
+		expected = 0;
+	}
+
+	err = bpf_prog_attach(verdict, map, BPF_SK_SKB_STREAM_VERDICT, 0);
+	if (!ASSERT_OK(err, "bpf_prog_attach"))
+		goto out;
+
+	s = socket_loopback(AF_INET, SOCK_STREAM);
+	if (!ASSERT_GT(s, -1, "socket_loopback(s)"))
+		goto out;
+	err = create_socket_pairs(s, AF_INET, SOCK_STREAM, &c0, &c1, &p0, &p1);
+	if (!ASSERT_OK(err, "create_socket_pairs(s)"))
+		goto out;
+
+	err = bpf_map_update_elem(map, &zero, &c1, BPF_NOEXIST);
+	if (!ASSERT_OK(err, "bpf_map_update_elem(c1)"))
+		goto out_close;
+
+	sent = xsend(p1, &buf, sizeof(buf), 0);
+	ASSERT_EQ(sent, sizeof(buf), "xsend(p0)");
+	err = ioctl(c1, FIONREAD, &avail);
+	ASSERT_OK(err, "ioctl(FIONREAD) error");
+	ASSERT_EQ(avail, expected, "ioctl(FIONREAD)");
+
+	/* On DROP test there will be no data to read */
+	if (pass_prog) {
+		recvd = recv_timeout(c1, &buf, sizeof(buf), SOCK_NONBLOCK, IO_TIMEOUT_SEC);
+		ASSERT_EQ(recvd, sizeof(buf), "recv_timeout(c0)");
+	}
+
+out_close:
+	close(c0);
+	close(p0);
+	close(c1);
+	close(p1);
+out:
+	if (pass_prog)
+		test_sockmap_pass_prog__destroy(pass);
+	else
+		test_sockmap_drop_prog__destroy(drop);
+}
+
 void test_sockmap_basic(void)
 {
 	if (test__start_subtest("sockmap create_update_free"))
@@ -384,4 +509,10 @@ void test_sockmap_basic(void)
 		test_sockmap_progs_query(BPF_SK_SKB_STREAM_VERDICT);
 	if (test__start_subtest("sockmap skb_verdict progs query"))
 		test_sockmap_progs_query(BPF_SK_SKB_VERDICT);
+	if (test__start_subtest("sockmap skb_verdict shutdown"))
+		test_sockmap_skb_verdict_shutdown();
+	if (test__start_subtest("sockmap skb_verdict fionread"))
+		test_sockmap_skb_verdict_fionread(true);
+	if (test__start_subtest("sockmap skb_verdict fionread on drop"))
+		test_sockmap_skb_verdict_fionread(false);
 }

@@ -0,0 +1,390 @@
+#ifndef __SOCKMAP_HELPERS__
+#define __SOCKMAP_HELPERS__
+
+#include <linux/vm_sockets.h>
+
+#define IO_TIMEOUT_SEC 30
+#define MAX_STRERR_LEN 256
+#define MAX_TEST_NAME 80
+
+/* workaround for older vm_sockets.h */
+#ifndef VMADDR_CID_LOCAL
+#define VMADDR_CID_LOCAL 1
+#endif
+
+#define __always_unused	__attribute__((__unused__))
+
+#define _FAIL(errnum, fmt...) \
+	({ \
+		error_at_line(0, (errnum), __func__, __LINE__, fmt); \
+		CHECK_FAIL(true); \
+	})
+#define FAIL(fmt...) _FAIL(0, fmt)
+#define FAIL_ERRNO(fmt...) _FAIL(errno, fmt)
+#define FAIL_LIBBPF(err, msg) \
+	({ \
+		char __buf[MAX_STRERR_LEN]; \
+		libbpf_strerror((err), __buf, sizeof(__buf)); \
+		FAIL("%s: %s", (msg), __buf); \
+	})
+
+/* Wrappers that fail the test on error and report it. */
+
+#define xaccept_nonblock(fd, addr, len) \
+	({ \
+		int __ret = \
+			accept_timeout((fd), (addr), (len), IO_TIMEOUT_SEC); \
+		if (__ret == -1) \
+			FAIL_ERRNO("accept"); \
+		__ret; \
+	})
+
+#define xbind(fd, addr, len) \
+	({ \
+		int __ret = bind((fd), (addr), (len)); \
+		if (__ret == -1) \
+			FAIL_ERRNO("bind"); \
+		__ret; \
+	})
+
+#define xclose(fd) \
+	({ \
+		int __ret = close((fd)); \
+		if (__ret == -1) \
+			FAIL_ERRNO("close"); \
+		__ret; \
+	})
+
+#define xconnect(fd, addr, len) \
+	({ \
+		int __ret = connect((fd), (addr), (len)); \
+		if (__ret == -1) \
+			FAIL_ERRNO("connect"); \
+		__ret; \
+	})
+
+#define xgetsockname(fd, addr, len) \
+	({ \
+		int __ret = getsockname((fd), (addr), (len)); \
+		if (__ret == -1) \
+			FAIL_ERRNO("getsockname"); \
+		__ret; \
+	})
+
+#define xgetsockopt(fd, level, name, val, len) \
+	({ \
+		int __ret = getsockopt((fd), (level), (name), (val), (len)); \
+		if (__ret == -1) \
+			FAIL_ERRNO("getsockopt(" #name ")"); \
+		__ret; \
+	})
+
+#define xlisten(fd, backlog) \
+	({ \
+		int __ret = listen((fd), (backlog)); \
+		if (__ret == -1) \
+			FAIL_ERRNO("listen"); \
+		__ret; \
+	})
+
+#define xsetsockopt(fd, level, name, val, len) \
+	({ \
+		int __ret = setsockopt((fd), (level), (name), (val), (len)); \
+		if (__ret == -1) \
+			FAIL_ERRNO("setsockopt(" #name ")"); \
+		__ret; \
+	})
+
+#define xsend(fd, buf, len, flags) \
+	({ \
+		ssize_t __ret = send((fd), (buf), (len), (flags)); \
+		if (__ret == -1) \
+			FAIL_ERRNO("send"); \
+		__ret; \
+	})
+
+#define xrecv_nonblock(fd, buf, len, flags) \
+	({ \
+		ssize_t __ret = recv_timeout((fd), (buf), (len), (flags), \
+					     IO_TIMEOUT_SEC); \
+		if (__ret == -1) \
+			FAIL_ERRNO("recv"); \
+		__ret; \
+	})
+
+#define xsocket(family, sotype, flags) \
+	({ \
+		int __ret = socket(family, sotype, flags); \
+		if (__ret == -1) \
+			FAIL_ERRNO("socket"); \
+		__ret; \
+	})
+
+#define xbpf_map_delete_elem(fd, key) \
+	({ \
+		int __ret = bpf_map_delete_elem((fd), (key)); \
+		if (__ret < 0) \
+			FAIL_ERRNO("map_delete"); \
+		__ret; \
+	})
+
+#define xbpf_map_lookup_elem(fd, key, val) \
+	({ \
+		int __ret = bpf_map_lookup_elem((fd), (key), (val)); \
+		if (__ret < 0) \
+			FAIL_ERRNO("map_lookup"); \
+		__ret; \
+	})
+
+#define xbpf_map_update_elem(fd, key, val, flags) \
+	({ \
+		int __ret = bpf_map_update_elem((fd), (key), (val), (flags)); \
+		if (__ret < 0) \
+			FAIL_ERRNO("map_update"); \
+		__ret; \
+	})
+
+#define xbpf_prog_attach(prog, target, type, flags) \
+	({ \
+		int __ret = \
+			bpf_prog_attach((prog), (target), (type), (flags)); \
+		if (__ret < 0) \
+			FAIL_ERRNO("prog_attach(" #type ")"); \
+		__ret; \
+	})
+
+#define xbpf_prog_detach2(prog, target, type) \
+	({ \
+		int __ret = bpf_prog_detach2((prog), (target), (type)); \
+		if (__ret < 0) \
+			FAIL_ERRNO("prog_detach2(" #type ")"); \
+		__ret; \
+	})
+
+#define xpthread_create(thread, attr, func, arg) \
+	({ \
+		int __ret = pthread_create((thread), (attr), (func), (arg)); \
+		errno = __ret; \
+		if (__ret) \
+			FAIL_ERRNO("pthread_create"); \
+		__ret; \
+	})
+
+#define xpthread_join(thread, retval) \
+	({ \
+		int __ret = pthread_join((thread), (retval)); \
+		errno = __ret; \
+		if (__ret) \
+			FAIL_ERRNO("pthread_join"); \
+		__ret; \
+	})
+
+static inline int poll_read(int fd, unsigned int timeout_sec)
+{
+	struct timeval timeout = { .tv_sec = timeout_sec };
+	fd_set rfds;
+	int r;
+
+	FD_ZERO(&rfds);
+	FD_SET(fd, &rfds);
+
+	r = select(fd + 1, &rfds, NULL, NULL, &timeout);
+	if (r == 0)
+		errno = ETIME;
+
+	return r == 1 ? 0 : -1;
+}
+
+static inline int accept_timeout(int fd, struct sockaddr *addr, socklen_t *len,
+				 unsigned int timeout_sec)
+{
+	if (poll_read(fd, timeout_sec))
+		return -1;
+
+	return accept(fd, addr, len);
+}
+
+static inline int recv_timeout(int fd, void *buf, size_t len, int flags,
+			       unsigned int timeout_sec)
+{
+	if (poll_read(fd, timeout_sec))
+		return -1;
+
+	return recv(fd, buf, len, flags);
+}
+
+static inline void init_addr_loopback4(struct sockaddr_storage *ss,
+				       socklen_t *len)
+{
+	struct sockaddr_in *addr4 = memset(ss, 0, sizeof(*ss));
+
+	addr4->sin_family = AF_INET;
+	addr4->sin_port = 0;
+	addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+	*len = sizeof(*addr4);
+}
+
+static inline void init_addr_loopback6(struct sockaddr_storage *ss,
+				       socklen_t *len)
+{
+	struct sockaddr_in6 *addr6 = memset(ss, 0, sizeof(*ss));
+
+	addr6->sin6_family = AF_INET6;
+	addr6->sin6_port = 0;
+	addr6->sin6_addr = in6addr_loopback;
+	*len = sizeof(*addr6);
+}
+
+static inline void init_addr_loopback_vsock(struct sockaddr_storage *ss,
+					    socklen_t *len)
+{
+	struct sockaddr_vm *addr = memset(ss, 0, sizeof(*ss));
+
+	addr->svm_family = AF_VSOCK;
+	addr->svm_port = VMADDR_PORT_ANY;
+	addr->svm_cid = VMADDR_CID_LOCAL;
+	*len = sizeof(*addr);
+}
+
+static inline void init_addr_loopback(int family, struct sockaddr_storage *ss,
+				      socklen_t *len)
+{
+	switch (family) {
+	case AF_INET:
+		init_addr_loopback4(ss, len);
+		return;
+	case AF_INET6:
+		init_addr_loopback6(ss, len);
+		return;
+	case AF_VSOCK:
+		init_addr_loopback_vsock(ss, len);
+		return;
+	default:
+		FAIL("unsupported address family %d", family);
+	}
+}
+
+static inline struct sockaddr *sockaddr(struct sockaddr_storage *ss)
+{
+	return (struct sockaddr *)ss;
+}
+
+static inline int add_to_sockmap(int sock_mapfd, int fd1, int fd2)
+{
+	u64 value;
+	u32 key;
+	int err;
+
+	key = 0;
+	value = fd1;
+	err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
+	if (err)
+		return err;
+
+	key = 1;
+	value = fd2;
+
+	return xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
+}
+
+static inline int create_pair(int s, int family, int sotype, int *c, int *p)
+{
+	struct sockaddr_storage addr;
+	socklen_t len;
+	int err = 0;
+
+	len = sizeof(addr);
+	err = xgetsockname(s, sockaddr(&addr), &len);
+	if (err)
+		return err;
+
+	*c = xsocket(family, sotype, 0);
+	if (*c < 0)
+		return errno;
+	err = xconnect(*c, sockaddr(&addr), len);
+	if (err) {
+		err = errno;
+		goto close_cli0;
+	}
+
+	*p = xaccept_nonblock(s, NULL, NULL);
+	if (*p < 0) {
+		err = errno;
+		goto close_cli0;
+	}
+	return err;
+close_cli0:
+	close(*c);
+	return err;
+}
+
+static inline int create_socket_pairs(int s, int family, int sotype,
+				      int *c0, int *c1, int *p0, int *p1)
+{
+	int err;
+
+	err = create_pair(s, family, sotype, c0, p0);
+	if (err)
+		return err;
+
+	err = create_pair(s, family, sotype, c1, p1);
+	if (err) {
+		close(*c0);
+		close(*p0);
+	}
+
+	return err;
+}
+
+static inline int enable_reuseport(int s, int progfd)
+{
+	int err, one = 1;
+
+	err = xsetsockopt(s, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
+	if (err)
+		return -1;
+	err = xsetsockopt(s, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &progfd,
+			  sizeof(progfd));
+	if (err)
+		return -1;
+
+	return 0;
+}
+
+static inline int socket_loopback_reuseport(int family, int sotype, int progfd)
+{
+	struct sockaddr_storage addr;
+	socklen_t len;
+	int err, s;
+
+	init_addr_loopback(family, &addr, &len);
+
+	s = xsocket(family, sotype, 0);
+	if (s == -1)
+		return -1;
+
+	if (progfd >= 0)
+		enable_reuseport(s, progfd);
+
+	err = xbind(s, sockaddr(&addr), len);
+	if (err)
+		goto close;
+
+	if (sotype & SOCK_DGRAM)
+		return s;
+
+	err = xlisten(s, SOMAXCONN);
+	if (err)
+		goto close;
+
+	return s;
+close:
+	xclose(s);
+	return -1;
+}
+
+static inline int socket_loopback(int family, int sotype)
+{
+	return socket_loopback_reuseport(family, sotype, -1);
+}
+
+#endif // __SOCKMAP_HELPERS__

@@ -20,11 +20,6 @@
 #include <unistd.h>
 #include <linux/vm_sockets.h>
 
-/* workaround for older vm_sockets.h */
-#ifndef VMADDR_CID_LOCAL
-#define VMADDR_CID_LOCAL 1
-#endif
-
 #include <bpf/bpf.h>
 #include <bpf/libbpf.h>
@@ -32,315 +27,7 @@
 #include "test_progs.h"
 #include "test_sockmap_listen.skel.h"
 
-#define IO_TIMEOUT_SEC 30
-#define MAX_STRERR_LEN 256
-#define MAX_TEST_NAME 80
-
-#define __always_unused	__attribute__((__unused__))
-
-#define _FAIL(errnum, fmt...) \
-	({ \
-		error_at_line(0, (errnum), __func__, __LINE__, fmt); \
-		CHECK_FAIL(true); \
-	})
-#define FAIL(fmt...) _FAIL(0, fmt)
-#define FAIL_ERRNO(fmt...) _FAIL(errno, fmt)
-#define FAIL_LIBBPF(err, msg) \
-	({ \
-		char __buf[MAX_STRERR_LEN]; \
-		libbpf_strerror((err), __buf, sizeof(__buf)); \
-		FAIL("%s: %s", (msg), __buf); \
-	})
-
-/* Wrappers that fail the test on error and report it. */
-
-#define xaccept_nonblock(fd, addr, len) \
-	({ \
-		int __ret = \
-			accept_timeout((fd), (addr), (len), IO_TIMEOUT_SEC); \
-		if (__ret == -1) \
-			FAIL_ERRNO("accept"); \
-		__ret; \
-	})
-
-#define xbind(fd, addr, len) \
-	({ \
-		int __ret = bind((fd), (addr), (len)); \
-		if (__ret == -1) \
-			FAIL_ERRNO("bind"); \
-		__ret; \
-	})
-
-#define xclose(fd) \
-	({ \
-		int __ret = close((fd)); \
-		if (__ret == -1) \
-			FAIL_ERRNO("close"); \
-		__ret; \
-	})
-
-#define xconnect(fd, addr, len) \
-	({ \
-		int __ret = connect((fd), (addr), (len)); \
-		if (__ret == -1) \
-			FAIL_ERRNO("connect"); \
-		__ret; \
-	})
-
-#define xgetsockname(fd, addr, len) \
-	({ \
-		int __ret = getsockname((fd), (addr), (len)); \
-		if (__ret == -1) \
-			FAIL_ERRNO("getsockname"); \
-		__ret; \
-	})
-
-#define xgetsockopt(fd, level, name, val, len) \
-	({ \
-		int __ret = getsockopt((fd), (level), (name), (val), (len)); \
-		if (__ret == -1) \
-			FAIL_ERRNO("getsockopt(" #name ")"); \
-		__ret; \
-	})
-
-#define xlisten(fd, backlog) \
-	({ \
-		int __ret = listen((fd), (backlog)); \
-		if (__ret == -1) \
-			FAIL_ERRNO("listen"); \
-		__ret; \
-	})
-
-#define xsetsockopt(fd, level, name, val, len) \
-	({ \
-		int __ret = setsockopt((fd), (level), (name), (val), (len)); \
-		if (__ret == -1) \
-			FAIL_ERRNO("setsockopt(" #name ")"); \
-		__ret; \
-	})
-
-#define xsend(fd, buf, len, flags) \
-	({ \
-		ssize_t __ret = send((fd), (buf), (len), (flags)); \
-		if (__ret == -1) \
-			FAIL_ERRNO("send"); \
-		__ret; \
-	})
-
-#define xrecv_nonblock(fd, buf, len, flags) \
-	({ \
-		ssize_t __ret = recv_timeout((fd), (buf), (len), (flags), \
-					     IO_TIMEOUT_SEC); \
-		if (__ret == -1) \
-			FAIL_ERRNO("recv"); \
-		__ret; \
-	})
-
-#define xsocket(family, sotype, flags) \
-	({ \
-		int __ret = socket(family, sotype, flags); \
-		if (__ret == -1) \
-			FAIL_ERRNO("socket"); \
-		__ret; \
-	})
-
-#define xbpf_map_delete_elem(fd, key) \
-	({ \
-		int __ret = bpf_map_delete_elem((fd), (key)); \
-		if (__ret < 0) \
-			FAIL_ERRNO("map_delete"); \
-		__ret; \
-	})
-
-#define xbpf_map_lookup_elem(fd, key, val) \
-	({ \
-		int __ret = bpf_map_lookup_elem((fd), (key), (val)); \
-		if (__ret < 0) \
-			FAIL_ERRNO("map_lookup"); \
-		__ret; \
-	})
-
-#define xbpf_map_update_elem(fd, key, val, flags) \
-	({ \
-		int __ret = bpf_map_update_elem((fd), (key), (val), (flags)); \
-		if (__ret < 0) \
-			FAIL_ERRNO("map_update"); \
-		__ret; \
-	})
-
-#define xbpf_prog_attach(prog, target, type, flags) \
-	({ \
-		int __ret = \
-			bpf_prog_attach((prog), (target), (type), (flags)); \
-		if (__ret < 0) \
-			FAIL_ERRNO("prog_attach(" #type ")"); \
-		__ret; \
-	})
-
-#define xbpf_prog_detach2(prog, target, type) \
-	({ \
-		int __ret = bpf_prog_detach2((prog), (target), (type)); \
-		if (__ret < 0) \
-			FAIL_ERRNO("prog_detach2(" #type ")"); \
-		__ret; \
-	})
-
-#define xpthread_create(thread, attr, func, arg) \
-	({ \
-		int __ret = pthread_create((thread), (attr), (func), (arg)); \
-		errno = __ret; \
-		if (__ret) \
-			FAIL_ERRNO("pthread_create"); \
-		__ret; \
-	})
-
-#define xpthread_join(thread, retval) \
-	({ \
-		int __ret = pthread_join((thread), (retval)); \
-		errno = __ret; \
-		if (__ret) \
-			FAIL_ERRNO("pthread_join"); \
-		__ret; \
-	})
-
-static int poll_read(int fd, unsigned int timeout_sec)
-{
-	struct timeval timeout = { .tv_sec = timeout_sec };
-	fd_set rfds;
-	int r;
-
-	FD_ZERO(&rfds);
-	FD_SET(fd, &rfds);
-
-	r = select(fd + 1, &rfds, NULL, NULL, &timeout);
-	if (r == 0)
-		errno = ETIME;
-
-	return r == 1 ? 0 : -1;
-}
-
-static int accept_timeout(int fd, struct sockaddr *addr, socklen_t *len,
-			  unsigned int timeout_sec)
-{
-	if (poll_read(fd, timeout_sec))
-		return -1;
-
-	return accept(fd, addr, len);
-}
-
-static int recv_timeout(int fd, void *buf, size_t len, int flags,
-			unsigned int timeout_sec)
-{
-	if (poll_read(fd, timeout_sec))
-		return -1;
-
-	return recv(fd, buf, len, flags);
-}
-
-static void init_addr_loopback4(struct sockaddr_storage *ss, socklen_t *len)
-{
-	struct sockaddr_in *addr4 = memset(ss, 0, sizeof(*ss));
-
-	addr4->sin_family = AF_INET;
-	addr4->sin_port = 0;
-	addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
-	*len = sizeof(*addr4);
-}
-
-static void init_addr_loopback6(struct sockaddr_storage *ss, socklen_t *len)
-{
-	struct sockaddr_in6 *addr6 = memset(ss, 0, sizeof(*ss));
-
-	addr6->sin6_family = AF_INET6;
-	addr6->sin6_port = 0;
-	addr6->sin6_addr = in6addr_loopback;
-	*len = sizeof(*addr6);
-}
-
-static void init_addr_loopback_vsock(struct sockaddr_storage *ss, socklen_t *len)
-{
-	struct sockaddr_vm *addr = memset(ss, 0, sizeof(*ss));
-
-	addr->svm_family = AF_VSOCK;
-	addr->svm_port = VMADDR_PORT_ANY;
-	addr->svm_cid = VMADDR_CID_LOCAL;
-	*len = sizeof(*addr);
-}
-
-static void init_addr_loopback(int family, struct sockaddr_storage *ss,
-			       socklen_t *len)
-{
-	switch (family) {
-	case AF_INET:
-		init_addr_loopback4(ss, len);
-		return;
-	case AF_INET6:
-		init_addr_loopback6(ss, len);
-		return;
-	case AF_VSOCK:
-		init_addr_loopback_vsock(ss, len);
-		return;
-	default:
-		FAIL("unsupported address family %d", family);
-	}
-}
-
-static inline struct sockaddr *sockaddr(struct sockaddr_storage *ss)
-{
-	return (struct sockaddr *)ss;
-}
-
-static int enable_reuseport(int s, int progfd)
-{
-	int err, one = 1;
-
-	err = xsetsockopt(s, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
-	if (err)
-		return -1;
-	err = xsetsockopt(s, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &progfd,
-			  sizeof(progfd));
-	if (err)
-		return -1;
-
-	return 0;
-}
-
-static int socket_loopback_reuseport(int family, int sotype, int progfd)
-{
-	struct sockaddr_storage addr;
-	socklen_t len;
-	int err, s;
-
-	init_addr_loopback(family, &addr, &len);
-
-	s = xsocket(family, sotype, 0);
-	if (s == -1)
-		return -1;
-
-	if (progfd >= 0)
-		enable_reuseport(s, progfd);
-
-	err = xbind(s, sockaddr(&addr), len);
-	if (err)
-		goto close;
-
-	if (sotype & SOCK_DGRAM)
-		return s;
-
-	err = xlisten(s, SOMAXCONN);
-	if (err)
-		goto close;
-
-	return s;
-close:
-	xclose(s);
-	return -1;
-}
-
-static int socket_loopback(int family, int sotype)
-{
-	return socket_loopback_reuseport(family, sotype, -1);
-}
+#include "sockmap_helpers.h"
 
 static void test_insert_invalid(struct test_sockmap_listen *skel __always_unused,
 				int family, int sotype, int mapfd)
@@ -984,31 +671,12 @@ static const char *redir_mode_str(enum redir_mode mode)
 	}
 }
 
-static int add_to_sockmap(int sock_mapfd, int fd1, int fd2)
-{
-	u64 value;
-	u32 key;
-	int err;
-
-	key = 0;
-	value = fd1;
-	err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
-	if (err)
-		return err;
-
-	key = 1;
-	value = fd2;
-
-	return xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
-}
-
 static void redir_to_connected(int family, int sotype, int sock_mapfd,
 			       int verd_mapfd, enum redir_mode mode)
 {
 	const char *log_prefix = redir_mode_str(mode);
-	struct sockaddr_storage addr;
 	int s, c0, c1, p0, p1;
 	unsigned int pass;
-	socklen_t len;
 	int err, n;
 	u32 key;
 	char b;
@@ -1019,36 +687,13 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd,
 	if (s < 0)
 		return;
 
-	len = sizeof(addr);
-	err = xgetsockname(s, sockaddr(&addr), &len);
+	err = create_socket_pairs(s, family, sotype, &c0, &c1, &p0, &p1);
 	if (err)
 		goto close_srv;
 
-	c0 = xsocket(family, sotype, 0);
-	if (c0 < 0)
-		goto close_srv;
-	err = xconnect(c0, sockaddr(&addr), len);
-	if (err)
-		goto close_cli0;
-
-	p0 = xaccept_nonblock(s, NULL, NULL);
-	if (p0 < 0)
-		goto close_cli0;
-
-	c1 = xsocket(family, sotype, 0);
-	if (c1 < 0)
-		goto close_peer0;
-	err = xconnect(c1, sockaddr(&addr), len);
-	if (err)
-		goto close_cli1;
-	p1 = xaccept_nonblock(s, NULL, NULL);
-	if (p1 < 0)
-		goto close_cli1;
-
 	err = add_to_sockmap(sock_mapfd, p0, p1);
 	if (err)
-		goto close_peer1;
+		goto close;
 
 	n = write(mode == REDIR_INGRESS ? c1 : p1, "a", 1);
 	if (n < 0)
@@ -1056,12 +701,12 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd,
 	if (n == 0)
 		FAIL("%s: incomplete write", log_prefix);
 	if (n < 1)
-		goto close_peer1;
+		goto close;
 
 	key = SK_PASS;
 	err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
 	if (err)
-		goto close_peer1;
+		goto close;
 	if (pass != 1)
 		FAIL("%s: want pass count 1, have %d", log_prefix, pass);
 	n = recv_timeout(c0, &b, 1, 0, IO_TIMEOUT_SEC);
@@ -1070,13 +715,10 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd,
 	if (n == 0)
 		FAIL("%s: incomplete recv", log_prefix);
 
-close_peer1:
+close:
 	xclose(p1);
-close_cli1:
 	xclose(c1);
-close_peer0:
 	xclose(p0);
-close_cli0:
 	xclose(c0);
 close_srv:
 	xclose(s);

@@ -0,0 +1,32 @@
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+struct {
+	__uint(type, BPF_MAP_TYPE_SOCKMAP);
+	__uint(max_entries, 20);
+	__type(key, int);
+	__type(value, int);
+} sock_map_rx SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_SOCKMAP);
+	__uint(max_entries, 20);
+	__type(key, int);
+	__type(value, int);
+} sock_map_tx SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_SOCKMAP);
+	__uint(max_entries, 20);
+	__type(key, int);
+	__type(value, int);
+} sock_map_msg SEC(".maps");
+
+SEC("sk_skb")
+int prog_skb_verdict(struct __sk_buff *skb)
+{
+	return SK_DROP;
+}
+
+char _license[] SEC("license") = "GPL";

@@ -191,7 +191,7 @@ SEC("sockops")
 int bpf_sockmap(struct bpf_sock_ops *skops)
 {
 	__u32 lport, rport;
-	int op, err, ret;
+	int op, ret;
 
 	op = (int) skops->op;
@@ -203,10 +203,10 @@ int bpf_sockmap(struct bpf_sock_ops *skops)
 		if (lport == 10000) {
 			ret = 1;
 #ifdef SOCKMAP
-			err = bpf_sock_map_update(skops, &sock_map, &ret,
-						  BPF_NOEXIST);
+			bpf_sock_map_update(skops, &sock_map, &ret,
+					    BPF_NOEXIST);
 #else
-			err = bpf_sock_hash_update(skops, &sock_map, &ret,
-						   BPF_NOEXIST);
+			bpf_sock_hash_update(skops, &sock_map, &ret,
+					     BPF_NOEXIST);
 #endif
 		}
@@ -218,10 +218,10 @@ int bpf_sockmap(struct bpf_sock_ops *skops)
 		if (bpf_ntohl(rport) == 10001) {
 			ret = 10;
 #ifdef SOCKMAP
-			err = bpf_sock_map_update(skops, &sock_map, &ret,
-						  BPF_NOEXIST);
+			bpf_sock_map_update(skops, &sock_map, &ret,
+					    BPF_NOEXIST);
 #else
-			err = bpf_sock_hash_update(skops, &sock_map, &ret,
-						   BPF_NOEXIST);
+			bpf_sock_hash_update(skops, &sock_map, &ret,
+					     BPF_NOEXIST);
 #endif
 		}
@@ -230,8 +230,6 @@ int bpf_sockmap(struct bpf_sock_ops *skops)
 		break;
 	}
 
-	__sink(err);
-
 	return 0;
 }

@@ -0,0 +1,32 @@
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+struct {
+	__uint(type, BPF_MAP_TYPE_SOCKMAP);
+	__uint(max_entries, 20);
+	__type(key, int);
+	__type(value, int);
+} sock_map_rx SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_SOCKMAP);
+	__uint(max_entries, 20);
+	__type(key, int);
+	__type(value, int);
+} sock_map_tx SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_SOCKMAP);
+	__uint(max_entries, 20);
+	__type(key, int);
+	__type(value, int);
+} sock_map_msg SEC(".maps");
+
+SEC("sk_skb")
+int prog_skb_verdict(struct __sk_buff *skb)
+{
+	return SK_PASS;
+}
+
+char _license[] SEC("license") = "GPL";

@@ -68,7 +68,7 @@ setup()
 cleanup()
 {
 	$IP link del dev dummy0 &> /dev/null
-	ip netns del ns1
+	ip netns del ns1 &> /dev/null
 	ip netns del ns2 &> /dev/null
 }