Merge branch 'eth-fbnic-report-software-queue-stats'

Jakub Kicinski says:

====================
eth: fbnic: report software queue stats

Fill in typical software queue stats.

  # ./pyynl/cli.py --spec netlink/specs/netdev.yaml --dump qstats-get
  [{'ifindex': 2,
    'rx-alloc-fail': 0,
    'rx-bytes': 398064076,
    'rx-csum-complete': 271,
    'rx-csum-none': 0,
    'rx-packets': 276044,
    'tx-bytes': 7223770,
    'tx-needs-csum': 28148,
    'tx-packets': 28449,
    'tx-stop': 0,
    'tx-wake': 0}]

Note that we don't collect csum-unnecessary, just the uncommon
cases (csum-unnecessary is simply all the rest of the packets).
There is no programmatic use for these stats AFAIK, just manual
debugging.
====================

Link: https://patch.msgid.link/20250211181356.580800-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
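
On the csum-unnecessary point above: since every packet counted in rx-packets
falls into exactly one checksum bucket, the missing counter can be estimated
from what is reported. A minimal user-space sketch (not part of this series;
assumes csum-bad, which fbnic also does not report, is negligible):

  #include <stdint.h>

  /* Rough estimate of rx-csum-unnecessary from the qstats fbnic reports. */
  static uint64_t estimate_csum_unnecessary(uint64_t rx_packets,
                                            uint64_t csum_complete,
                                            uint64_t csum_none)
  {
          return rx_packets - csum_complete - csum_none;
  }

For the dump above that works out to roughly 276044 - 271 - 0 = 275773 packets
whose checksums did not need software verification.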
Committed by Jakub Kicinski on 2025-02-12 16:38:03 -08:00
7 changed files with 102 additions and 23 deletions


@@ -20,6 +20,7 @@ fbnic-y := fbnic_csr.o \
fbnic_pci.o \
fbnic_phylink.o \
fbnic_rpc.o \
fbnic_time.o \
fbnic_tlv.o \
fbnic_txrx.o \
fbnic_time.o


@@ -1224,14 +1224,14 @@ static void fbnic_get_ts_stats(struct net_device *netdev,
unsigned int start;
int i;
ts_stats->pkts = fbn->tx_stats.ts_packets;
ts_stats->lost = fbn->tx_stats.ts_lost;
ts_stats->pkts = fbn->tx_stats.twq.ts_packets;
ts_stats->lost = fbn->tx_stats.twq.ts_lost;
for (i = 0; i < fbn->num_tx_queues; i++) {
ring = fbn->tx[i];
do {
start = u64_stats_fetch_begin(&ring->stats.syncp);
ts_packets = ring->stats.ts_packets;
ts_lost = ring->stats.ts_lost;
ts_packets = ring->stats.twq.ts_packets;
ts_lost = ring->stats.twq.ts_lost;
} while (u64_stats_fetch_retry(&ring->stats.syncp, start));
ts_stats->pkts += ts_packets;
ts_stats->lost += ts_lost;
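
The pattern above (and throughout the series) is the u64_stats seqcount API:
the datapath brackets counter updates with u64_stats_update_begin()/end(), and
readers retry until they see a consistent snapshot. A generic sketch of the
pairing (illustrative only, not fbnic code):

  #include <linux/types.h>
  #include <linux/u64_stats_sync.h>

  struct demo_stats {
          u64 packets;
          u64 bytes;
          struct u64_stats_sync syncp;    /* u64_stats_init() at setup time */
  };

  /* Writer side, e.g. NAPI poll: on 32-bit kernels the 64-bit counters
   * cannot be updated atomically, so the update is bracketed.
   */
  static void demo_stats_add(struct demo_stats *s, unsigned int len)
  {
          u64_stats_update_begin(&s->syncp);
          s->packets++;
          s->bytes += len;
          u64_stats_update_end(&s->syncp);
  }

  /* Reader side: retry until packets and bytes come from the same
   * generation, so the pair is consistent with each other.
   */
  static void demo_stats_read(struct demo_stats *s, u64 *packets, u64 *bytes)
  {
          unsigned int start;

          do {
                  start = u64_stats_fetch_begin(&s->syncp);
                  *packets = s->packets;
                  *bytes = s->bytes;
          } while (u64_stats_fetch_retry(&s->syncp, start));
  }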


@@ -487,8 +487,9 @@ static void fbnic_get_queue_stats_rx(struct net_device *dev, int idx,
struct fbnic_net *fbn = netdev_priv(dev);
struct fbnic_ring *rxr = fbn->rx[idx];
struct fbnic_queue_stats *stats;
u64 bytes, packets, alloc_fail;
u64 csum_complete, csum_none;
unsigned int start;
u64 bytes, packets;
if (!rxr)
return;
@@ -498,10 +499,16 @@ static void fbnic_get_queue_stats_rx(struct net_device *dev, int idx,
start = u64_stats_fetch_begin(&stats->syncp);
bytes = stats->bytes;
packets = stats->packets;
alloc_fail = stats->rx.alloc_failed;
csum_complete = stats->rx.csum_complete;
csum_none = stats->rx.csum_none;
} while (u64_stats_fetch_retry(&stats->syncp, start));
rx->bytes = bytes;
rx->packets = packets;
rx->alloc_fail = alloc_fail;
rx->csum_complete = csum_complete;
rx->csum_none = csum_none;
}
static void fbnic_get_queue_stats_tx(struct net_device *dev, int idx,
@@ -510,6 +517,7 @@ static void fbnic_get_queue_stats_tx(struct net_device *dev, int idx,
struct fbnic_net *fbn = netdev_priv(dev);
struct fbnic_ring *txr = fbn->tx[idx];
struct fbnic_queue_stats *stats;
u64 stop, wake, csum;
unsigned int start;
u64 bytes, packets;
@@ -521,10 +529,16 @@ static void fbnic_get_queue_stats_tx(struct net_device *dev, int idx,
start = u64_stats_fetch_begin(&stats->syncp);
bytes = stats->bytes;
packets = stats->packets;
csum = stats->twq.csum_partial;
stop = stats->twq.stop;
wake = stats->twq.wake;
} while (u64_stats_fetch_retry(&stats->syncp, start));
tx->bytes = bytes;
tx->packets = packets;
tx->needs_csum = csum;
tx->stop = stop;
tx->wake = wake;
}
static void fbnic_get_base_stats(struct net_device *dev,
@@ -535,9 +549,15 @@ static void fbnic_get_base_stats(struct net_device *dev,
tx->bytes = fbn->tx_stats.bytes;
tx->packets = fbn->tx_stats.packets;
tx->needs_csum = fbn->tx_stats.twq.csum_partial;
tx->stop = fbn->tx_stats.twq.stop;
tx->wake = fbn->tx_stats.twq.wake;
rx->bytes = fbn->rx_stats.bytes;
rx->packets = fbn->rx_stats.packets;
rx->alloc_fail = fbn->rx_stats.rx.alloc_failed;
rx->csum_complete = fbn->rx_stats.rx.csum_complete;
rx->csum_none = fbn->rx_stats.rx.csum_none;
}
static const struct netdev_stat_ops fbnic_stat_ops = {
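
These handlers hang off struct netdev_stat_ops (the fbnic initializer is
truncated above). A minimal sketch of the wiring, with demo_* placeholders
standing in for a driver's own callbacks:

  #include <linux/netdevice.h>
  #include <net/netdev_queues.h>

  static void demo_get_queue_stats_rx(struct net_device *dev, int idx,
                                      struct netdev_queue_stats_rx *rx)
  {
          /* fill rx->packets, rx->bytes, rx->alloc_fail, ... for queue idx */
  }

  static void demo_get_queue_stats_tx(struct net_device *dev, int idx,
                                      struct netdev_queue_stats_tx *tx)
  {
          /* fill tx->packets, tx->bytes, tx->needs_csum, tx->stop, tx->wake */
  }

  static void demo_get_base_stats(struct net_device *dev,
                                  struct netdev_queue_stats_rx *rx,
                                  struct netdev_queue_stats_tx *tx)
  {
          /* totals for queues which no longer exist, so sums stay monotonic */
  }

  static const struct netdev_stat_ops demo_stat_ops = {
          .get_queue_stats_rx     = demo_get_queue_stats_rx,
          .get_queue_stats_tx     = demo_get_queue_stats_tx,
          .get_base_stats         = demo_get_base_stats,
  };

  /* In the probe path, dev->stat_ops = &demo_stat_ops; makes the device
   * answer qstats-get dumps like the one in the cover letter.
   */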


@@ -113,6 +113,11 @@ static int fbnic_maybe_stop_tx(const struct net_device *dev,
res = netif_txq_maybe_stop(txq, fbnic_desc_unused(ring), size,
FBNIC_TX_DESC_WAKEUP);
if (!res) {
u64_stats_update_begin(&ring->stats.syncp);
ring->stats.twq.stop++;
u64_stats_update_end(&ring->stats.syncp);
}
return !res;
}
@@ -191,6 +196,9 @@ fbnic_tx_offloads(struct fbnic_ring *ring, struct sk_buff *skb, __le64 *meta)
skb->csum_offset / 2));
*meta |= cpu_to_le64(FBNIC_TWD_FLAG_REQ_CSO);
u64_stats_update_begin(&ring->stats.syncp);
ring->stats.twq.csum_partial++;
u64_stats_update_end(&ring->stats.syncp);
*meta |= cpu_to_le64(FIELD_PREP(FBNIC_TWD_L2_HLEN_MASK, l2len / 2) |
FIELD_PREP(FBNIC_TWD_L3_IHLEN_MASK, i3len / 2));
@@ -198,12 +206,15 @@ fbnic_tx_offloads(struct fbnic_ring *ring, struct sk_buff *skb, __le64 *meta)
}
static void
fbnic_rx_csum(u64 rcd, struct sk_buff *skb, struct fbnic_ring *rcq)
fbnic_rx_csum(u64 rcd, struct sk_buff *skb, struct fbnic_ring *rcq,
u64 *csum_cmpl, u64 *csum_none)
{
skb_checksum_none_assert(skb);
if (unlikely(!(skb->dev->features & NETIF_F_RXCSUM)))
if (unlikely(!(skb->dev->features & NETIF_F_RXCSUM))) {
(*csum_none)++;
return;
}
if (FIELD_GET(FBNIC_RCD_META_L4_CSUM_UNNECESSARY, rcd)) {
skb->ip_summed = CHECKSUM_UNNECESSARY;
@@ -212,6 +223,7 @@ fbnic_rx_csum(u64 rcd, struct sk_buff *skb, struct fbnic_ring *rcq)
skb->ip_summed = CHECKSUM_COMPLETE;
skb->csum = (__force __wsum)csum;
(*csum_cmpl)++;
}
}
@@ -444,7 +456,7 @@ static void fbnic_clean_twq0(struct fbnic_napi_vector *nv, int napi_budget,
if (unlikely(discard)) {
u64_stats_update_begin(&ring->stats.syncp);
ring->stats.dropped += total_packets;
ring->stats.ts_lost += ts_lost;
ring->stats.twq.ts_lost += ts_lost;
u64_stats_update_end(&ring->stats.syncp);
netdev_tx_completed_queue(txq, total_packets, total_bytes);
@@ -456,9 +468,13 @@ static void fbnic_clean_twq0(struct fbnic_napi_vector *nv, int napi_budget,
ring->stats.packets += total_packets;
u64_stats_update_end(&ring->stats.syncp);
netif_txq_completed_wake(txq, total_packets, total_bytes,
fbnic_desc_unused(ring),
FBNIC_TX_DESC_WAKEUP);
if (!netif_txq_completed_wake(txq, total_packets, total_bytes,
fbnic_desc_unused(ring),
FBNIC_TX_DESC_WAKEUP)) {
u64_stats_update_begin(&ring->stats.syncp);
ring->stats.twq.wake++;
u64_stats_update_end(&ring->stats.syncp);
}
}
static void fbnic_clean_tsq(struct fbnic_napi_vector *nv,
@@ -507,7 +523,7 @@ static void fbnic_clean_tsq(struct fbnic_napi_vector *nv,
skb_tstamp_tx(skb, &hwtstamp);
u64_stats_update_begin(&ring->stats.syncp);
ring->stats.ts_packets++;
ring->stats.twq.ts_packets++;
u64_stats_update_end(&ring->stats.syncp);
}
@@ -661,8 +677,13 @@ static void fbnic_fill_bdq(struct fbnic_napi_vector *nv, struct fbnic_ring *bdq)
struct page *page;
page = page_pool_dev_alloc_pages(nv->page_pool);
if (!page)
if (!page) {
u64_stats_update_begin(&bdq->stats.syncp);
bdq->stats.rx.alloc_failed++;
u64_stats_update_end(&bdq->stats.syncp);
break;
}
fbnic_page_pool_init(bdq, i, page);
fbnic_bd_prep(bdq, i, page);
@@ -875,12 +896,13 @@ static void fbnic_rx_tstamp(struct fbnic_napi_vector *nv, u64 rcd,
static void fbnic_populate_skb_fields(struct fbnic_napi_vector *nv,
u64 rcd, struct sk_buff *skb,
struct fbnic_q_triad *qt)
struct fbnic_q_triad *qt,
u64 *csum_cmpl, u64 *csum_none)
{
struct net_device *netdev = nv->napi.dev;
struct fbnic_ring *rcq = &qt->cmpl;
fbnic_rx_csum(rcd, skb, rcq);
fbnic_rx_csum(rcd, skb, rcq, csum_cmpl, csum_none);
if (netdev->features & NETIF_F_RXHASH)
skb_set_hash(skb,
@@ -898,7 +920,8 @@ static bool fbnic_rcd_metadata_err(u64 rcd)
static int fbnic_clean_rcq(struct fbnic_napi_vector *nv,
struct fbnic_q_triad *qt, int budget)
{
unsigned int packets = 0, bytes = 0, dropped = 0;
unsigned int packets = 0, bytes = 0, dropped = 0, alloc_failed = 0;
u64 csum_complete = 0, csum_none = 0;
struct fbnic_ring *rcq = &qt->cmpl;
struct fbnic_pkt_buff *pkt;
s32 head0 = -1, head1 = -1;
@@ -947,14 +970,22 @@ static int fbnic_clean_rcq(struct fbnic_napi_vector *nv,
/* Populate skb and invalidate XDP */
if (!IS_ERR_OR_NULL(skb)) {
fbnic_populate_skb_fields(nv, rcd, skb, qt);
fbnic_populate_skb_fields(nv, rcd, skb, qt,
&csum_complete,
&csum_none);
packets++;
bytes += skb->len;
napi_gro_receive(&nv->napi, skb);
} else {
if (!skb)
alloc_failed++;
dropped++;
fbnic_put_pkt_buff(nv, pkt, 1);
}
@@ -977,6 +1008,9 @@ static int fbnic_clean_rcq(struct fbnic_napi_vector *nv,
/* Re-add ethernet header length (removed in fbnic_build_skb) */
rcq->stats.bytes += ETH_HLEN * packets;
rcq->stats.dropped += dropped;
rcq->stats.rx.alloc_failed += alloc_failed;
rcq->stats.rx.csum_complete += csum_complete;
rcq->stats.rx.csum_none += csum_none;
u64_stats_update_end(&rcq->stats.syncp);
/* Unmap and free processed buffers */
@@ -1054,6 +1088,11 @@ void fbnic_aggregate_ring_rx_counters(struct fbnic_net *fbn,
fbn->rx_stats.bytes += stats->bytes;
fbn->rx_stats.packets += stats->packets;
fbn->rx_stats.dropped += stats->dropped;
fbn->rx_stats.rx.alloc_failed += stats->rx.alloc_failed;
fbn->rx_stats.rx.csum_complete += stats->rx.csum_complete;
fbn->rx_stats.rx.csum_none += stats->rx.csum_none;
/* Remember to add new stats here */
BUILD_BUG_ON(sizeof(fbn->rx_stats.rx) / 8 != 3);
}
void fbnic_aggregate_ring_tx_counters(struct fbnic_net *fbn,
@@ -1065,8 +1104,13 @@ void fbnic_aggregate_ring_tx_counters(struct fbnic_net *fbn,
fbn->tx_stats.bytes += stats->bytes;
fbn->tx_stats.packets += stats->packets;
fbn->tx_stats.dropped += stats->dropped;
fbn->tx_stats.ts_lost += stats->ts_lost;
fbn->tx_stats.ts_packets += stats->ts_packets;
fbn->tx_stats.twq.csum_partial += stats->twq.csum_partial;
fbn->tx_stats.twq.ts_lost += stats->twq.ts_lost;
fbn->tx_stats.twq.ts_packets += stats->twq.ts_packets;
fbn->tx_stats.twq.stop += stats->twq.stop;
fbn->tx_stats.twq.wake += stats->twq.wake;
/* Remember to add new stats here */
BUILD_BUG_ON(sizeof(fbn->tx_stats.twq) / 8 != 5);
}
static void fbnic_remove_tx_ring(struct fbnic_net *fbn,
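
The aggregate helpers are what keep the reported totals monotonic: before a
ring is torn down (as the truncated fbnic_remove_tx_ring() context suggests),
its counters are folded into fbn->tx_stats/rx_stats, which
fbnic_get_base_stats() then reports alongside the live per-queue values. A
simplified sketch of that call pattern (assumed, not copied from the driver):

  /* Hypothetical simplification of a Tx ring teardown path. */
  static void demo_remove_tx_ring(struct fbnic_net *fbn,
                                  struct fbnic_ring *txr)
  {
          /* Fold the ring's counters into the netdev-wide accumulators so
           * values reported via get_base_stats() never go backwards.
           */
          fbnic_aggregate_ring_tx_counters(fbn, txr);

          /* ... unhook the ring from fbn->tx[] and free its resources ... */
  }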


@@ -56,9 +56,21 @@ struct fbnic_pkt_buff {
struct fbnic_queue_stats {
u64 packets;
u64 bytes;
union {
struct {
u64 csum_partial;
u64 ts_packets;
u64 ts_lost;
u64 stop;
u64 wake;
} twq;
struct {
u64 alloc_failed;
u64 csum_complete;
u64 csum_none;
} rx;
};
u64 dropped;
u64 ts_packets;
u64 ts_lost;
struct u64_stats_sync syncp;
};
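
The twq and rx groups share storage because a given ring only ever plays one
role (Tx work queue vs. Rx queue), so the union adds no per-ring memory. The
BUILD_BUG_ON()s in the aggregation helpers lean on every member being a u64:
sizeof()/8 is the member count, so a new field trips the assert until the
constant, and by convention the sums, are updated. A generic sketch of that
guard (illustrative, not fbnic code):

  #include <linux/build_bug.h>
  #include <linux/types.h>

  struct demo_twq_stats {
          u64 csum_partial;
          u64 ts_packets;
          u64 ts_lost;
          u64 stop;
          u64 wake;
  };

  static void demo_aggregate(struct demo_twq_stats *sum,
                             const struct demo_twq_stats *ring)
  {
          sum->csum_partial += ring->csum_partial;
          sum->ts_packets += ring->ts_packets;
          sum->ts_lost += ring->ts_lost;
          sum->stop += ring->stop;
          sum->wake += ring->wake;

          /* All members are u64, so size / 8 == member count; adding a
           * field changes the size and fails the build until this constant
           * (and the sums above) are brought back in sync.
           */
          BUILD_BUG_ON(sizeof(*sum) / 8 != 5);
  }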


@@ -23,6 +23,7 @@ struct netdev_queue_stats_rx {
u64 hw_drops;
u64 hw_drop_overruns;
u64 csum_complete;
u64 csum_unnecessary;
u64 csum_none;
u64 csum_bad;


@@ -581,6 +581,7 @@ netdev_nl_stats_write_rx(struct sk_buff *rsp, struct netdev_queue_stats_rx *rx)
netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_ALLOC_FAIL, rx->alloc_fail) ||
netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_DROPS, rx->hw_drops) ||
netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_HW_DROP_OVERRUNS, rx->hw_drop_overruns) ||
netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_CSUM_COMPLETE, rx->csum_complete) ||
netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_CSUM_UNNECESSARY, rx->csum_unnecessary) ||
netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_CSUM_NONE, rx->csum_none) ||
netdev_stat_put(rsp, NETDEV_A_QSTATS_RX_CSUM_BAD, rx->csum_bad) ||