"cat /proc/net/dev" uses RCU protection only.
Its quite possible we call a driver get_stats() method while device is
dismantling and freeing its data structures.
So get_stats() methods must be very careful not accessing driver private
data without appropriate locking.
In ixgbe case, we access rx_ring pointers. These pointers are freed in
ixgbe_clear_interrupt_scheme() and set to NULL, this can trigger NULL
dereference in ixgbe_get_stats64()
A possible fix is to use RCU locking in ixgbe_get_stats64() and defer
rx_ring freeing after a grace period in ixgbe_clear_interrupt_scheme()
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Reported-by: Tantilov, Emil S <emil.s.tantilov@intel.com>
Tested-by: Ross Brattain <ross.b.brattain@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
unsigned int size; /* length in bytes */
dma_addr_t dma; /* phys. address of descriptor ring */
unsigned int size; /* length in bytes */
dma_addr_t dma; /* phys. address of descriptor ring */
} ____cacheline_internodealigned_in_smp;
enum ixgbe_ring_f_enum {
} ____cacheline_internodealigned_in_smp;
enum ixgbe_ring_f_enum {
+static void ring_free_rcu(struct rcu_head *head)
+{
+ kfree(container_of(head, struct ixgbe_ring, rcu));
+}
+
/**
* ixgbe_clear_interrupt_scheme - Clear the current interrupt scheme settings
* @adapter: board private structure to clear interrupt scheme on
/**
* ixgbe_clear_interrupt_scheme - Clear the current interrupt scheme settings
* @adapter: board private structure to clear interrupt scheme on
adapter->tx_ring[i] = NULL;
}
for (i = 0; i < adapter->num_rx_queues; i++) {
adapter->tx_ring[i] = NULL;
}
for (i = 0; i < adapter->num_rx_queues; i++) {
- kfree(adapter->rx_ring[i]);
+ struct ixgbe_ring *ring = adapter->rx_ring[i];
+
+ /* ixgbe_get_stats64() might access this ring, we must wait
+ * a grace period before freeing it.
+ */
+ call_rcu(&ring->rcu, ring_free_rcu);
adapter->rx_ring[i] = NULL;
}
adapter->rx_ring[i] = NULL;
}
/* accurate rx/tx bytes/packets stats */
dev_txq_stats_fold(netdev, stats);
/* accurate rx/tx bytes/packets stats */
dev_txq_stats_fold(netdev, stats);
for (i = 0; i < adapter->num_rx_queues; i++) {
for (i = 0; i < adapter->num_rx_queues; i++) {
- struct ixgbe_ring *ring = adapter->rx_ring[i];
+ struct ixgbe_ring *ring = ACCESS_ONCE(adapter->rx_ring[i]);
u64 bytes, packets;
unsigned int start;
u64 bytes, packets;
unsigned int start;
- do {
- start = u64_stats_fetch_begin_bh(&ring->syncp);
- packets = ring->stats.packets;
- bytes = ring->stats.bytes;
- } while (u64_stats_fetch_retry_bh(&ring->syncp, start));
- stats->rx_packets += packets;
- stats->rx_bytes += bytes;
+ if (ring) {
+ do {
+ start = u64_stats_fetch_begin_bh(&ring->syncp);
+ packets = ring->stats.packets;
+ bytes = ring->stats.bytes;
+ } while (u64_stats_fetch_retry_bh(&ring->syncp, start));
+ stats->rx_packets += packets;
+ stats->rx_bytes += bytes;
+ }
/* following stats updated by ixgbe_watchdog_task() */
stats->multicast = netdev->stats.multicast;
stats->rx_errors = netdev->stats.rx_errors;
/* following stats updated by ixgbe_watchdog_task() */
stats->multicast = netdev->stats.multicast;
stats->rx_errors = netdev->stats.rx_errors;
dca_unregister_notify(&dca_notifier);
#endif
pci_unregister_driver(&ixgbe_driver);
dca_unregister_notify(&dca_notifier);
#endif
pci_unregister_driver(&ixgbe_driver);
+ rcu_barrier(); /* Wait for completion of call_rcu()'s */
}
#ifdef CONFIG_IXGBE_DCA
}
#ifdef CONFIG_IXGBE_DCA