netifd: rewrite packet steering script
The new script uses a different strategy than the previous one: instead of trying to split flows by hash and spread them across all CPUs, it uses RPS to redirect packets to a single core only. It tries to spread the NAPI threads and the RPS target CPUs across the available CPUs, and to ensure that each NAPI thread runs on a different CPU than its RPS target. This significantly reduces cycles wasted in the scheduler.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
This commit is contained in:
parent
c2f52e42b1
commit
a205a5734e
|
@ -21,7 +21,7 @@ include $(INCLUDE_DIR)/cmake.mk
|
||||||
# Package description for netifd; the ucode packet steering script
# requires the ucode interpreter and its fs module at runtime.
define Package/netifd
  SECTION:=base
  CATEGORY:=Base system
  DEPENDS:=+libuci +libnl-tiny +libubus +ubus +ubusd +jshn +libubox +libudebug +ucode +ucode-mod-fs
  TITLE:=OpenWrt Network Interface Configuration Daemon
endef
|
|
||||||
|
|
|
@ -14,5 +14,10 @@ service_triggers() {
|
||||||
}
|
}
|
||||||
|
|
||||||
reload_service() {
	# Read the global packet steering switch from uci and pass it on,
	# preferring a platform-specific implementation when one exists.
	packet_steering="$(uci get "network.@globals[0].packet_steering")"
	if [ -e "/usr/libexec/platform/packet-steering.sh" ]; then
		/usr/libexec/platform/packet-steering.sh "$packet_steering"
	else
		/usr/libexec/network/packet-steering.uc "$packet_steering"
	fi
}
|
||||||
|
|
|
@ -1,70 +0,0 @@
|
||||||
#!/bin/sh

# Number of online CPUs, counted from /proc/cpuinfo.
NPROCS="$(grep -c "^processor.*:" /proc/cpuinfo)"

# Packet steering is pointless on a single-core system.
[ "$NPROCS" -gt 1 ] || exit

# Bitmask covering every CPU (e.g. 0xf for 4 CPUs).
PROC_MASK="$(( (1 << $NPROCS) - 1 ))"
|
|
||||||
|
|
||||||
# Print the index of the first CPU that has handled at least one
# interrupt for the given device, according to /proc/interrupts.
# Prints 0 when the device has no matching interrupt line.
find_irq_cpu() {
	local device="$1"
	local line="$(grep -m 1 "$device\$" /proc/interrupts)"
	local result=0

	if [ -n "$line" ]; then
		# Fields after the IRQ number are one counter per CPU.
		set -- $line
		shift
		for idx in $(seq 1 $NPROCS); do
			if [ "$1" -gt 0 ]; then
				result=$(($idx - 1))
				break
			fi
			shift
		done
	fi

	echo "$result"
}
|
|
||||||
|
|
||||||
# Convert a value to hex and write it into a (sysfs) file,
# echoing the assignment first when DEBUG is set.
set_hex_val() {
	local path="$1"
	local value="$2"

	value="$(printf %x "$value")"
	[ -n "$DEBUG" ] && echo "$path = $value"
	echo "$value" > "$path"
}
|
|
||||||
|
|
||||||
# Only act when packet steering is enabled in the global network config.
packet_steering="$(uci get "network.@globals[0].packet_steering")"
[ "$packet_steering" != 1 ] && exit 0

# Serialize concurrent invocations of this script.
exec 512>/var/lock/smp_tune.lock
flock 512 || exit 1

# A platform-specific implementation takes precedence.
[ -e "/usr/libexec/platform/packet-steering.sh" ] && {
	/usr/libexec/platform/packet-steering.sh
	exit 0
}

for dev in /sys/class/net/*; do
	[ -d "$dev" ] || continue

	# ignore virtual interfaces
	[ -n "$(ls "${dev}/" | grep '^lower_')" ] && continue
	[ -d "${dev}/device" ] || continue

	device="$(readlink "${dev}/device")"
	device="$(basename "$device")"
	irq_cpu="$(find_irq_cpu "$device")"
	irq_cpu_mask="$((1 << $irq_cpu))"

	# Allow TX queue processing (XPS) on every CPU.
	for q in ${dev}/queues/tx-*; do
		set_hex_val "$q/xps_cpus" "$PROC_MASK"
	done

	# ignore dsa slave ports for RPS
	subsys="$(readlink "${dev}/device/subsystem")"
	subsys="$(basename "$subsys")"
	[ "$subsys" = "mdio_bus" ] && continue

	# Allow RX packet steering (RPS) onto every CPU.
	for q in ${dev}/queues/rx-*; do
		set_hex_val "$q/rps_cpus" "$PROC_MASK"
	done
done
|
|
|
@ -0,0 +1,200 @@
|
||||||
|
#!/usr/bin/env ucode
'use strict';

import { glob, basename, dirname, readlink, readfile, realpath, writefile, error, open } from "fs";

// Load accounted to a CPU for hosting a device's NAPI thread.
let napi_weight = 1.0;
// Fraction of a CPU's load charged to its SMT siblings (same core).
let cpu_thread_weight = 0.75;
// Load accounted to a CPU chosen as an RPS (RX steering) target.
let rx_weight = 0.75;
// Extra weight that keeps other work away from ethernet NAPI CPUs.
let eth_bias = 2.0;
let debug = 0, do_nothing = 0;
let disable;
let cpus;

// Command line: -d (debug, repeatable), -n (dry run),
// "0" (packet steering disabled in uci - undo any steering).
for (let arg in ARGV) {
	switch (arg) {
	case "-d":
		debug++;
		break;
	case "-n":
		do_nothing++;
		break;
	case '0':
		disable = true;
		break;
	}
}
|
||||||
|
|
||||||
|
// Return the command name of a process, taken from the first
// ("Name:\t<comm>") line of /proc/<pid>/status.
function task_name(pid)
{
	let status = open(`/proc/${pid}/status`, "r");
	let first_line = status.read("line");

	status.close();

	return trim(split(first_line, "\t", 2)[1]);
}
|
||||||
|
|
||||||
|
// Pin a task to the given CPU via taskset(1). When steering is
// disabled, restore the task's affinity to all CPUs instead.
function set_task_cpu(pid, cpu) {
	if (disable)
		cpu = join(",", map(cpus, (cpu) => cpu.id));

	if (debug || do_nothing)
		warn(`taskset -p -c ${cpu} ${task_name(pid)}\n`);

	if (!do_nothing)
		system(`taskset -p -c ${cpu} ${pid}`);
}
|
||||||
|
|
||||||
|
// Direct RPS for every RX queue of a network device to the given CPU
// by writing the queue's rps_cpus bitmask. When steering is disabled,
// the mask is cleared instead.
function set_netdev_cpu(dev, cpu) {
	let mask_files = glob(`/sys/class/net/${dev}/queues/rx-*/rps_cpus`);
	let mask = sprintf("%x", (1 << int(cpu)));

	if (disable)
		mask = 0;

	for (let file in mask_files) {
		if (debug || do_nothing)
			warn(`echo ${mask} > ${file}\n`);

		if (!do_nothing)
			writefile(file, `${mask}`);
	}
}
|
||||||
|
|
||||||
|
// Decide whether a kernel task (identified by its name) services the
// given physical device, based on known NAPI/driver thread naming.
function task_device_match(name, device)
{
	// Threaded NAPI workers are named "napi/<dev>-<id>". The capture
	// needs a quantifier: a single-character class ("[^-+]") could
	// never match multi-character names such as "phy0" or "eth0",
	// so no NAPI thread would ever be matched to its device.
	let napi_match = match(name, /napi\/([^-]+)-\d+/);
	if (!napi_match)
		napi_match = match(name, /mt76-tx (phy\d+)/);

	// Match either against the device's wifi phys or its netdevs.
	if (napi_match &&
	    (index(device.phy, napi_match[1]) >= 0 ||
	     index(device.netdev, napi_match[1]) >= 0))
		return true;

	// mtk_soc_eth uses its own NAPI thread naming scheme.
	if (device.driver == "mtk_soc_eth" && match(name, /napi\/mtk_eth-/))
		return true;

	return false;
}
|
||||||
|
|
||||||
|
// Build the CPU table from sysfs: logical id, physical core id and an
// accumulator for the load assigned by this script.
cpus = map(glob("/sys/bus/cpu/devices/*"), (path) => {
	return {
		id: int(match(path, /.*cpu(\d+)/)[1]),
		core: int(trim(readfile(`${path}/topology/core_id`))),
		load: 0.0,
	};
});

// The CPU masks written below fit a single 64-bit word; cap at 64 CPUs.
cpus = slice(cpus, 0, 64);

// Nothing to steer on a single-CPU system.
if (length(cpus) < 2)
	exit(0);
|
||||||
|
|
||||||
|
// Charge load to a CPU, and a reduced share (cpu_thread_weight) to any
// SMT sibling that lives on the same physical core.
function cpu_add_weight(cpu_id, weight)
{
	let target = cpus[cpu_id];

	target.load += weight;

	for (let other in cpus) {
		if (other == target || other.core != target.core)
			continue;

		other.load += weight * cpu_thread_weight;
	}
}
|
||||||
|
|
||||||
|
// Pick the least-loaded CPU, skipping prev_cpu when possible (used to
// keep the RPS target off the NAPI CPU), charge it with the given
// weight and return its id. Always returns 0 when steering is disabled.
function get_next_cpu(weight, prev_cpu)
{
	if (disable)
		return 0;

	// Sort a copy by accumulated load; "cpus" itself stays ordered by id.
	let by_load = sort(slice(cpus), (a, b) => a.load - b.load);
	let pick = 0;

	if (prev_cpu != null && by_load[pick].id == prev_cpu)
		pick++;

	let cpu = by_load[pick].id;

	cpu_add_weight(cpu, weight);

	return cpu;
}
|
||||||
|
|
||||||
|
let phys_devs = {};
let netdev_phys = {};
let netdevs = map(glob("/sys/class/net/*"), (dev) => basename(dev));

// Group network interfaces by their underlying physical device and
// record the wifi phys backing each one.
for (let dev in netdevs) {
	let pdev_path = realpath(`/sys/class/net/${dev}/device`);

	// Skip interfaces without a physical device behind them.
	if (!pdev_path)
		continue;

	// Skip stacked interfaces (anything with lower_* links).
	if (length(glob(`/sys/class/net/${dev}/lower_*`)) > 0)
		continue;

	let pdev = phys_devs[pdev_path];
	if (!pdev) {
		pdev = phys_devs[pdev_path] = {
			path: pdev_path,
			driver: basename(readlink(`${pdev_path}/driver`)),
			netdev: [],
			phy: [],
			tasks: [],
		};
	}

	// Wireless interfaces expose their phy index via phy80211/index.
	let phyidx = trim(readfile(`/sys/class/net/${dev}/phy80211/index`));
	if (phyidx != null) {
		let phy = `phy${phyidx}`;

		if (index(pdev.phy, phy) < 0)
			push(pdev.phy, phy);
	}

	push(pdev.netdev, dev);
	netdev_phys[dev] = pdev;
}
|
||||||
|
|
||||||
|
// Walk all processes and attach kernel threads to their devices.
// Kernel threads are recognized by their unreadable /proc/<pid>/exe
// link (readlink fails with "No such file or directory").
for (let path in glob("/proc/*/exe")) {
	readlink(path);
	if (error() != "No such file or directory")
		continue;

	let pid = basename(dirname(path));
	let name = task_name(pid);

	// A task belongs to at most one physical device.
	for (let devname in phys_devs) {
		let dev = phys_devs[devname];

		if (!task_device_match(name, dev))
			continue;

		push(dev.tasks, pid);
		break;
	}
}
|
||||||
|
|
||||||
|
// Assign CPUs to a device: one CPU for its NAPI thread(s) and a
// different CPU as the RPS target of its interfaces.
function assign_dev_cpu(dev) {
	if (length(dev.tasks) > 0) {
		let cpu = dev.napi_cpu = get_next_cpu(napi_weight);

		for (let task in dev.tasks)
			set_task_cpu(task, cpu);
	}

	if (length(dev.netdev) > 0) {
		// Pass napi_cpu so RX steering avoids the NAPI CPU.
		let cpu = dev.rx_cpu = get_next_cpu(rx_weight, dev.napi_cpu);

		for (let netdev in dev.netdev)
			set_netdev_cpu(netdev, cpu);
	}
}
|
||||||
|
|
||||||
|
// Assign ethernet devices first
for (let devname in phys_devs) {
	let dev = phys_devs[devname];

	if (!length(dev.phy))
		assign_dev_cpu(dev);
}

// Add bias to avoid assigning other tasks to CPUs with ethernet NAPI
for (let devname in phys_devs) {
	let dev = phys_devs[devname];

	if (!length(dev.tasks) || dev.napi_cpu == null)
		continue;

	cpu_add_weight(dev.napi_cpu, eth_bias);
}

// Assign WLAN devices
for (let devname in phys_devs) {
	let dev = phys_devs[devname];

	if (length(dev.phy) > 0)
		assign_dev_cpu(dev);
}

// With -dd, dump the final device/CPU assignment for inspection.
if (debug > 1)
	warn(sprintf("devices: %.J\ncpus: %.J\n", phys_devs, cpus));
|
Loading…
Reference in New Issue