From d8b1e11924dd2c45f0bd4df21b5f33f4c46cf815 Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Mon, 28 Jul 2014 00:21:37 +0200 Subject: [PATCH 1/6] Add new package lua-ethtool-stats containing a minimal interface to replace `ethtool -S` --- libs/lua-ethtool-stats/Makefile | 34 +++++ libs/lua-ethtool-stats/README | 7 + libs/lua-ethtool-stats/src/Makefile | 6 + libs/lua-ethtool-stats/src/ethtool_stats.c | 170 +++++++++++++++++++++ 4 files changed, 217 insertions(+) create mode 100644 libs/lua-ethtool-stats/Makefile create mode 100644 libs/lua-ethtool-stats/README create mode 100644 libs/lua-ethtool-stats/src/Makefile create mode 100644 libs/lua-ethtool-stats/src/ethtool_stats.c diff --git a/libs/lua-ethtool-stats/Makefile b/libs/lua-ethtool-stats/Makefile new file mode 100644 index 0000000..1cdc772 --- /dev/null +++ b/libs/lua-ethtool-stats/Makefile @@ -0,0 +1,34 @@ +include $(TOPDIR)/rules.mk + +PKG_NAME:=lua-ethtool-stats +PKG_VERSION:=1 + +PKG_BUILD_DIR := $(BUILD_DIR)/$(PKG_NAME) + +include $(INCLUDE_DIR)/package.mk + +define Package/lua-ethtool-stats + SECTION:=libs + CATEGORY:=Libraries + TITLE:=Lua library to obtain interface stats via ethtool + DEPENDS:=+lua +endef + +define Build/Prepare + mkdir -p $(PKG_BUILD_DIR) + $(CP) ./src/* $(PKG_BUILD_DIR)/ +endef + +define Build/Configure +endef + +define Build/Compile + CFLAGS="$(TARGET_CFLAGS)" CPPFLAGS="$(TARGET_CPPFLAGS)" $(MAKE) -C $(PKG_BUILD_DIR) $(TARGET_CONFIGURE_OPTS) +endef + +define Package/lua-ethtool-stats/install + $(INSTALL_DIR) $(1)/usr/lib/lua + $(CP) $(PKG_BUILD_DIR)/ethtool_stats.so $(1)/usr/lib/lua/ +endef + +$(eval $(call BuildPackage,lua-ethtool-stats)) diff --git a/libs/lua-ethtool-stats/README b/libs/lua-ethtool-stats/README new file mode 100644 index 0000000..87d683c --- /dev/null +++ b/libs/lua-ethtool-stats/README @@ -0,0 +1,7 @@ +This package provides a Lua module with the name 'ethtool_stats'. The module has +a single function, interface_stats(). 
+ +interface_stats() expects one parameter, the name of the interface to get the +stats from. It returns a table with string keys and numeric values which +contains the stats returned by ethtool (the same `ethtool -S interface` would +yield). diff --git a/libs/lua-ethtool-stats/src/Makefile b/libs/lua-ethtool-stats/src/Makefile new file mode 100644 index 0000000..0b8e9a9 --- /dev/null +++ b/libs/lua-ethtool-stats/src/Makefile @@ -0,0 +1,6 @@ +all: ethtool_stats.so + +CFLAGS += -Wall + +ethtool_stats.so: ethtool_stats.c + $(CC) $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) -shared -fPIC -o $@ $^ $(LDLIBS) diff --git a/libs/lua-ethtool-stats/src/ethtool_stats.c b/libs/lua-ethtool-stats/src/ethtool_stats.c new file mode 100644 index 0000000..7d11597 --- /dev/null +++ b/libs/lua-ethtool-stats/src/ethtool_stats.c @@ -0,0 +1,170 @@ + +/* + Copyright (c) 2014, Matthias Schiffer + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + + +#define _GNU_SOURCE + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +#include +#include + +#include +#include +#include + + +struct stats_context { /* State of one interface_stats() call; lives in a Lua userdata and is released by ctx_gc() */ + struct ifreq ifr; /* request block handed to every SIOCETHTOOL ioctl; ifr_name selects the interface */ + int fd; /* socket the ioctls are issued on */ + + struct ethtool_gstrings *strings; /* calloc-ed statistic names, NULL until fetched */ + struct ethtool_stats *stats; /* calloc-ed statistic values, NULL until fetched */ +}; + + +static inline void do_ioctl(lua_State *L, struct stats_context *ctx, void *data) { /* Point ifr_data at data and run SIOCETHTOOL; a failing ioctl raises a Lua error carrying strerror(errno) */ + ctx->ifr.ifr_data = data; + if (ioctl(ctx->fd, SIOCETHTOOL, &ctx->ifr) < 0) + luaL_error(L, "ioctl: %s", strerror(errno)); +} + +static inline uint32_t get_stats_length(lua_State *L, struct stats_context *ctx) { /* Query ETHTOOL_GSSET_INFO for the ETH_SS_STATS set; yields the reported count, or 0 when the returned mask is empty */ + const size_t sset_info_len = sizeof(struct ethtool_sset_info) + sizeof(uint32_t); /* header plus one uint32_t slot for the single requested set */ + struct ethtool_sset_info *sset_info = alloca(sset_info_len); + memset(sset_info, 0, sset_info_len); + + sset_info->cmd = ETHTOOL_GSSET_INFO; + sset_info->sset_mask = 1ull << ETH_SS_STATS; + do_ioctl(L, ctx, sset_info); + + return sset_info->sset_mask ? 
sset_info->data[0] : 0; +} + +static inline void get_stats_strings(lua_State *L, struct stats_context *ctx) { + uint32_t n_stats = get_stats_length(L, ctx); + + if (!n_stats) + return; + + ctx->strings = calloc(1, sizeof(*ctx->strings) + n_stats * ETH_GSTRING_LEN); + if (!ctx->strings) { + luaL_error(L, "calloc: %s", strerror(errno)); + return; + } + + ctx->strings->cmd = ETHTOOL_GSTRINGS; + ctx->strings->string_set = ETH_SS_STATS; + ctx->strings->len = n_stats; + + do_ioctl(L, ctx, ctx->strings); +} + +static inline int get_stats(lua_State *L, struct stats_context *ctx) { + get_stats_strings(L, ctx); + + if (!ctx->strings) { + lua_newtable(L); + return 1; + } + + ctx->stats = calloc(1, sizeof(struct ethtool_stats) + ctx->strings->len * sizeof(uint64_t)); + if (!ctx->stats) + return luaL_error(L, "calloc: %s", strerror(errno)); + + ctx->stats->cmd = ETHTOOL_GSTATS; + ctx->stats->n_stats = ctx->strings->len; + + do_ioctl(L, ctx, ctx->stats); + + lua_createtable(L, 0, ctx->strings->len); + + size_t i; + for (i = 0; i < ctx->strings->len; i++) { + const char *key = (const char*)&ctx->strings->data[i * ETH_GSTRING_LEN]; + lua_pushlstring(L, key, strnlen(key, ETH_GSTRING_LEN)); + lua_pushnumber(L, (lua_Number)ctx->stats->data[i]); + lua_settable(L, -3); + } + + return 1; +} + + +static int interface_stats(lua_State *L) { + const char *ifname = luaL_checkstring(L, 1); + + struct stats_context *ctx = lua_newuserdata(L, sizeof(*ctx)); + memset(ctx, 0, sizeof(*ctx)); + + luaL_getmetatable(L, "ethtool_stats.ctx"); + lua_setmetatable(L, -2); + + strncpy(ctx->ifr.ifr_name, ifname, IFNAMSIZ); + + ctx->fd = socket(AF_INET, SOCK_DGRAM, 0); + if (ctx->fd < 0) + return luaL_error(L, "socket: %s", strerror(errno)); + + return get_stats(L, ctx); +} + +static int ctx_gc(lua_State *L) { + struct stats_context *ctx = lua_touserdata(L, 1); + + if (ctx->fd >= 0) + close(ctx->fd); + + free(ctx->strings); + free(ctx->stats); + + return 0; +} + +static const luaL_reg R[] = { + 
{"interface_stats", interface_stats}, + {NULL, NULL }, +}; + +LUALIB_API int luaopen_ethtool_stats(lua_State *L) { /* Module entry point: creates the ethtool_stats.ctx metatable with ctx_gc as its __gc handler, registers the function table R as ethtool_stats and returns the one value luaL_register leaves on the stack */ + luaL_newmetatable(L, "ethtool_stats.ctx"); + lua_pushstring(L, "__gc"); + lua_pushcfunction(L, ctx_gc); + lua_settable(L, -3); + + luaL_register(L, "ethtool_stats", R); + return 1; +} From 99d517e40379bc21348c5244c2b55a626e01bacf Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Mon, 28 Jul 2014 00:58:07 +0200 Subject: [PATCH 2/6] gluon-mesh-batman-adv: use lua-ethtool-stats instead of ethtool --- gluon/gluon-announce/Makefile | 2 +- .../files/lib/gluon/announce/statistics.d/traffic | 7 ++----- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/gluon/gluon-announce/Makefile b/gluon/gluon-announce/Makefile index 81c35df..3f62c88 100644 --- a/gluon/gluon-announce/Makefile +++ b/gluon/gluon-announce/Makefile @@ -11,7 +11,7 @@ include $(INCLUDE_DIR)/package.mk define Package/gluon-announce SECTION:=gluon CATEGORY:=Gluon - DEPENDS:=+gluon-core +luci-lib-json +ethtool + DEPENDS:=+gluon-core +luci-lib-json +lua-ethtool-stats TITLE:=Lua scripts announcing various information endef diff --git a/gluon/gluon-mesh-batman-adv/files/lib/gluon/announce/statistics.d/traffic b/gluon/gluon-mesh-batman-adv/files/lib/gluon/announce/statistics.d/traffic index 378125e..01f6b4a 100644 --- a/gluon/gluon-mesh-batman-adv/files/lib/gluon/announce/statistics.d/traffic +++ b/gluon/gluon-mesh-batman-adv/files/lib/gluon/announce/statistics.d/traffic @@ -1,9 +1,6 @@ -local ethtool = util.exec('ethtool -S bat0') +local ethtool = require 'ethtool_stats' -local fields = {} -for k, v in ethtool:gmatch('([%a_]+): ([0-9]+)') do - fields[k] = tonumber(v) -end +local fields = ethtool.interface_stats('bat0') local traffic = {} for _, class in ipairs({'rx', 'tx', 'forward', 'mgmt_rx', 'mgmt_tx'}) do From 30e8c0e6bbd53a19314e26193fc7ab7350450805 Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Mon, 28 Jul 2014 05:23:19 +0200 Subject: [PATCH 3/6] gluon-mesh-batman-adv: 
don't parse ip output for nodeinfo --- .../lib/gluon/announce/nodeinfo.d/network/addresses | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/gluon/gluon-mesh-batman-adv/files/lib/gluon/announce/nodeinfo.d/network/addresses b/gluon/gluon-mesh-batman-adv/files/lib/gluon/announce/nodeinfo.d/network/addresses index 6df9687..d55f834 100644 --- a/gluon/gluon-mesh-batman-adv/files/lib/gluon/announce/nodeinfo.d/network/addresses +++ b/gluon/gluon-mesh-batman-adv/files/lib/gluon/announce/nodeinfo.d/network/addresses @@ -1,8 +1,12 @@ -local ip = util.exec('ip -o -6 addr show dev br-client') +local ip = require 'luci.ip' local addresses = {} -for _, line in ipairs(util.split(ip)) do - table.insert(addresses, line:match('inet6 ([%x:]+)/')) + +for line in io.lines('/proc/net/if_inet6') do + local matches = { line:match('^' .. string.rep('(%x%x%x%x)', 8) .. string.rep(' %x+', 4) .. '%s+([^%s]+)$') } --[[ eight address groups, four hex fields of at least two digits (the interface index can be wider), then the device name ]] + if matches[9] == 'br-client' then + table.insert(addresses, ip.IPv6(string.format('%s:%s:%s:%s:%s:%s:%s:%s', unpack(matches))):string():lower()) + end end return addresses From 9c142642e0fceaf2238596393523963e296a221b Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Mon, 28 Jul 2014 05:24:28 +0200 Subject: [PATCH 4/6] Remove outdated IP dependencies --- gluon/gluon-next-node/Makefile | 2 +- gluon/gluon-setup-mode/Makefile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/gluon/gluon-next-node/Makefile b/gluon/gluon-next-node/Makefile index f906bbd..df27ecb 100644 --- a/gluon/gluon-next-node/Makefile +++ b/gluon/gluon-next-node/Makefile @@ -11,7 +11,7 @@ define Package/gluon-next-node SECTION:=gluon CATEGORY:=Gluon TITLE:=Next-node anycast address - DEPENDS:=+gluon-core +gluon-ebtables +gluon-mesh-batman-adv +ip +kmod-macvlan + DEPENDS:=+gluon-core +gluon-ebtables +gluon-mesh-batman-adv +kmod-macvlan endef define Package/gluon-next-node/description diff --git a/gluon/gluon-setup-mode/Makefile b/gluon/gluon-setup-mode/Makefile 
index 39952dd..e926fee 100644 --- a/gluon/gluon-setup-mode/Makefile +++ b/gluon/gluon-setup-mode/Makefile @@ -14,7 +14,7 @@ define Package/gluon-setup-mode SECTION:=gluon CATEGORY:=Gluon TITLE:=Setup mode - DEPENDS:=+gluon-core +uhttpd +dnsmasq +ip + DEPENDS:=+gluon-core +uhttpd +dnsmasq endef define Package/gluon-setup-mode/description From 54ce71d3277dff2210d60b7ecbec27f39ebb5685 Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Tue, 29 Jul 2014 02:14:35 +0200 Subject: [PATCH 5/6] gluon-radvd: implement own radvd to reduce size --- gluon/gluon-radvd/Makefile | 8 +- .../gluon-radvd/files/etc/init.d/gluon-radvd | 18 +- .../files/lib/gluon/radvd/generate_config | 14 - .../upgrade/radvd/invariant/10-radvd-user | 2 +- gluon/gluon-radvd/src/Makefile | 4 + gluon/gluon-radvd/src/gluon-radvd.c | 647 ++++++++++++++++++ 6 files changed, 662 insertions(+), 31 deletions(-) delete mode 100755 gluon/gluon-radvd/files/lib/gluon/radvd/generate_config create mode 100644 gluon/gluon-radvd/src/Makefile create mode 100644 gluon/gluon-radvd/src/gluon-radvd.c diff --git a/gluon/gluon-radvd/Makefile b/gluon/gluon-radvd/Makefile index 2b99c28..4736af6 100644 --- a/gluon/gluon-radvd/Makefile +++ b/gluon/gluon-radvd/Makefile @@ -1,7 +1,7 @@ include $(TOPDIR)/rules.mk PKG_NAME:=gluon-radvd -PKG_VERSION:=2 +PKG_VERSION:=3 PKG_BUILD_DIR := $(BUILD_DIR)/$(PKG_NAME) @@ -11,7 +11,7 @@ define Package/gluon-radvd SECTION:=gluon CATEGORY:=Gluon TITLE:=Advertise an IPv6 prefix from the node - DEPENDS:=+gluon-core +gluon-ebtables +gluon-mesh-batman-adv +radvd + DEPENDS:=+gluon-core +gluon-ebtables +gluon-mesh-batman-adv +librt endef define Package/gluon-radvd/description @@ -20,16 +20,20 @@ endef define Build/Prepare mkdir -p $(PKG_BUILD_DIR) + $(CP) ./src/* $(PKG_BUILD_DIR)/ endef define Build/Configure endef define Build/Compile + CFLAGS="$(TARGET_CFLAGS)" CPPFLAGS="$(TARGET_CPPFLAGS)" $(MAKE) -C $(PKG_BUILD_DIR) $(TARGET_CONFIGURE_OPTS) endef define Package/gluon-radvd/install $(CP) ./files/* 
$(1)/ + $(INSTALL_DIR) $(1)/usr/sbin + $(INSTALL_BIN) $(PKG_BUILD_DIR)/gluon-radvd $(1)/usr/sbin/ endef $(eval $(call BuildPackage,gluon-radvd)) diff --git a/gluon/gluon-radvd/files/etc/init.d/gluon-radvd b/gluon/gluon-radvd/files/etc/init.d/gluon-radvd index 83814d5..14815e7 100755 --- a/gluon/gluon-radvd/files/etc/init.d/gluon-radvd +++ b/gluon/gluon-radvd/files/etc/init.d/gluon-radvd @@ -2,24 +2,14 @@ START=50 -SERVICE_USE_PID=1 -SERVICE_NAME=gluon-radvd -SERVICE_PID_DIR=/var/run/gluon-radvd -SERVICE_PID_FILE="$SERVICE_PID_DIR"/gluon-radvd.pid +SERVICE_WRITE_PID=1 +SERVICE_DAEMONIZE=1 -radvd_conf=/var/gluon/radvd/radvd.conf start() { - mkdir -p "$SERVICE_PID_DIR" - chown gluon-radvd "$SERVICE_PID_DIR" - - mkdir -p "$(dirname "$radvd_conf")" - /lib/gluon/radvd/generate_config > "$radvd_conf" - - service_start /usr/sbin/radvd -C "$radvd_conf" -m stderr_syslog -u gluon-radvd -p "$SERVICE_PID_FILE" + service_start /usr/sbin/gluon-radvd -i br-client -p $(lua -e 'print(require("gluon.site_config").prefix6)') } stop() { - service_stop /usr/sbin/radvd - rm "$radvd_conf" + service_stop /usr/sbin/gluon-radvd } diff --git a/gluon/gluon-radvd/files/lib/gluon/radvd/generate_config b/gluon/gluon-radvd/files/lib/gluon/radvd/generate_config deleted file mode 100755 index 7c0e3a9..0000000 --- a/gluon/gluon-radvd/files/lib/gluon/radvd/generate_config +++ /dev/null @@ -1,14 +0,0 @@ -#!/usr/bin/lua - -local site = require 'gluon.site_config' - -print([[ -interface br-client -{ - IgnoreIfMissing on; - AdvSendAdvert on; - AdvDefaultLifetime 0; - - prefix ]] .. site.prefix6 .. 
[[ {}; -}; -]]) diff --git a/gluon/gluon-radvd/files/lib/gluon/upgrade/radvd/invariant/10-radvd-user b/gluon/gluon-radvd/files/lib/gluon/upgrade/radvd/invariant/10-radvd-user index d2be86a..036406a 100755 --- a/gluon/gluon-radvd/files/lib/gluon/upgrade/radvd/invariant/10-radvd-user +++ b/gluon/gluon-radvd/files/lib/gluon/upgrade/radvd/invariant/10-radvd-user @@ -2,4 +2,4 @@ local users = require 'gluon.users' -users.add_user('gluon-radvd', 801, 100) +users.remove_user('gluon-radvd') diff --git a/gluon/gluon-radvd/src/Makefile b/gluon/gluon-radvd/src/Makefile new file mode 100644 index 0000000..f0bc903 --- /dev/null +++ b/gluon/gluon-radvd/src/Makefile @@ -0,0 +1,4 @@ +all: gluon-radvd + +gluon-radvd: gluon-radvd.c + $(CC) $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) -Wall -o $@ $^ $(LDLIBS) -lrt diff --git a/gluon/gluon-radvd/src/gluon-radvd.c b/gluon/gluon-radvd/src/gluon-radvd.c new file mode 100644 index 0000000..801d985 --- /dev/null +++ b/gluon/gluon-radvd/src/gluon-radvd.c @@ -0,0 +1,647 @@ +/* + Copyright (c) 2014, Matthias Schiffer + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + + +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include + +#include + +#include +#include + +#include +#include +#include +#include +#include + + +#define MAX_PREFIXES 8 /* capacity of G.prefixes, enforced in add_prefix() */ + +/* Lifetimes and intervals below are in seconds; AdvCurHopLimit is a hop count */ +#define AdvValidLifetime 86400u +#define AdvPreferredLifetime 14400u +#define AdvDefaultLifetime 0u +#define AdvCurHopLimit 64u + +#define MinRtrAdvInterval 200u +#define MaxRtrAdvInterval 600u + +/* And these in milliseconds */ +#define MAX_RA_DELAY_TIME 500u +#define MIN_DELAY_BETWEEN_RAS 3000u + + +struct icmpv6_opt { /* 8-byte ICMPv6 option layout: used to walk solicitation options and to carry the source link-layer address */ + uint8_t type; + uint8_t length; + uint8_t data[6]; +}; + + +struct iface { /* What update_interface() currently knows about the advertising interface */ + bool ok; /* set once index, MAC, link-local address and multicast join all succeeded; cleared when sending fails */ + unsigned int ifindex; + struct in6_addr ifaddr; /* link-local address used as the advertisement source */ + uint8_t mac[6]; +}; + +static struct global { /* All daemon state */ + struct iface iface; + + struct timespec time; /* CLOCK_MONOTONIC snapshot taken by update_time() */ + struct timespec next_advert; /* when the next advertisement is due */ + struct timespec next_advert_earliest; /* rate limit: nothing is scheduled before this */ + + int icmp_sock; + int rtnl_sock; + + const char *ifname; /* value of the -i option */ + + size_t n_prefixes; + struct in6_addr prefixes[MAX_PREFIXES]; /* values of the -p options */ +} G = { + .rtnl_sock = -1, + .icmp_sock = -1, +}; + + +static inline void exit_errno(const char *message) { /* Report message together with the errno text via error() with status 1 */ + error(1, errno, "error: %s", message); +} + +static inline void warn_errno(const char *message) { /* Same report with status 0, so execution continues */ + error(0, errno, "warning: %s", message); +} + + +static inline void update_time(void) { /* Refresh G.time from the monotonic clock */ + clock_gettime(CLOCK_MONOTONIC, &G.time); +} + +/* Compares two timespecs and returns 
true if tp1 is after tp2 */ +static inline bool timespec_after(const struct timespec *tp1, const struct timespec *tp2) { + return (tp1->tv_sec > tp2->tv_sec || + (tp1->tv_sec == tp2->tv_sec && tp1->tv_nsec > tp2->tv_nsec)); +} + +/* Returns (tp1 - tp2) in milliseconds */ +static inline int timespec_diff(const struct timespec *tp1, const struct timespec *tp2) { + return ((tp1->tv_sec - tp2->tv_sec))*1000 + (tp1->tv_nsec - tp2->tv_nsec)/1e6; +} + +static inline void timespec_add(struct timespec *tp, unsigned int ms) { /* Advance *tp by ms milliseconds, carrying a nanosecond overflow into tv_sec */ + tp->tv_sec += ms/1000; + tp->tv_nsec += (ms%1000) * 1e6; + + if (tp->tv_nsec >= 1e9) { + tp->tv_nsec -= 1e9; + tp->tv_sec++; + } +} + + +static inline int setsockopt_int(int socket, int level, int option, int value) { /* setsockopt() for options that take a plain int; returns its result */ + return setsockopt(socket, level, option, &value, sizeof(value)); +} + + +static void init_random(void) { /* Seed random() with bytes from /dev/urandom; exits if they cannot be read */ + unsigned int seed; + int fd = open("/dev/urandom", O_RDONLY); + if (fd < 0) + exit_errno("can't open /dev/urandom"); + + if (read(fd, &seed, sizeof(seed)) != sizeof(seed)) + exit_errno("can't read from /dev/urandom"); + + close(fd); + + srandom(seed); +} + +static inline int rand_range(int min, int max) { /* Pseudo-random value in [min, max); callers must pass max > min because of the modulo */ + unsigned int r = (unsigned int)random(); + return (r%(max-min) + min); +} + +static void init_icmp(void) { /* Open the non-blocking raw ICMPv6 socket and configure it; only the socket() call is checked for failure */ + G.icmp_sock = socket(AF_INET6, SOCK_RAW|SOCK_NONBLOCK, IPPROTO_ICMPV6); + if (G.icmp_sock < 0) + exit_errno("can't open ICMP socket"); + + setsockopt_int(G.icmp_sock, IPPROTO_RAW, IPV6_CHECKSUM, 2); /* checksum field is at byte offset 2 of the ICMPv6 header */ + + setsockopt_int(G.icmp_sock, IPPROTO_IPV6, IPV6_MULTICAST_HOPS, 255); + setsockopt_int(G.icmp_sock, IPPROTO_IPV6, IPV6_MULTICAST_LOOP, 1); + + setsockopt_int(G.icmp_sock, IPPROTO_IPV6, IPV6_RECVHOPLIMIT, 1); /* handle_solicit() checks the received hop limit */ + + struct icmp6_filter filter; + ICMP6_FILTER_SETBLOCKALL(&filter); + ICMP6_FILTER_SETPASS(ND_ROUTER_SOLICIT, &filter); /* router solicitations are the only type let through */ + setsockopt(G.icmp_sock, IPPROTO_ICMPV6, ICMP6_FILTER, &filter, sizeof(filter)); +} + +static void init_rtnl(void) { /* Open and bind the non-blocking rtnetlink socket; exits on failure */ + G.rtnl_sock = socket(AF_NETLINK, SOCK_DGRAM|SOCK_NONBLOCK, NETLINK_ROUTE); + if 
(G.rtnl_sock < 0) + exit_errno("can't open RTNL socket"); + + struct sockaddr_nl snl = { + .nl_family = AF_NETLINK, + .nl_groups = RTMGRP_LINK | RTMGRP_IPV6_IFADDR, + }; + if (bind(G.rtnl_sock, (struct sockaddr *)&snl, sizeof(snl)) < 0) + exit_errno("can't bind RTNL socket"); +} + + +static void schedule_advert(bool nodelay) { /* Set G.next_advert. With nodelay a delay below MAX_RA_DELAY_TIME ms is drawn and may only bring the advert forward; otherwise a fresh interval between MinRtrAdvInterval and MaxRtrAdvInterval seconds is drawn. Never earlier than G.next_advert_earliest */ + struct timespec t = G.time; + + if (nodelay) + timespec_add(&t, rand_range(0, MAX_RA_DELAY_TIME)); + else + timespec_add(&t, rand_range(MinRtrAdvInterval*1000, MaxRtrAdvInterval*1000)); + + if (timespec_after(&G.next_advert_earliest, &t)) + t = G.next_advert_earliest; + + if (!nodelay || timespec_after(&G.next_advert, &t)) + G.next_advert = t; +} + + +static int join_multicast(void) { /* Join the all-routers group on the interface. Returns 2 when newly joined, 1 when the join failed with EADDRINUSE, 0 (after a warning) on any other failure */ + struct ipv6_mreq mreq = { + .ipv6mr_multiaddr = { + .s6_addr = { + /* all-routers address */ + 0xff, 0x02, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x02, + } + }, + .ipv6mr_interface = G.iface.ifindex, + }; + + if (setsockopt(G.icmp_sock, IPPROTO_IPV6, IPV6_ADD_MEMBERSHIP, &mreq, sizeof(mreq)) == 0) { + return 2; + } + else if (errno != EADDRINUSE) { + warn_errno("can't join multicast group"); + return 0; + } + + return 1; +} + +static void update_interface(void) { /* Re-resolve index, MAC and link-local address of G.ifname. G.iface starts out zeroed, so every early return leaves ok false */ + struct iface old; + + memcpy(&old, &G.iface, sizeof(struct iface)); + memset(&G.iface, 0, sizeof(struct iface)); + + /* Update ifindex */ + G.iface.ifindex = if_nametoindex(G.ifname); + if (!G.iface.ifindex) + return; + + /* Update MAC address */ + struct ifreq ifr = {}; + strncpy(ifr.ifr_name, G.ifname, sizeof(ifr.ifr_name)-1); + if (ioctl(G.icmp_sock, SIOCGIFHWADDR, &ifr) < 0) + return; + + memcpy(G.iface.mac, ifr.ifr_hwaddr.sa_data, sizeof(G.iface.mac)); + + struct ifaddrs *addrs, *addr; + if (getifaddrs(&addrs) < 0) { + warn_errno("getifaddrs"); + return; + } + + memset(&G.iface.ifaddr, 0, sizeof(G.iface.ifaddr)); + + for (addr = addrs; addr; addr = addr->ifa_next) { + if (!addr->ifa_addr || addr->ifa_addr->sa_family != AF_INET6) /* getifaddrs() may return entries that carry no address */ + continue; + + const struct 
sockaddr_in6 *in6 = (const struct sockaddr_in6 *)addr->ifa_addr; + if (!IN6_IS_ADDR_LINKLOCAL(&in6->sin6_addr)) + continue; + + if (strncmp(addr->ifa_name, G.ifname, IFNAMSIZ-1) != 0) + continue; + + G.iface.ifaddr = in6->sin6_addr; /* no break: a later matching entry overwrites an earlier one */ + } + + freeifaddrs(addrs); + + if (IN6_IS_ADDR_UNSPECIFIED(&G.iface.ifaddr)) /* no link-local address found on the interface */ + return; + + int joined = join_multicast(); + if (!joined) + return; + + setsockopt(G.icmp_sock, SOL_SOCKET, SO_BINDTODEVICE, G.ifname, strnlen(G.ifname, IFNAMSIZ-1)); + + G.iface.ok = true; + + if (memcmp(&old, &G.iface, sizeof(struct iface)) != 0 || joined == 2) /* advertise promptly when anything changed or the group was newly joined */ + schedule_advert(true); +} + + +static bool handle_rtnl_link(uint16_t type, const struct ifinfomsg *msg) { /* True when this link notification should make the caller run update_interface() */ + switch (type) { + case RTM_NEWLINK: + if (!G.iface.ok) + return true; + + break; + + case RTM_SETLINK: + if ((unsigned)msg->ifi_index == G.iface.ifindex) + return true; + + if (!G.iface.ok) + return true; + + break; + + case RTM_DELLINK: + if (G.iface.ok && (unsigned)msg->ifi_index == G.iface.ifindex) + return true; + } + + return false; +} + +static bool handle_rtnl_addr(uint16_t type, const struct ifaddrmsg *msg) { /* True when an address was added while the interface is not usable, or removed while it is, on the recorded ifindex */ + switch (type) { + case RTM_NEWADDR: + if (!G.iface.ok && (unsigned)msg->ifa_index == G.iface.ifindex) + return true; + + break; + + case RTM_DELADDR: + if (G.iface.ok && (unsigned)msg->ifa_index == G.iface.ifindex) + return true; + } + + return false; +} + +static bool handle_rtnl_msg(uint16_t type, const void *data) { /* Route a message to the link or address handler by type; other types never request an update */ + switch (type) { + case RTM_NEWLINK: + case RTM_DELLINK: + case RTM_SETLINK: + return handle_rtnl_link(type, data); + + case RTM_NEWADDR: + case RTM_DELADDR: + return handle_rtnl_addr(type, data); + + default: + return false; + } +} + +static void handle_rtnl(void) { /* Receive one datagram of rtnetlink messages and run update_interface() at most once for it */ + char buffer[4096]; + + ssize_t len = recv(G.rtnl_sock, buffer, sizeof(buffer), 0); + if (len < 0) { + warn_errno("recv"); + return; + } + + const struct nlmsghdr *nh; + for (nh = (struct nlmsghdr *)buffer; NLMSG_OK(nh, len); nh = NLMSG_NEXT(nh, len)) { + switch (nh->nlmsg_type) { + case 
NLMSG_DONE: + return; + + case NLMSG_ERROR: + error(1, 0, "error: netlink error"); + + default: + if (handle_rtnl_msg(nh->nlmsg_type, NLMSG_DATA(nh))) { + update_interface(); + return; + } + } + } +} + +static void add_pktinfo(struct msghdr *msg) { + struct cmsghdr *cmsg = (struct cmsghdr*)((char*)msg->msg_control + msg->msg_controllen); + + cmsg->cmsg_level = IPPROTO_IPV6; + cmsg->cmsg_type = IPV6_PKTINFO; + cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo)); + + msg->msg_controllen += cmsg->cmsg_len; + + struct in6_pktinfo pktinfo = { + .ipi6_addr = G.iface.ifaddr, + .ipi6_ifindex = G.iface.ifindex, + }; + + memcpy(CMSG_DATA(cmsg), &pktinfo, sizeof(pktinfo)); +} + + +static void handle_solicit(void) { + struct sockaddr_in6 addr; + + uint8_t buffer[1500] __attribute__((aligned(8))); + struct iovec vec = { .iov_base = buffer, .iov_len = sizeof(buffer) }; + + uint8_t cbuf[1024] __attribute__((aligned(8))); + + + struct msghdr msg = { + .msg_name = &addr, + .msg_namelen = sizeof(addr), + .msg_iov = &vec, + .msg_iovlen = 1, + .msg_control = cbuf, + .msg_controllen = sizeof(cbuf), + }; + + ssize_t len = recvmsg(G.icmp_sock, &msg, 0); + if (len < (ssize_t)sizeof(struct nd_router_solicit)) { + if (len < 0) + warn_errno("recvmsg"); + + return; + } + + struct cmsghdr *cmsg; + for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL; cmsg = CMSG_NXTHDR(&msg, cmsg)) { + if (cmsg->cmsg_level != IPPROTO_IPV6) + continue; + + if (cmsg->cmsg_type != IPV6_HOPLIMIT) + continue; + + if (*(int*)CMSG_DATA(cmsg) != 255) + return; + + break; + } + + const struct nd_router_solicit *s = (struct nd_router_solicit *)buffer; + if (s->nd_rs_hdr.icmp6_type != ND_ROUTER_SOLICIT || s->nd_rs_hdr.icmp6_code != 0) + return; + + const struct icmpv6_opt *opt = (struct icmpv6_opt *)(buffer + sizeof(struct nd_router_solicit)), *end = (struct icmpv6_opt *)(buffer+len); + + for (; opt < end; opt += opt->length) { + if (opt+1 < end) + return; + + if (!opt->length) + return; + + if (opt+opt->length < end) + 
return; + + if (opt->type == ND_OPT_SOURCE_LINKADDR && IN6_IS_ADDR_UNSPECIFIED(&addr.sin6_addr)) /* drop solicitations that carry a source link-layer option but come from the unspecified address */ + return; + } + + if (opt != end) + return; + + schedule_advert(true); +} + +static void send_advert(void) { /* Multicast one router advertisement to all nodes: header, source link-layer address option and one prefix option per configured prefix. Does nothing while the interface is not usable */ + if (!G.iface.ok) + return; + + struct nd_router_advert advert = { + .nd_ra_hdr = { + .icmp6_type = ND_ROUTER_ADVERT, + .icmp6_dataun.icmp6_un_data8 = {AdvCurHopLimit, 0 /* Flags */, (AdvDefaultLifetime>>8) & 0xff, AdvDefaultLifetime & 0xff }, + }, + }; + + struct icmpv6_opt lladdr = {ND_OPT_SOURCE_LINKADDR, 1, {}}; /* length field 1 for this 8-byte option */ + memcpy(lladdr.data, G.iface.mac, sizeof(G.iface.mac)); + + struct nd_opt_prefix_info prefixes[G.n_prefixes]; /* main() refuses to start with zero prefixes, so the array is never empty */ + + size_t i; + for (i = 0; i < G.n_prefixes; i++) { + prefixes[i] = (struct nd_opt_prefix_info){ + .nd_opt_pi_type = ND_OPT_PREFIX_INFORMATION, + .nd_opt_pi_len = 4, + .nd_opt_pi_prefix_len = 64, + .nd_opt_pi_flags_reserved = ND_OPT_PI_FLAG_AUTO|ND_OPT_PI_FLAG_ONLINK, + .nd_opt_pi_valid_time = htonl(AdvValidLifetime), + .nd_opt_pi_preferred_time = htonl(AdvPreferredLifetime), + .nd_opt_pi_prefix = G.prefixes[i], + }; + } + + struct iovec vec[3] = { + { .iov_base = &advert, .iov_len = sizeof(advert) }, + { .iov_base = &lladdr, .iov_len = sizeof(lladdr) }, + { .iov_base = prefixes, .iov_len = sizeof(prefixes) }, + }; + + struct sockaddr_in6 addr = { + .sin6_family = AF_INET6, + .sin6_addr = { + .s6_addr = { + /* all-nodes address */ + 0xff, 0x02, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x01, + } + }, + .sin6_scope_id = G.iface.ifindex, + }; + + uint8_t cbuf[1024] __attribute__((aligned(8))) = {}; + + struct msghdr msg = { + .msg_name = &addr, + .msg_namelen = sizeof(addr), + .msg_iov = vec, + .msg_iovlen = 3, + .msg_control = cbuf, + .msg_controllen = 0, + .msg_flags = 0, + }; + + add_pktinfo(&msg); + + if (sendmsg(G.icmp_sock, &msg, 0) < 0) { + G.iface.ok = false; /* stays false until update_interface() succeeds again */ + return; + } + + G.next_advert_earliest = G.time; /* rate limit: the next advert is at least MIN_DELAY_BETWEEN_RAS ms away */ + timespec_add(&G.next_advert_earliest, MIN_DELAY_BETWEEN_RAS); + + 
schedule_advert(false); +} + + +static void usage(void) { + fprintf(stderr, "Usage: gluon-radvd [-h] -i -p [ -p ... ]\n"); +} + +static void add_prefix(const char *prefix) { + if (G.n_prefixes == MAX_PREFIXES) + error(1, 0, "maximum number of prefixes is %i.", MAX_PREFIXES); + + const size_t len = strlen(prefix)+1; + char prefix2[len]; + memcpy(prefix2, prefix, len); + + char *slash = strchr(prefix2, '/'); + if (slash) { + *slash = 0; + if (strcmp(slash+1, "64") != 0) + goto error; + } + + if (inet_pton(AF_INET6, prefix2, &G.prefixes[G.n_prefixes]) != 1) + goto error; + + static const uint8_t zero[8] = {}; + if (memcmp(G.prefixes[G.n_prefixes].s6_addr + 8, zero, 8) != 0) + goto error; + + G.n_prefixes++; + return; + + error: + error(1, 0, "invalid prefix %s (only prefixes of length 64 are supported).", prefix); +} + +static void parse_cmdline(int argc, char *argv[]) { + int c; + while ((c = getopt(argc, argv, "i:p:h")) != -1) { + switch(c) { + case 'i': + if (G.ifname) + error(1, 0, "multiple interfaces are not supported."); + + G.ifname = optarg; + + break; + + case 'p': + add_prefix(optarg); + break; + + case 'h': + usage(); + exit(0); + + default: + usage(); + exit(1); + } + } +} + +int main(int argc, char *argv[]) { + parse_cmdline(argc, argv); + + if (!G.ifname || !G.n_prefixes) + error(1, 0, "interface and prefix arguments are required."); + + init_random(); + init_icmp(); + init_rtnl(); + + update_time(); + G.next_advert = G.next_advert_earliest = G.time; + + update_interface(); + + while (true) { + struct pollfd fds[2] = { + { .fd = G.icmp_sock, .events = POLLIN }, + { .fd = G.rtnl_sock, .events = POLLIN }, + }; + + int timeout = -1; + + if (G.iface.ok) { + timeout = timespec_diff(&G.next_advert, &G.time); + + if (timeout < 0) + timeout = 0; + } + + int ret = poll(fds, 2, timeout); + if (ret < 0) + exit_errno("poll"); + + update_time(); + + if (fds[0].revents & POLLIN) + handle_solicit(); + if (fds[1].revents & POLLIN) + handle_rtnl(); + + if 
(timespec_after(&G.time, &G.next_advert)) + send_advert(); + } +} From b731fe1693b45b7afc5993f23de9f56cf9ef24b6 Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Tue, 29 Jul 2014 17:44:07 +0200 Subject: [PATCH 6/6] gluon-simple-tc: replace tc with own implementation to reduce size --- gluon/gluon-simple-tc/Makefile | 11 +- .../etc/hotplug.d/net/50-gluon-simple-tc | 12 +- .../files/etc/modules.d/30-gluon-simple-tc | 2 +- gluon/gluon-simple-tc/src/Makefile | 4 + gluon/gluon-simple-tc/src/gluon-simple-tc.c | 292 ++++++ .../src/include/linux/pkt_cls.h | 483 ++++++++++ .../src/include/linux/pkt_sched.h | 846 ++++++++++++++++++ .../src/include/linux/rtnetlink.h | 639 +++++++++++++ 8 files changed, 2278 insertions(+), 11 deletions(-) create mode 100644 gluon/gluon-simple-tc/src/Makefile create mode 100644 gluon/gluon-simple-tc/src/gluon-simple-tc.c create mode 100644 gluon/gluon-simple-tc/src/include/linux/pkt_cls.h create mode 100644 gluon/gluon-simple-tc/src/include/linux/pkt_sched.h create mode 100644 gluon/gluon-simple-tc/src/include/linux/rtnetlink.h diff --git a/gluon/gluon-simple-tc/Makefile b/gluon/gluon-simple-tc/Makefile index 4ae5ca9..b01b964 100644 --- a/gluon/gluon-simple-tc/Makefile +++ b/gluon/gluon-simple-tc/Makefile @@ -1,7 +1,7 @@ include $(TOPDIR)/rules.mk PKG_NAME:=gluon-simple-tc -PKG_VERSION:=3 +PKG_VERSION:=4 PKG_BUILD_DIR := $(BUILD_DIR)/$(PKG_NAME) @@ -11,7 +11,7 @@ define Package/gluon-simple-tc SECTION:=gluon CATEGORY:=Gluon TITLE:=Bandwidth limit support - DEPENDS:=+gluon-core +tc +kmod-sched + DEPENDS:=+gluon-core +kmod-sched +libnl-tiny endef define Package/gluon-simple-tc/description @@ -20,16 +20,23 @@ endef define Build/Prepare mkdir -p $(PKG_BUILD_DIR) + $(CP) ./src/* $(PKG_BUILD_DIR)/ endef define Build/Configure endef + +TARGET_CFLAGS += -I$(STAGING_DIR)/usr/include/libnl-tiny + define Build/Compile + CFLAGS="$(TARGET_CFLAGS)" CPPFLAGS="$(TARGET_CPPFLAGS)" $(MAKE) -C $(PKG_BUILD_DIR) $(TARGET_CONFIGURE_OPTS) endef define 
Package/gluon-simple-tc/install $(CP) ./files/* $(1)/ + $(INSTALL_DIR) $(1)/usr/sbin + $(INSTALL_BIN) $(PKG_BUILD_DIR)/gluon-simple-tc $(1)/usr/sbin/ endef define Package/gluon-simple-tc/postinst diff --git a/gluon/gluon-simple-tc/files/etc/hotplug.d/net/50-gluon-simple-tc b/gluon/gluon-simple-tc/files/etc/hotplug.d/net/50-gluon-simple-tc index e5ea8f7..8dd8278 100644 --- a/gluon/gluon-simple-tc/files/etc/hotplug.d/net/50-gluon-simple-tc +++ b/gluon/gluon-simple-tc/files/etc/hotplug.d/net/50-gluon-simple-tc @@ -14,17 +14,13 @@ tc_interface() { [ "$enabled" -eq 1 ] || return - config_get limit_egress "$iface" limit_egress config_get limit_ingress "$iface" limit_ingress + config_get limit_egress "$iface" limit_egress - if [ "$limit_egress" ]; then - tc qdisc add dev "$INTERFACE" root tbf rate "${limit_egress}kbit" latency 50ms burst 2k - fi + [ "$limit_ingress" ] || limit_ingress=- + [ "$limit_egress" ] || limit_egress=- - if [ "$limit_ingress" ]; then - tc qdisc add dev "$INTERFACE" handle ffff: ingress - tc filter add dev "$INTERFACE" parent ffff: u32 match u8 00 00 at 0 police rate "${limit_ingress}kbit" burst 10k drop flowid :1 - fi + gluon-simple-tc "$INTERFACE" "$limit_ingress" "$limit_egress" } config_foreach tc_interface 'interface' diff --git a/gluon/gluon-simple-tc/files/etc/modules.d/30-gluon-simple-tc b/gluon/gluon-simple-tc/files/etc/modules.d/30-gluon-simple-tc index 87cb79f..72b238c 100644 --- a/gluon/gluon-simple-tc/files/etc/modules.d/30-gluon-simple-tc +++ b/gluon/gluon-simple-tc/files/etc/modules.d/30-gluon-simple-tc @@ -1,4 +1,4 @@ sch_ingress sch_tbf -cls_u32 +cls_basic act_police diff --git a/gluon/gluon-simple-tc/src/Makefile b/gluon/gluon-simple-tc/src/Makefile new file mode 100644 index 0000000..502c623 --- /dev/null +++ b/gluon/gluon-simple-tc/src/Makefile @@ -0,0 +1,4 @@ +all: gluon-simple-tc + +gluon-simple-tc: gluon-simple-tc.c + $(CC) -Iinclude $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) -Wall -o $@ $^ $(LDLIBS) -lnl-tiny diff --git 
a/gluon/gluon-simple-tc/src/gluon-simple-tc.c b/gluon/gluon-simple-tc/src/gluon-simple-tc.c new file mode 100644 index 0000000..9e5bb20 --- /dev/null +++ b/gluon/gluon-simple-tc/src/gluon-simple-tc.c @@ -0,0 +1,292 @@ +/* + Copyright (c) 2014, Matthias Schiffer + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/
+
+
+#define _GNU_SOURCE
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+#include
+#include
+
+#include
+
+#include
+#include
+#include
+#include
+
+
+#include
+#include
+#include
+
+/* Netlink callback set and socket (both created in main()) and the psched tick factor set by read_psched(). */
+static struct nl_cb *cb;
+static struct nl_sock *sock;
+static double ticks;
+/* Index of the interface named on the command line; copied into every tcmsg by prepare_tcmsg(). */
+static unsigned ifindex;
+/* Per-request state: reset by do_send(), consulted and updated by error_handler(). */
+static bool nlexpect;
+static int nlerror;
+
+/* error(3) wrappers: exit_errno() reports errno and exits with status 1, warn_errno() only reports. */
+static inline void exit_errno(const char *message) {
+	error(1, errno, "error: %s", message);
+}
+
+static inline void warn_errno(const char *message) {
+	error(0, errno, "warning: %s", message);
+}
+
+/* Reads /proc/net/psched and derives `ticks`, the factor get_xmittime() multiplies a duration in seconds by. */
+static void read_psched(void) {
+	uint32_t clock_res;
+	uint32_t t2us;
+	uint32_t us2t;
+
+	FILE *f = fopen("/proc/net/psched", "r");
+	if (!f || fscanf(f, "%08x %08x %08x", &t2us, &us2t, &clock_res) != 3)
+		exit_errno("error reading /proc/net/psched");
+	fclose(f);
+
+	/* compatibility hack from iproute... */
+	if (clock_res == 1000000000)
+		t2us = us2t;
+
+	ticks = (double)t2us / us2t * clock_res;
+}
+
+/* Allocates a netlink message of the given type/flags with a tcmsg header for `ifindex`; exits if allocation fails. */
+static struct nl_msg * prepare_tcmsg(int type, int flags, uint32_t parent, uint32_t handle, uint32_t info) {
+	struct nl_msg *msg = nlmsg_alloc_simple(type, flags);
+	if (!msg)
+		exit_errno("nlmsg_alloc_simple");
+
+	struct tcmsg tcmsg;
+	memset(&tcmsg, 0, sizeof(tcmsg));
+
+	tcmsg.tcm_family = AF_UNSPEC;
+	tcmsg.tcm_ifindex = ifindex;
+	tcmsg.tcm_parent = parent;
+	tcmsg.tcm_handle = handle;
+	tcmsg.tcm_info = info;
+
+	nlmsg_append(msg, &tcmsg, sizeof(tcmsg), NLMSG_ALIGNTO);
+
+	return msg;
+}
+
+/* Netlink error callback: stores the error in nlerror unless nlexpect is set and it is ENOENT or EINVAL. */
+static int error_handler(struct sockaddr_nl *nla __attribute__((unused)), struct nlmsgerr *nlerr, void *arg __attribute__((unused))) {
+	if (!nlexpect || (nlerr->error != -ENOENT && nlerr->error != -EINVAL))
+		nlerror = -nlerr->error;
+
+	return NL_STOP;
+}
+/* Sends msg (always freed here) and waits for the ACK; returns false with errno set if error_handler() recorded an error. */
+static bool do_send(struct nl_msg *msg, bool expect) {
+	nlerror = 0;
+	nlexpect = expect;
+
+	nl_send_auto_complete(sock, msg);
+	nlmsg_free(msg);
+	nl_wait_for_ack(sock);
+
+	if (nlerror) {
+		error(0, nlerror, "netlink");
+		errno = nlerror;
+		return false;
+	}
+
+	return true;
+}
+
+/* Time, in units of `ticks` per second, needed to transmit `size` bytes at `rate` bytes per second. */
+static inline unsigned get_xmittime(double rate, unsigned size) {
+	return ticks * (size/rate);
+}
+
+/* Fills the fixed ratespec fields and the 256-entry rate table; entry i is the transmit time of (i + 1) * 8 bytes. */
+static void complete_rate(struct tc_ratespec *r, uint32_t rtab[256]) {
+	r->linklayer = TC_LINKLAYER_ETHERNET;
+	r->cell_align = -1;
+	r->cell_log = 3;
+
+	unsigned i;
+	for (i = 0; i < 256; i++)
+		rtab[i] = get_xmittime(r->rate, (i + 1) << 3);
+}
+
+/* Removes the ingress qdisc and, unless rate (bytes/s) is negative, recreates it with a policing "basic" filter. */
+static void do_ingress(double rate) {
+	if (!do_send(prepare_tcmsg(RTM_DELQDISC, 0, TC_H_INGRESS, 0xffff0000, 0), true))
+		return;
+
+	if (rate < 0)
+		return;
+
+	/* Create the ingress qdisc with handle ffff: */
+	struct nl_msg *msg = prepare_tcmsg(RTM_NEWQDISC, NLM_F_CREATE | NLM_F_EXCL, TC_H_INGRESS, 0xffff0000, 0);
+	nla_put_string(msg, TCA_KIND, "ingress");
+
+	if (!do_send(msg, false))
+		return;
+
+	/* Attach a "basic" filter for all protocols with a police action (TC_POLICE_SHOT) and a 10240 byte burst */
+	msg = prepare_tcmsg(RTM_NEWTFILTER, NLM_F_CREATE | NLM_F_EXCL, 0xffff0000, 0, TC_H_MAKE(0, htons(ETH_P_ALL)));
+
+	const unsigned buffer = 10240;
+
+	struct tc_police p;
+	memset(&p, 0, sizeof(p));
+
+	/* Range check has been done in main() */
+	p.rate.rate = rate;
+	p.burst = get_xmittime(p.rate.rate, buffer);
+	p.action = TC_POLICE_SHOT;
+
+	uint32_t rtab[256];
+	complete_rate(&p.rate, rtab);
+
+	nla_put_string(msg, TCA_KIND, "basic");
+
+	struct nlattr *opts = nla_nest_start(msg, TCA_OPTIONS);
+	struct nlattr *police = nla_nest_start(msg, TCA_BASIC_POLICE);
+
+	nla_put(msg, TCA_POLICE_TBF, sizeof(p), &p);
+	nla_put(msg, TCA_POLICE_RATE, sizeof(rtab), rtab);
+
+	nla_nest_end(msg, police);
+	nla_nest_end(msg, opts);
+
+	do_send(msg, false);
+}
+/* Removes the root qdisc and, unless rate (bytes/s) is negative, installs a TBF qdisc limiting egress to that rate. */
+static void do_egress(double rate) {
+	if (!do_send(prepare_tcmsg(RTM_DELQDISC, 0, TC_H_ROOT, 0, 0), true))
+		return;
+
+	if (rate < 0)
+		return;
+
+	/* TBF as root qdisc: 2048 byte burst, queue limit of 0.05 s worth of data plus the burst */
+	struct nl_msg *msg = prepare_tcmsg(RTM_NEWQDISC, NLM_F_CREATE | NLM_F_EXCL, TC_H_ROOT, 0, 0);
+	const unsigned buffer = 2048;
+
+	struct tc_tbf_qopt opt;
+	memset(&opt, 0, sizeof(opt));
+
+	/* Range check has been done in main() */
+	opt.rate.rate = rate;
+	opt.limit = 0.05*rate + buffer;
+	opt.buffer = get_xmittime(opt.rate.rate, buffer);
+
+	uint32_t rtab[256];
+	complete_rate(&opt.rate, rtab);
+
+	nla_put_string(msg, TCA_KIND, "tbf");
+
+	struct nlattr *opts = nla_nest_start(msg, TCA_OPTIONS);
+	nla_put(msg, TCA_TBF_PARMS, sizeof(opt), &opt);
+	nla_put(msg, TCA_TBF_BURST, sizeof(buffer), &buffer);
+	nla_put(msg, TCA_TBF_RTAB, sizeof(rtab), rtab);
+	nla_nest_end(msg, opts);
+
+	do_send(msg, false);
+}
+
+/* Prints the command line synopsis to stderr and exits with status 1. */
+static inline void usage(void) {
+	fprintf(stderr, "Usage: gluon-simple-tc <interface> <ingress Kbit/s>|- <egress Kbit/s>|-\n");
+	exit(1);
+}
+
+static inline void maxrate(void) {
+	error(1, 0, "error: maximum allowed rate is about 2^25 Kbit/s");
+}
+
+/* Arguments: interface, ingress rate, egress rate; rates are in Kbit/s, "-" only removes an existing limit. */
+int main(int argc, char *argv[]) {
+	if (argc != 4)
+		usage();
+
+	double ingress = -1, egress = -1;
+	char *end;
+
+	ifindex = if_nametoindex(argv[1]);
+	if (!ifindex)
+		error(1, 0, "invalid interface: %s", argv[1]);
+
+	if (strcmp(argv[2], "-") != 0) {
+		ingress = strtod(argv[2], &end);
+		if (end == argv[2] || *end || !(ingress >= 0)) /* rejects "", trailing junk, negatives and NaN */
+			usage();
+
+		ingress *= 125; /* Kbit/s -> bytes/s */
+
+		if (ingress >= (1ull << 32))
+			maxrate();
+	}
+
+	if (strcmp(argv[3], "-") != 0) {
+		egress = strtod(argv[3], &end);
+		if (end == argv[3] || *end || !(egress >= 0)) /* rejects "", trailing junk, negatives and NaN */
+			usage();
+
+		egress *= 125; /* Kbit/s -> bytes/s */
+
+		if (egress >= (1ull << 32))
+			maxrate();
+	}
+
+	read_psched();
+
+	cb = nl_cb_alloc(NL_CB_DEFAULT);
+	nl_cb_err(cb, NL_CB_CUSTOM, error_handler, NULL);
+
+	sock = nl_socket_alloc_cb(cb);
+	if (!sock)
+		exit_errno("nl_socket_alloc");
+
+	if (nl_connect(sock, NETLINK_ROUTE))
+		exit_errno("nl_connect");
+
+	do_ingress(ingress);
+	do_egress(egress);
+
+	nl_socket_free(sock);
+	nl_cb_put(cb);
+
+	return 0;
+}
diff --git a/gluon/gluon-simple-tc/src/include/linux/pkt_cls.h b/gluon/gluon-simple-tc/src/include/linux/pkt_cls.h
new file mode 100644
index 0000000..25731df
--- /dev/null
+++ b/gluon/gluon-simple-tc/src/include/linux/pkt_cls.h
@@ -0,0 +1,483 @@
+#ifndef __LINUX_PKT_CLS_H
+#define __LINUX_PKT_CLS_H
+
+#include
+#include
+
+/* I think i could have done better macros ; for now this is stolen from
+ * some arch/mips code - jhs
+*/
+#define _TC_MAKE32(x) ((x)) + +#define _TC_MAKEMASK1(n) (_TC_MAKE32(1) << _TC_MAKE32(n)) +#define _TC_MAKEMASK(v,n) (_TC_MAKE32((_TC_MAKE32(1)<<(v))-1) << _TC_MAKE32(n)) +#define _TC_MAKEVALUE(v,n) (_TC_MAKE32(v) << _TC_MAKE32(n)) +#define _TC_GETVALUE(v,n,m) ((_TC_MAKE32(v) & _TC_MAKE32(m)) >> _TC_MAKE32(n)) + +/* verdict bit breakdown + * +bit 0: when set -> this packet has been munged already + +bit 1: when set -> It is ok to munge this packet + +bit 2,3,4,5: Reclassify counter - sort of reverse TTL - if exceeded +assume loop + +bit 6,7: Where this packet was last seen +0: Above the transmit example at the socket level +1: on the Ingress +2: on the Egress + +bit 8: when set --> Request not to classify on ingress. + +bits 9,10,11: redirect counter - redirect TTL. Loop avoidance + + * + * */ + +#define TC_MUNGED _TC_MAKEMASK1(0) +#define SET_TC_MUNGED(v) ( TC_MUNGED | (v & ~TC_MUNGED)) +#define CLR_TC_MUNGED(v) ( v & ~TC_MUNGED) + +#define TC_OK2MUNGE _TC_MAKEMASK1(1) +#define SET_TC_OK2MUNGE(v) ( TC_OK2MUNGE | (v & ~TC_OK2MUNGE)) +#define CLR_TC_OK2MUNGE(v) ( v & ~TC_OK2MUNGE) + +#define S_TC_VERD _TC_MAKE32(2) +#define M_TC_VERD _TC_MAKEMASK(4,S_TC_VERD) +#define G_TC_VERD(x) _TC_GETVALUE(x,S_TC_VERD,M_TC_VERD) +#define V_TC_VERD(x) _TC_MAKEVALUE(x,S_TC_VERD) +#define SET_TC_VERD(v,n) ((V_TC_VERD(n)) | (v & ~M_TC_VERD)) + +#define S_TC_FROM _TC_MAKE32(6) +#define M_TC_FROM _TC_MAKEMASK(2,S_TC_FROM) +#define G_TC_FROM(x) _TC_GETVALUE(x,S_TC_FROM,M_TC_FROM) +#define V_TC_FROM(x) _TC_MAKEVALUE(x,S_TC_FROM) +#define SET_TC_FROM(v,n) ((V_TC_FROM(n)) | (v & ~M_TC_FROM)) +#define AT_STACK 0x0 +#define AT_INGRESS 0x1 +#define AT_EGRESS 0x2 + +#define TC_NCLS _TC_MAKEMASK1(8) +#define SET_TC_NCLS(v) ( TC_NCLS | (v & ~TC_NCLS)) +#define CLR_TC_NCLS(v) ( v & ~TC_NCLS) + +#define S_TC_RTTL _TC_MAKE32(9) +#define M_TC_RTTL _TC_MAKEMASK(3,S_TC_RTTL) +#define G_TC_RTTL(x) _TC_GETVALUE(x,S_TC_RTTL,M_TC_RTTL) +#define V_TC_RTTL(x) _TC_MAKEVALUE(x,S_TC_RTTL) +#define 
SET_TC_RTTL(v,n) ((V_TC_RTTL(n)) | (v & ~M_TC_RTTL)) + +#define S_TC_AT _TC_MAKE32(12) +#define M_TC_AT _TC_MAKEMASK(2,S_TC_AT) +#define G_TC_AT(x) _TC_GETVALUE(x,S_TC_AT,M_TC_AT) +#define V_TC_AT(x) _TC_MAKEVALUE(x,S_TC_AT) +#define SET_TC_AT(v,n) ((V_TC_AT(n)) | (v & ~M_TC_AT)) + +/* Action attributes */ +enum { + TCA_ACT_UNSPEC, + TCA_ACT_KIND, + TCA_ACT_OPTIONS, + TCA_ACT_INDEX, + TCA_ACT_STATS, + __TCA_ACT_MAX +}; + +#define TCA_ACT_MAX __TCA_ACT_MAX +#define TCA_OLD_COMPAT (TCA_ACT_MAX+1) +#define TCA_ACT_MAX_PRIO 32 +#define TCA_ACT_BIND 1 +#define TCA_ACT_NOBIND 0 +#define TCA_ACT_UNBIND 1 +#define TCA_ACT_NOUNBIND 0 +#define TCA_ACT_REPLACE 1 +#define TCA_ACT_NOREPLACE 0 +#define MAX_REC_LOOP 4 +#define MAX_RED_LOOP 4 + +#define TC_ACT_UNSPEC (-1) +#define TC_ACT_OK 0 +#define TC_ACT_RECLASSIFY 1 +#define TC_ACT_SHOT 2 +#define TC_ACT_PIPE 3 +#define TC_ACT_STOLEN 4 +#define TC_ACT_QUEUED 5 +#define TC_ACT_REPEAT 6 +#define TC_ACT_JUMP 0x10000000 + +/* Action type identifiers*/ +enum { + TCA_ID_UNSPEC=0, + TCA_ID_POLICE=1, + /* other actions go here */ + __TCA_ID_MAX=255 +}; + +#define TCA_ID_MAX __TCA_ID_MAX + +struct tc_police { + __u32 index; + int action; +#define TC_POLICE_UNSPEC TC_ACT_UNSPEC +#define TC_POLICE_OK TC_ACT_OK +#define TC_POLICE_RECLASSIFY TC_ACT_RECLASSIFY +#define TC_POLICE_SHOT TC_ACT_SHOT +#define TC_POLICE_PIPE TC_ACT_PIPE + + __u32 limit; + __u32 burst; + __u32 mtu; + struct tc_ratespec rate; + struct tc_ratespec peakrate; + int refcnt; + int bindcnt; + __u32 capab; +}; + +struct tcf_t { + __u64 install; + __u64 lastuse; + __u64 expires; +}; + +struct tc_cnt { + int refcnt; + int bindcnt; +}; + +#define tc_gen \ + __u32 index; \ + __u32 capab; \ + int action; \ + int refcnt; \ + int bindcnt + +enum { + TCA_POLICE_UNSPEC, + TCA_POLICE_TBF, + TCA_POLICE_RATE, + TCA_POLICE_PEAKRATE, + TCA_POLICE_AVRATE, + TCA_POLICE_RESULT, + __TCA_POLICE_MAX +#define TCA_POLICE_RESULT TCA_POLICE_RESULT +}; + +#define TCA_POLICE_MAX (__TCA_POLICE_MAX 
- 1) + +/* U32 filters */ + +#define TC_U32_HTID(h) ((h)&0xFFF00000) +#define TC_U32_USERHTID(h) (TC_U32_HTID(h)>>20) +#define TC_U32_HASH(h) (((h)>>12)&0xFF) +#define TC_U32_NODE(h) ((h)&0xFFF) +#define TC_U32_KEY(h) ((h)&0xFFFFF) +#define TC_U32_UNSPEC 0 +#define TC_U32_ROOT (0xFFF00000) + +enum { + TCA_U32_UNSPEC, + TCA_U32_CLASSID, + TCA_U32_HASH, + TCA_U32_LINK, + TCA_U32_DIVISOR, + TCA_U32_SEL, + TCA_U32_POLICE, + TCA_U32_ACT, + TCA_U32_INDEV, + TCA_U32_PCNT, + TCA_U32_MARK, + __TCA_U32_MAX +}; + +#define TCA_U32_MAX (__TCA_U32_MAX - 1) + +struct tc_u32_key { + __be32 mask; + __be32 val; + int off; + int offmask; +}; + +struct tc_u32_sel { + unsigned char flags; + unsigned char offshift; + unsigned char nkeys; + + __be16 offmask; + __u16 off; + short offoff; + + short hoff; + __be32 hmask; + struct tc_u32_key keys[0]; +}; + +struct tc_u32_mark { + __u32 val; + __u32 mask; + __u32 success; +}; + +struct tc_u32_pcnt { + __u64 rcnt; + __u64 rhit; + __u64 kcnts[0]; +}; + +/* Flags */ + +#define TC_U32_TERMINAL 1 +#define TC_U32_OFFSET 2 +#define TC_U32_VAROFFSET 4 +#define TC_U32_EAT 8 + +#define TC_U32_MAXDEPTH 8 + + +/* RSVP filter */ + +enum { + TCA_RSVP_UNSPEC, + TCA_RSVP_CLASSID, + TCA_RSVP_DST, + TCA_RSVP_SRC, + TCA_RSVP_PINFO, + TCA_RSVP_POLICE, + TCA_RSVP_ACT, + __TCA_RSVP_MAX +}; + +#define TCA_RSVP_MAX (__TCA_RSVP_MAX - 1 ) + +struct tc_rsvp_gpi { + __u32 key; + __u32 mask; + int offset; +}; + +struct tc_rsvp_pinfo { + struct tc_rsvp_gpi dpi; + struct tc_rsvp_gpi spi; + __u8 protocol; + __u8 tunnelid; + __u8 tunnelhdr; + __u8 pad; +}; + +/* ROUTE filter */ + +enum { + TCA_ROUTE4_UNSPEC, + TCA_ROUTE4_CLASSID, + TCA_ROUTE4_TO, + TCA_ROUTE4_FROM, + TCA_ROUTE4_IIF, + TCA_ROUTE4_POLICE, + TCA_ROUTE4_ACT, + __TCA_ROUTE4_MAX +}; + +#define TCA_ROUTE4_MAX (__TCA_ROUTE4_MAX - 1) + + +/* FW filter */ + +enum { + TCA_FW_UNSPEC, + TCA_FW_CLASSID, + TCA_FW_POLICE, + TCA_FW_INDEV, /* used by CONFIG_NET_CLS_IND */ + TCA_FW_ACT, /* used by CONFIG_NET_CLS_ACT */ + 
TCA_FW_MASK, + __TCA_FW_MAX +}; + +#define TCA_FW_MAX (__TCA_FW_MAX - 1) + +/* TC index filter */ + +enum { + TCA_TCINDEX_UNSPEC, + TCA_TCINDEX_HASH, + TCA_TCINDEX_MASK, + TCA_TCINDEX_SHIFT, + TCA_TCINDEX_FALL_THROUGH, + TCA_TCINDEX_CLASSID, + TCA_TCINDEX_POLICE, + TCA_TCINDEX_ACT, + __TCA_TCINDEX_MAX +}; + +#define TCA_TCINDEX_MAX (__TCA_TCINDEX_MAX - 1) + +/* Flow filter */ + +enum { + FLOW_KEY_SRC, + FLOW_KEY_DST, + FLOW_KEY_PROTO, + FLOW_KEY_PROTO_SRC, + FLOW_KEY_PROTO_DST, + FLOW_KEY_IIF, + FLOW_KEY_PRIORITY, + FLOW_KEY_MARK, + FLOW_KEY_NFCT, + FLOW_KEY_NFCT_SRC, + FLOW_KEY_NFCT_DST, + FLOW_KEY_NFCT_PROTO_SRC, + FLOW_KEY_NFCT_PROTO_DST, + FLOW_KEY_RTCLASSID, + FLOW_KEY_SKUID, + FLOW_KEY_SKGID, + FLOW_KEY_VLAN_TAG, + FLOW_KEY_RXHASH, + __FLOW_KEY_MAX, +}; + +#define FLOW_KEY_MAX (__FLOW_KEY_MAX - 1) + +enum { + FLOW_MODE_MAP, + FLOW_MODE_HASH, +}; + +enum { + TCA_FLOW_UNSPEC, + TCA_FLOW_KEYS, + TCA_FLOW_MODE, + TCA_FLOW_BASECLASS, + TCA_FLOW_RSHIFT, + TCA_FLOW_ADDEND, + TCA_FLOW_MASK, + TCA_FLOW_XOR, + TCA_FLOW_DIVISOR, + TCA_FLOW_ACT, + TCA_FLOW_POLICE, + TCA_FLOW_EMATCHES, + TCA_FLOW_PERTURB, + __TCA_FLOW_MAX +}; + +#define TCA_FLOW_MAX (__TCA_FLOW_MAX - 1) + +/* Basic filter */ + +enum { + TCA_BASIC_UNSPEC, + TCA_BASIC_CLASSID, + TCA_BASIC_EMATCHES, + TCA_BASIC_ACT, + TCA_BASIC_POLICE, + __TCA_BASIC_MAX +}; + +#define TCA_BASIC_MAX (__TCA_BASIC_MAX - 1) + + +/* Cgroup classifier */ + +enum { + TCA_CGROUP_UNSPEC, + TCA_CGROUP_ACT, + TCA_CGROUP_POLICE, + TCA_CGROUP_EMATCHES, + __TCA_CGROUP_MAX, +}; + +#define TCA_CGROUP_MAX (__TCA_CGROUP_MAX - 1) + +/* BPF classifier */ + +enum { + TCA_BPF_UNSPEC, + TCA_BPF_ACT, + TCA_BPF_POLICE, + TCA_BPF_CLASSID, + TCA_BPF_OPS_LEN, + TCA_BPF_OPS, + __TCA_BPF_MAX, +}; + +#define TCA_BPF_MAX (__TCA_BPF_MAX - 1) + +/* Extended Matches */ + +struct tcf_ematch_tree_hdr { + __u16 nmatches; + __u16 progid; +}; + +enum { + TCA_EMATCH_TREE_UNSPEC, + TCA_EMATCH_TREE_HDR, + TCA_EMATCH_TREE_LIST, + __TCA_EMATCH_TREE_MAX +}; +#define 
TCA_EMATCH_TREE_MAX (__TCA_EMATCH_TREE_MAX - 1) + +struct tcf_ematch_hdr { + __u16 matchid; + __u16 kind; + __u16 flags; + __u16 pad; /* currently unused */ +}; + +/* 0 1 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 + * +-----------------------+-+-+---+ + * | Unused |S|I| R | + * +-----------------------+-+-+---+ + * + * R(2) ::= relation to next ematch + * where: 0 0 END (last ematch) + * 0 1 AND + * 1 0 OR + * 1 1 Unused (invalid) + * I(1) ::= invert result + * S(1) ::= simple payload + */ +#define TCF_EM_REL_END 0 +#define TCF_EM_REL_AND (1<<0) +#define TCF_EM_REL_OR (1<<1) +#define TCF_EM_INVERT (1<<2) +#define TCF_EM_SIMPLE (1<<3) + +#define TCF_EM_REL_MASK 3 +#define TCF_EM_REL_VALID(v) (((v) & TCF_EM_REL_MASK) != TCF_EM_REL_MASK) + +enum { + TCF_LAYER_LINK, + TCF_LAYER_NETWORK, + TCF_LAYER_TRANSPORT, + __TCF_LAYER_MAX +}; +#define TCF_LAYER_MAX (__TCF_LAYER_MAX - 1) + +/* Ematch type assignments + * 1..32767 Reserved for ematches inside kernel tree + * 32768..65535 Free to use, not reliable + */ +#define TCF_EM_CONTAINER 0 +#define TCF_EM_CMP 1 +#define TCF_EM_NBYTE 2 +#define TCF_EM_U32 3 +#define TCF_EM_META 4 +#define TCF_EM_TEXT 5 +#define TCF_EM_VLAN 6 +#define TCF_EM_CANID 7 +#define TCF_EM_IPSET 8 +#define TCF_EM_MAX 8 + +enum { + TCF_EM_PROG_TC +}; + +enum { + TCF_EM_OPND_EQ, + TCF_EM_OPND_GT, + TCF_EM_OPND_LT +}; + +#endif diff --git a/gluon/gluon-simple-tc/src/include/linux/pkt_sched.h b/gluon/gluon-simple-tc/src/include/linux/pkt_sched.h new file mode 100644 index 0000000..d62316b --- /dev/null +++ b/gluon/gluon-simple-tc/src/include/linux/pkt_sched.h @@ -0,0 +1,846 @@ +#ifndef __LINUX_PKT_SCHED_H +#define __LINUX_PKT_SCHED_H + +#include + +/* Logical priority bands not depending on specific packet scheduler. + Every scheduler will map them to real traffic classes, if it has + no more precise mechanism to classify packets. + + These numbers have no special meaning, though their coincidence + with obsolete IPv6 values is not occasional :-). 
New IPv6 drafts + preferred full anarchy inspired by diffserv group. + + Note: TC_PRIO_BESTEFFORT does not mean that it is the most unhappy + class, actually, as rule it will be handled with more care than + filler or even bulk. + */ + +#define TC_PRIO_BESTEFFORT 0 +#define TC_PRIO_FILLER 1 +#define TC_PRIO_BULK 2 +#define TC_PRIO_INTERACTIVE_BULK 4 +#define TC_PRIO_INTERACTIVE 6 +#define TC_PRIO_CONTROL 7 + +#define TC_PRIO_MAX 15 + +/* Generic queue statistics, available for all the elements. + Particular schedulers may have also their private records. + */ + +struct tc_stats { + __u64 bytes; /* Number of enqueued bytes */ + __u32 packets; /* Number of enqueued packets */ + __u32 drops; /* Packets dropped because of lack of resources */ + __u32 overlimits; /* Number of throttle events when this + * flow goes out of allocated bandwidth */ + __u32 bps; /* Current flow byte rate */ + __u32 pps; /* Current flow packet rate */ + __u32 qlen; + __u32 backlog; +}; + +struct tc_estimator { + signed char interval; + unsigned char ewma_log; +}; + +/* "Handles" + --------- + + All the traffic control objects have 32bit identifiers, or "handles". + + They can be considered as opaque numbers from user API viewpoint, + but actually they always consist of two fields: major and + minor numbers, which are interpreted by kernel specially, + that may be used by applications, though not recommended. + + F.e. qdisc handles always have minor number equal to zero, + classes (or flows) have major equal to parent qdisc major, and + minor uniquely identifying class inside qdisc. 
+ + Macros to manipulate handles: + */ + +#define TC_H_MAJ_MASK (0xFFFF0000U) +#define TC_H_MIN_MASK (0x0000FFFFU) +#define TC_H_MAJ(h) ((h)&TC_H_MAJ_MASK) +#define TC_H_MIN(h) ((h)&TC_H_MIN_MASK) +#define TC_H_MAKE(maj,min) (((maj)&TC_H_MAJ_MASK)|((min)&TC_H_MIN_MASK)) + +#define TC_H_UNSPEC (0U) +#define TC_H_ROOT (0xFFFFFFFFU) +#define TC_H_INGRESS (0xFFFFFFF1U) + +/* Need to corrospond to iproute2 tc/tc_core.h "enum link_layer" */ +enum tc_link_layer { + TC_LINKLAYER_UNAWARE, /* Indicate unaware old iproute2 util */ + TC_LINKLAYER_ETHERNET, + TC_LINKLAYER_ATM, +}; +#define TC_LINKLAYER_MASK 0x0F /* limit use to lower 4 bits */ + +struct tc_ratespec { + unsigned char cell_log; + __u8 linklayer; /* lower 4 bits */ + unsigned short overhead; + short cell_align; + unsigned short mpu; + __u32 rate; +}; + +#define TC_RTAB_SIZE 1024 + +struct tc_sizespec { + unsigned char cell_log; + unsigned char size_log; + short cell_align; + int overhead; + unsigned int linklayer; + unsigned int mpu; + unsigned int mtu; + unsigned int tsize; +}; + +enum { + TCA_STAB_UNSPEC, + TCA_STAB_BASE, + TCA_STAB_DATA, + __TCA_STAB_MAX +}; + +#define TCA_STAB_MAX (__TCA_STAB_MAX - 1) + +/* FIFO section */ + +struct tc_fifo_qopt { + __u32 limit; /* Queue length: bytes for bfifo, packets for pfifo */ +}; + +/* PRIO section */ + +#define TCQ_PRIO_BANDS 16 +#define TCQ_MIN_PRIO_BANDS 2 + +struct tc_prio_qopt { + int bands; /* Number of bands */ + __u8 priomap[TC_PRIO_MAX+1]; /* Map: logical priority -> PRIO band */ +}; + +/* MULTIQ section */ + +struct tc_multiq_qopt { + __u16 bands; /* Number of bands */ + __u16 max_bands; /* Maximum number of queues */ +}; + +/* PLUG section */ + +#define TCQ_PLUG_BUFFER 0 +#define TCQ_PLUG_RELEASE_ONE 1 +#define TCQ_PLUG_RELEASE_INDEFINITE 2 +#define TCQ_PLUG_LIMIT 3 + +struct tc_plug_qopt { + /* TCQ_PLUG_BUFFER: Inset a plug into the queue and + * buffer any incoming packets + * TCQ_PLUG_RELEASE_ONE: Dequeue packets from queue head + * to beginning of the 
next plug. + * TCQ_PLUG_RELEASE_INDEFINITE: Dequeue all packets from queue. + * Stop buffering packets until the next TCQ_PLUG_BUFFER + * command is received (just act as a pass-thru queue). + * TCQ_PLUG_LIMIT: Increase/decrease queue size + */ + int action; + __u32 limit; +}; + +/* TBF section */ + +struct tc_tbf_qopt { + struct tc_ratespec rate; + struct tc_ratespec peakrate; + __u32 limit; + __u32 buffer; + __u32 mtu; +}; + +enum { + TCA_TBF_UNSPEC, + TCA_TBF_PARMS, + TCA_TBF_RTAB, + TCA_TBF_PTAB, + TCA_TBF_RATE64, + TCA_TBF_PRATE64, + TCA_TBF_BURST, + TCA_TBF_PBURST, + __TCA_TBF_MAX, +}; + +#define TCA_TBF_MAX (__TCA_TBF_MAX - 1) + + +/* TEQL section */ + +/* TEQL does not require any parameters */ + +/* SFQ section */ + +struct tc_sfq_qopt { + unsigned quantum; /* Bytes per round allocated to flow */ + int perturb_period; /* Period of hash perturbation */ + __u32 limit; /* Maximal packets in queue */ + unsigned divisor; /* Hash divisor */ + unsigned flows; /* Maximal number of flows */ +}; + +struct tc_sfqred_stats { + __u32 prob_drop; /* Early drops, below max threshold */ + __u32 forced_drop; /* Early drops, after max threshold */ + __u32 prob_mark; /* Marked packets, below max threshold */ + __u32 forced_mark; /* Marked packets, after max threshold */ + __u32 prob_mark_head; /* Marked packets, below max threshold */ + __u32 forced_mark_head;/* Marked packets, after max threshold */ +}; + +struct tc_sfq_qopt_v1 { + struct tc_sfq_qopt v0; + unsigned int depth; /* max number of packets per flow */ + unsigned int headdrop; +/* SFQRED parameters */ + __u32 limit; /* HARD maximal flow queue length (bytes) */ + __u32 qth_min; /* Min average length threshold (bytes) */ + __u32 qth_max; /* Max average length threshold (bytes) */ + unsigned char Wlog; /* log(W) */ + unsigned char Plog; /* log(P_max/(qth_max-qth_min)) */ + unsigned char Scell_log; /* cell size for idle damping */ + unsigned char flags; + __u32 max_P; /* probability, high resolution */ +/* SFQRED stats 
*/ + struct tc_sfqred_stats stats; +}; + + +struct tc_sfq_xstats { + __s32 allot; +}; + +/* RED section */ + +enum { + TCA_RED_UNSPEC, + TCA_RED_PARMS, + TCA_RED_STAB, + TCA_RED_MAX_P, + __TCA_RED_MAX, +}; + +#define TCA_RED_MAX (__TCA_RED_MAX - 1) + +struct tc_red_qopt { + __u32 limit; /* HARD maximal queue length (bytes) */ + __u32 qth_min; /* Min average length threshold (bytes) */ + __u32 qth_max; /* Max average length threshold (bytes) */ + unsigned char Wlog; /* log(W) */ + unsigned char Plog; /* log(P_max/(qth_max-qth_min)) */ + unsigned char Scell_log; /* cell size for idle damping */ + unsigned char flags; +#define TC_RED_ECN 1 +#define TC_RED_HARDDROP 2 +#define TC_RED_ADAPTATIVE 4 +}; + +struct tc_red_xstats { + __u32 early; /* Early drops */ + __u32 pdrop; /* Drops due to queue limits */ + __u32 other; /* Drops due to drop() calls */ + __u32 marked; /* Marked packets */ +}; + +/* GRED section */ + +#define MAX_DPs 16 + +enum { + TCA_GRED_UNSPEC, + TCA_GRED_PARMS, + TCA_GRED_STAB, + TCA_GRED_DPS, + TCA_GRED_MAX_P, + __TCA_GRED_MAX, +}; + +#define TCA_GRED_MAX (__TCA_GRED_MAX - 1) + +struct tc_gred_qopt { + __u32 limit; /* HARD maximal queue length (bytes) */ + __u32 qth_min; /* Min average length threshold (bytes) */ + __u32 qth_max; /* Max average length threshold (bytes) */ + __u32 DP; /* up to 2^32 DPs */ + __u32 backlog; + __u32 qave; + __u32 forced; + __u32 early; + __u32 other; + __u32 pdrop; + __u8 Wlog; /* log(W) */ + __u8 Plog; /* log(P_max/(qth_max-qth_min)) */ + __u8 Scell_log; /* cell size for idle damping */ + __u8 prio; /* prio of this VQ */ + __u32 packets; + __u32 bytesin; +}; + +/* gred setup */ +struct tc_gred_sopt { + __u32 DPs; + __u32 def_DP; + __u8 grio; + __u8 flags; + __u16 pad1; +}; + +/* CHOKe section */ + +enum { + TCA_CHOKE_UNSPEC, + TCA_CHOKE_PARMS, + TCA_CHOKE_STAB, + TCA_CHOKE_MAX_P, + __TCA_CHOKE_MAX, +}; + +#define TCA_CHOKE_MAX (__TCA_CHOKE_MAX - 1) + +struct tc_choke_qopt { + __u32 limit; /* Hard queue length (packets) 
*/ + __u32 qth_min; /* Min average threshold (packets) */ + __u32 qth_max; /* Max average threshold (packets) */ + unsigned char Wlog; /* log(W) */ + unsigned char Plog; /* log(P_max/(qth_max-qth_min)) */ + unsigned char Scell_log; /* cell size for idle damping */ + unsigned char flags; /* see RED flags */ +}; + +struct tc_choke_xstats { + __u32 early; /* Early drops */ + __u32 pdrop; /* Drops due to queue limits */ + __u32 other; /* Drops due to drop() calls */ + __u32 marked; /* Marked packets */ + __u32 matched; /* Drops due to flow match */ +}; + +/* HTB section */ +#define TC_HTB_NUMPRIO 8 +#define TC_HTB_MAXDEPTH 8 +#define TC_HTB_PROTOVER 3 /* the same as HTB and TC's major */ + +struct tc_htb_opt { + struct tc_ratespec rate; + struct tc_ratespec ceil; + __u32 buffer; + __u32 cbuffer; + __u32 quantum; + __u32 level; /* out only */ + __u32 prio; +}; +struct tc_htb_glob { + __u32 version; /* to match HTB/TC */ + __u32 rate2quantum; /* bps->quantum divisor */ + __u32 defcls; /* default class number */ + __u32 debug; /* debug flags */ + + /* stats */ + __u32 direct_pkts; /* count of non shaped packets */ +}; +enum { + TCA_HTB_UNSPEC, + TCA_HTB_PARMS, + TCA_HTB_INIT, + TCA_HTB_CTAB, + TCA_HTB_RTAB, + TCA_HTB_DIRECT_QLEN, + TCA_HTB_RATE64, + TCA_HTB_CEIL64, + __TCA_HTB_MAX, +}; + +#define TCA_HTB_MAX (__TCA_HTB_MAX - 1) + +struct tc_htb_xstats { + __u32 lends; + __u32 borrows; + __u32 giants; /* too big packets (rate will not be accurate) */ + __u32 tokens; + __u32 ctokens; +}; + +/* HFSC section */ + +struct tc_hfsc_qopt { + __u16 defcls; /* default class */ +}; + +struct tc_service_curve { + __u32 m1; /* slope of the first segment in bps */ + __u32 d; /* x-projection of the first segment in us */ + __u32 m2; /* slope of the second segment in bps */ +}; + +struct tc_hfsc_stats { + __u64 work; /* total work done */ + __u64 rtwork; /* work done by real-time criteria */ + __u32 period; /* current period */ + __u32 level; /* class level in hierarchy */ +}; + +enum { 
+ TCA_HFSC_UNSPEC, + TCA_HFSC_RSC, + TCA_HFSC_FSC, + TCA_HFSC_USC, + __TCA_HFSC_MAX, +}; + +#define TCA_HFSC_MAX (__TCA_HFSC_MAX - 1) + + +/* CBQ section */ + +#define TC_CBQ_MAXPRIO 8 +#define TC_CBQ_MAXLEVEL 8 +#define TC_CBQ_DEF_EWMA 5 + +struct tc_cbq_lssopt { + unsigned char change; + unsigned char flags; +#define TCF_CBQ_LSS_BOUNDED 1 +#define TCF_CBQ_LSS_ISOLATED 2 + unsigned char ewma_log; + unsigned char level; +#define TCF_CBQ_LSS_FLAGS 1 +#define TCF_CBQ_LSS_EWMA 2 +#define TCF_CBQ_LSS_MAXIDLE 4 +#define TCF_CBQ_LSS_MINIDLE 8 +#define TCF_CBQ_LSS_OFFTIME 0x10 +#define TCF_CBQ_LSS_AVPKT 0x20 + __u32 maxidle; + __u32 minidle; + __u32 offtime; + __u32 avpkt; +}; + +struct tc_cbq_wrropt { + unsigned char flags; + unsigned char priority; + unsigned char cpriority; + unsigned char __reserved; + __u32 allot; + __u32 weight; +}; + +struct tc_cbq_ovl { + unsigned char strategy; +#define TC_CBQ_OVL_CLASSIC 0 +#define TC_CBQ_OVL_DELAY 1 +#define TC_CBQ_OVL_LOWPRIO 2 +#define TC_CBQ_OVL_DROP 3 +#define TC_CBQ_OVL_RCLASSIC 4 + unsigned char priority2; + __u16 pad; + __u32 penalty; +}; + +struct tc_cbq_police { + unsigned char police; + unsigned char __res1; + unsigned short __res2; +}; + +struct tc_cbq_fopt { + __u32 split; + __u32 defmap; + __u32 defchange; +}; + +struct tc_cbq_xstats { + __u32 borrows; + __u32 overactions; + __s32 avgidle; + __s32 undertime; +}; + +enum { + TCA_CBQ_UNSPEC, + TCA_CBQ_LSSOPT, + TCA_CBQ_WRROPT, + TCA_CBQ_FOPT, + TCA_CBQ_OVL_STRATEGY, + TCA_CBQ_RATE, + TCA_CBQ_RTAB, + TCA_CBQ_POLICE, + __TCA_CBQ_MAX, +}; + +#define TCA_CBQ_MAX (__TCA_CBQ_MAX - 1) + +/* dsmark section */ + +enum { + TCA_DSMARK_UNSPEC, + TCA_DSMARK_INDICES, + TCA_DSMARK_DEFAULT_INDEX, + TCA_DSMARK_SET_TC_INDEX, + TCA_DSMARK_MASK, + TCA_DSMARK_VALUE, + __TCA_DSMARK_MAX, +}; + +#define TCA_DSMARK_MAX (__TCA_DSMARK_MAX - 1) + +/* ATM section */ + +enum { + TCA_ATM_UNSPEC, + TCA_ATM_FD, /* file/socket descriptor */ + TCA_ATM_PTR, /* pointer to descriptor - later */ + 
TCA_ATM_HDR, /* LL header */ + TCA_ATM_EXCESS, /* excess traffic class (0 for CLP) */ + TCA_ATM_ADDR, /* PVC address (for output only) */ + TCA_ATM_STATE, /* VC state (ATM_VS_*; for output only) */ + __TCA_ATM_MAX, +}; + +#define TCA_ATM_MAX (__TCA_ATM_MAX - 1) + +/* Network emulator */ + +enum { + TCA_NETEM_UNSPEC, + TCA_NETEM_CORR, + TCA_NETEM_DELAY_DIST, + TCA_NETEM_REORDER, + TCA_NETEM_CORRUPT, + TCA_NETEM_LOSS, + TCA_NETEM_RATE, + TCA_NETEM_ECN, + TCA_NETEM_RATE64, + __TCA_NETEM_MAX, +}; + +#define TCA_NETEM_MAX (__TCA_NETEM_MAX - 1) + +struct tc_netem_qopt { + __u32 latency; /* added delay (us) */ + __u32 limit; /* fifo limit (packets) */ + __u32 loss; /* random packet loss (0=none ~0=100%) */ + __u32 gap; /* re-ordering gap (0 for none) */ + __u32 duplicate; /* random packet dup (0=none ~0=100%) */ + __u32 jitter; /* random jitter in latency (us) */ +}; + +struct tc_netem_corr { + __u32 delay_corr; /* delay correlation */ + __u32 loss_corr; /* packet loss correlation */ + __u32 dup_corr; /* duplicate correlation */ +}; + +struct tc_netem_reorder { + __u32 probability; + __u32 correlation; +}; + +struct tc_netem_corrupt { + __u32 probability; + __u32 correlation; +}; + +struct tc_netem_rate { + __u32 rate; /* byte/s */ + __s32 packet_overhead; + __u32 cell_size; + __s32 cell_overhead; +}; + +enum { + NETEM_LOSS_UNSPEC, + NETEM_LOSS_GI, /* General Intuitive - 4 state model */ + NETEM_LOSS_GE, /* Gilbert Elliot models */ + __NETEM_LOSS_MAX +}; +#define NETEM_LOSS_MAX (__NETEM_LOSS_MAX - 1) + +/* State transition probabilities for 4 state model */ +struct tc_netem_gimodel { + __u32 p13; + __u32 p31; + __u32 p32; + __u32 p14; + __u32 p23; +}; + +/* Gilbert-Elliot models */ +struct tc_netem_gemodel { + __u32 p; + __u32 r; + __u32 h; + __u32 k1; +}; + +#define NETEM_DIST_SCALE 8192 +#define NETEM_DIST_MAX 16384 + +/* DRR */ + +enum { + TCA_DRR_UNSPEC, + TCA_DRR_QUANTUM, + __TCA_DRR_MAX +}; + +#define TCA_DRR_MAX (__TCA_DRR_MAX - 1) + +struct tc_drr_stats { + __u32 
deficit; +}; + +/* MQPRIO */ +#define TC_QOPT_BITMASK 15 +#define TC_QOPT_MAX_QUEUE 16 + +struct tc_mqprio_qopt { + __u8 num_tc; + __u8 prio_tc_map[TC_QOPT_BITMASK + 1]; + __u8 hw; + __u16 count[TC_QOPT_MAX_QUEUE]; + __u16 offset[TC_QOPT_MAX_QUEUE]; +}; + +/* SFB */ + +enum { + TCA_SFB_UNSPEC, + TCA_SFB_PARMS, + __TCA_SFB_MAX, +}; + +#define TCA_SFB_MAX (__TCA_SFB_MAX - 1) + +/* + * Note: increment, decrement are Q0.16 fixed-point values. + */ +struct tc_sfb_qopt { + __u32 rehash_interval; /* delay between hash move, in ms */ + __u32 warmup_time; /* double buffering warmup time in ms (warmup_time < rehash_interval) */ + __u32 max; /* max len of qlen_min */ + __u32 bin_size; /* maximum queue length per bin */ + __u32 increment; /* probability increment, (d1 in Blue) */ + __u32 decrement; /* probability decrement, (d2 in Blue) */ + __u32 limit; /* max SFB queue length */ + __u32 penalty_rate; /* inelastic flows are rate limited to 'rate' pps */ + __u32 penalty_burst; +}; + +struct tc_sfb_xstats { + __u32 earlydrop; + __u32 penaltydrop; + __u32 bucketdrop; + __u32 queuedrop; + __u32 childdrop; /* drops in child qdisc */ + __u32 marked; + __u32 maxqlen; + __u32 maxprob; + __u32 avgprob; +}; + +#define SFB_MAX_PROB 0xFFFF + +/* QFQ */ +enum { + TCA_QFQ_UNSPEC, + TCA_QFQ_WEIGHT, + TCA_QFQ_LMAX, + __TCA_QFQ_MAX +}; + +#define TCA_QFQ_MAX (__TCA_QFQ_MAX - 1) + +struct tc_qfq_stats { + __u32 weight; + __u32 lmax; +}; + +/* CODEL */ + +enum { + TCA_CODEL_UNSPEC, + TCA_CODEL_TARGET, + TCA_CODEL_LIMIT, + TCA_CODEL_INTERVAL, + TCA_CODEL_ECN, + __TCA_CODEL_MAX +}; + +#define TCA_CODEL_MAX (__TCA_CODEL_MAX - 1) + +struct tc_codel_xstats { + __u32 maxpacket; /* largest packet we've seen so far */ + __u32 count; /* how many drops we've done since the last time we + * entered dropping state + */ + __u32 lastcount; /* count at entry to dropping state */ + __u32 ldelay; /* in-queue delay seen by most recently dequeued packet */ + __s32 drop_next; /* time to drop next packet */ + __u32 
drop_overlimit; /* number of time max qdisc packet limit was hit */ + __u32 ecn_mark; /* number of packets we ECN marked instead of dropped */ + __u32 dropping; /* are we in dropping state ? */ +}; + +/* FQ_CODEL */ + +enum { + TCA_FQ_CODEL_UNSPEC, + TCA_FQ_CODEL_TARGET, + TCA_FQ_CODEL_LIMIT, + TCA_FQ_CODEL_INTERVAL, + TCA_FQ_CODEL_ECN, + TCA_FQ_CODEL_FLOWS, + TCA_FQ_CODEL_QUANTUM, + __TCA_FQ_CODEL_MAX +}; + +#define TCA_FQ_CODEL_MAX (__TCA_FQ_CODEL_MAX - 1) + +enum { + TCA_FQ_CODEL_XSTATS_QDISC, + TCA_FQ_CODEL_XSTATS_CLASS, +}; + +struct tc_fq_codel_qd_stats { + __u32 maxpacket; /* largest packet we've seen so far */ + __u32 drop_overlimit; /* number of time max qdisc + * packet limit was hit + */ + __u32 ecn_mark; /* number of packets we ECN marked + * instead of being dropped + */ + __u32 new_flow_count; /* number of time packets + * created a 'new flow' + */ + __u32 new_flows_len; /* count of flows in new list */ + __u32 old_flows_len; /* count of flows in old list */ +}; + +struct tc_fq_codel_cl_stats { + __s32 deficit; + __u32 ldelay; /* in-queue delay seen by most recently + * dequeued packet + */ + __u32 count; + __u32 lastcount; + __u32 dropping; + __s32 drop_next; +}; + +struct tc_fq_codel_xstats { + __u32 type; + union { + struct tc_fq_codel_qd_stats qdisc_stats; + struct tc_fq_codel_cl_stats class_stats; + }; +}; + +/* FQ */ + +enum { + TCA_FQ_UNSPEC, + + TCA_FQ_PLIMIT, /* limit of total number of packets in queue */ + + TCA_FQ_FLOW_PLIMIT, /* limit of packets per flow */ + + TCA_FQ_QUANTUM, /* RR quantum */ + + TCA_FQ_INITIAL_QUANTUM, /* RR quantum for new flow */ + + TCA_FQ_RATE_ENABLE, /* enable/disable rate limiting */ + + TCA_FQ_FLOW_DEFAULT_RATE,/* obsolete, do not use */ + + TCA_FQ_FLOW_MAX_RATE, /* per flow max rate */ + + TCA_FQ_BUCKETS_LOG, /* log2(number of buckets) */ + + TCA_FQ_FLOW_REFILL_DELAY, /* flow credit refill delay in usec */ + + __TCA_FQ_MAX +}; + +#define TCA_FQ_MAX (__TCA_FQ_MAX - 1) + +struct tc_fq_qd_stats { + __u64 gc_flows; 
+ __u64 highprio_packets; + __u64 tcp_retrans; + __u64 throttled; + __u64 flows_plimit; + __u64 pkts_too_long; + __u64 allocation_errors; + __s64 time_next_delayed_flow; + __u32 flows; + __u32 inactive_flows; + __u32 throttled_flows; + __u32 pad; +}; + +/* Heavy-Hitter Filter */ + +enum { + TCA_HHF_UNSPEC, + TCA_HHF_BACKLOG_LIMIT, + TCA_HHF_QUANTUM, + TCA_HHF_HH_FLOWS_LIMIT, + TCA_HHF_RESET_TIMEOUT, + TCA_HHF_ADMIT_BYTES, + TCA_HHF_EVICT_TIMEOUT, + TCA_HHF_NON_HH_WEIGHT, + __TCA_HHF_MAX +}; + +#define TCA_HHF_MAX (__TCA_HHF_MAX - 1) + +struct tc_hhf_xstats { + __u32 drop_overlimit; /* number of times max qdisc packet limit + * was hit + */ + __u32 hh_overlimit; /* number of times max heavy-hitters was hit */ + __u32 hh_tot_count; /* number of captured heavy-hitters so far */ + __u32 hh_cur_count; /* number of current heavy-hitters */ +}; + +/* PIE */ +enum { + TCA_PIE_UNSPEC, + TCA_PIE_TARGET, + TCA_PIE_LIMIT, + TCA_PIE_TUPDATE, + TCA_PIE_ALPHA, + TCA_PIE_BETA, + TCA_PIE_ECN, + TCA_PIE_BYTEMODE, + __TCA_PIE_MAX +}; +#define TCA_PIE_MAX (__TCA_PIE_MAX - 1) + +struct tc_pie_xstats { + __u32 prob; /* current probability */ + __u32 delay; /* current delay in ms */ + __u32 avg_dq_rate; /* current average dq_rate in bits/pie_time */ + __u32 packets_in; /* total number of packets enqueued */ + __u32 dropped; /* packets dropped due to pie_action */ + __u32 overlimit; /* dropped due to lack of space in queue */ + __u32 maxq; /* maximum queue size */ + __u32 ecn_mark; /* packets marked with ecn*/ +}; +#endif diff --git a/gluon/gluon-simple-tc/src/include/linux/rtnetlink.h b/gluon/gluon-simple-tc/src/include/linux/rtnetlink.h new file mode 100644 index 0000000..248fdd3 --- /dev/null +++ b/gluon/gluon-simple-tc/src/include/linux/rtnetlink.h @@ -0,0 +1,639 @@ +#ifndef __LINUX_RTNETLINK_H +#define __LINUX_RTNETLINK_H + +#include +#include +#include +#include +#include + +/* rtnetlink families. 
Values up to 127 are reserved for real address + * families, values above 128 may be used arbitrarily. + */ +#define RTNL_FAMILY_IPMR 128 +#define RTNL_FAMILY_IP6MR 129 +#define RTNL_FAMILY_MAX 129 + +/**** + * Routing/neighbour discovery messages. + ****/ + +/* Types of messages */ + +enum { + RTM_BASE = 16, +#define RTM_BASE RTM_BASE + + RTM_NEWLINK = 16, +#define RTM_NEWLINK RTM_NEWLINK + RTM_DELLINK, +#define RTM_DELLINK RTM_DELLINK + RTM_GETLINK, +#define RTM_GETLINK RTM_GETLINK + RTM_SETLINK, +#define RTM_SETLINK RTM_SETLINK + + RTM_NEWADDR = 20, +#define RTM_NEWADDR RTM_NEWADDR + RTM_DELADDR, +#define RTM_DELADDR RTM_DELADDR + RTM_GETADDR, +#define RTM_GETADDR RTM_GETADDR + + RTM_NEWROUTE = 24, +#define RTM_NEWROUTE RTM_NEWROUTE + RTM_DELROUTE, +#define RTM_DELROUTE RTM_DELROUTE + RTM_GETROUTE, +#define RTM_GETROUTE RTM_GETROUTE + + RTM_NEWNEIGH = 28, +#define RTM_NEWNEIGH RTM_NEWNEIGH + RTM_DELNEIGH, +#define RTM_DELNEIGH RTM_DELNEIGH + RTM_GETNEIGH, +#define RTM_GETNEIGH RTM_GETNEIGH + + RTM_NEWRULE = 32, +#define RTM_NEWRULE RTM_NEWRULE + RTM_DELRULE, +#define RTM_DELRULE RTM_DELRULE + RTM_GETRULE, +#define RTM_GETRULE RTM_GETRULE + + RTM_NEWQDISC = 36, +#define RTM_NEWQDISC RTM_NEWQDISC + RTM_DELQDISC, +#define RTM_DELQDISC RTM_DELQDISC + RTM_GETQDISC, +#define RTM_GETQDISC RTM_GETQDISC + + RTM_NEWTCLASS = 40, +#define RTM_NEWTCLASS RTM_NEWTCLASS + RTM_DELTCLASS, +#define RTM_DELTCLASS RTM_DELTCLASS + RTM_GETTCLASS, +#define RTM_GETTCLASS RTM_GETTCLASS + + RTM_NEWTFILTER = 44, +#define RTM_NEWTFILTER RTM_NEWTFILTER + RTM_DELTFILTER, +#define RTM_DELTFILTER RTM_DELTFILTER + RTM_GETTFILTER, +#define RTM_GETTFILTER RTM_GETTFILTER + + RTM_NEWACTION = 48, +#define RTM_NEWACTION RTM_NEWACTION + RTM_DELACTION, +#define RTM_DELACTION RTM_DELACTION + RTM_GETACTION, +#define RTM_GETACTION RTM_GETACTION + + RTM_NEWPREFIX = 52, +#define RTM_NEWPREFIX RTM_NEWPREFIX + + RTM_GETMULTICAST = 58, +#define RTM_GETMULTICAST RTM_GETMULTICAST + + RTM_GETANYCAST = 62, +#define 
RTM_GETANYCAST RTM_GETANYCAST + + RTM_NEWNEIGHTBL = 64, +#define RTM_NEWNEIGHTBL RTM_NEWNEIGHTBL + RTM_GETNEIGHTBL = 66, +#define RTM_GETNEIGHTBL RTM_GETNEIGHTBL + RTM_SETNEIGHTBL, +#define RTM_SETNEIGHTBL RTM_SETNEIGHTBL + + RTM_NEWNDUSEROPT = 68, +#define RTM_NEWNDUSEROPT RTM_NEWNDUSEROPT + + RTM_NEWADDRLABEL = 72, +#define RTM_NEWADDRLABEL RTM_NEWADDRLABEL + RTM_DELADDRLABEL, +#define RTM_DELADDRLABEL RTM_DELADDRLABEL + RTM_GETADDRLABEL, +#define RTM_GETADDRLABEL RTM_GETADDRLABEL + + RTM_GETDCB = 78, +#define RTM_GETDCB RTM_GETDCB + RTM_SETDCB, +#define RTM_SETDCB RTM_SETDCB + + RTM_NEWNETCONF = 80, +#define RTM_NEWNETCONF RTM_NEWNETCONF + RTM_GETNETCONF = 82, +#define RTM_GETNETCONF RTM_GETNETCONF + + RTM_NEWMDB = 84, +#define RTM_NEWMDB RTM_NEWMDB + RTM_DELMDB = 85, +#define RTM_DELMDB RTM_DELMDB + RTM_GETMDB = 86, +#define RTM_GETMDB RTM_GETMDB + + __RTM_MAX, +#define RTM_MAX (((__RTM_MAX + 3) & ~3) - 1) +}; + +#define RTM_NR_MSGTYPES (RTM_MAX + 1 - RTM_BASE) +#define RTM_NR_FAMILIES (RTM_NR_MSGTYPES >> 2) +#define RTM_FAM(cmd) (((cmd) - RTM_BASE) >> 2) + +/* + Generic structure for encapsulation of optional route information. + It is reminiscent of sockaddr, but with sa_family replaced + with attribute type. 
+ */ + +struct rtattr { + unsigned short rta_len; + unsigned short rta_type; +}; + +/* Macros to handle rtattributes */ + +#define RTA_ALIGNTO 4 +#define RTA_ALIGN(len) ( ((len)+RTA_ALIGNTO-1) & ~(RTA_ALIGNTO-1) ) +#define RTA_OK(rta,len) ((len) >= (int)sizeof(struct rtattr) && \ + (rta)->rta_len >= sizeof(struct rtattr) && \ + (rta)->rta_len <= (len)) +#define RTA_NEXT(rta,attrlen) ((attrlen) -= RTA_ALIGN((rta)->rta_len), \ + (struct rtattr*)(((char*)(rta)) + RTA_ALIGN((rta)->rta_len))) +#define RTA_LENGTH(len) (RTA_ALIGN(sizeof(struct rtattr)) + (len)) +#define RTA_SPACE(len) RTA_ALIGN(RTA_LENGTH(len)) +#define RTA_DATA(rta) ((void*)(((char*)(rta)) + RTA_LENGTH(0))) +#define RTA_PAYLOAD(rta) ((int)((rta)->rta_len) - RTA_LENGTH(0)) + + + + +/****************************************************************************** + * Definitions used in routing table administration. + ****/ + +struct rtmsg { + unsigned char rtm_family; + unsigned char rtm_dst_len; + unsigned char rtm_src_len; + unsigned char rtm_tos; + + unsigned char rtm_table; /* Routing table id */ + unsigned char rtm_protocol; /* Routing protocol; see below */ + unsigned char rtm_scope; /* See below */ + unsigned char rtm_type; /* See below */ + + unsigned rtm_flags; +}; + +/* rtm_type */ + +enum { + RTN_UNSPEC, + RTN_UNICAST, /* Gateway or direct route */ + RTN_LOCAL, /* Accept locally */ + RTN_BROADCAST, /* Accept locally as broadcast, + send as broadcast */ + RTN_ANYCAST, /* Accept locally as broadcast, + but send as unicast */ + RTN_MULTICAST, /* Multicast route */ + RTN_BLACKHOLE, /* Drop */ + RTN_UNREACHABLE, /* Destination is unreachable */ + RTN_PROHIBIT, /* Administratively prohibited */ + RTN_THROW, /* Not in this table */ + RTN_NAT, /* Translate this address */ + RTN_XRESOLVE, /* Use external resolver */ + __RTN_MAX +}; + +#define RTN_MAX (__RTN_MAX - 1) + + +/* rtm_protocol */ + +#define RTPROT_UNSPEC 0 +#define RTPROT_REDIRECT 1 /* Route installed by ICMP redirects; + not used by current 
IPv4 */ +#define RTPROT_KERNEL 2 /* Route installed by kernel */ +#define RTPROT_BOOT 3 /* Route installed during boot */ +#define RTPROT_STATIC 4 /* Route installed by administrator */ + +/* Values of protocol >= RTPROT_STATIC are not interpreted by kernel; + they are just passed from user and back as is. + It will be used by hypothetical multiple routing daemons. + Note that protocol values should be standardized in order to + avoid conflicts. + */ + +#define RTPROT_GATED 8 /* Apparently, GateD */ +#define RTPROT_RA 9 /* RDISC/ND router advertisements */ +#define RTPROT_MRT 10 /* Merit MRT */ +#define RTPROT_ZEBRA 11 /* Zebra */ +#define RTPROT_BIRD 12 /* BIRD */ +#define RTPROT_DNROUTED 13 /* DECnet routing daemon */ +#define RTPROT_XORP 14 /* XORP */ +#define RTPROT_NTK 15 /* Netsukuku */ +#define RTPROT_DHCP 16 /* DHCP client */ +#define RTPROT_MROUTED 17 /* Multicast daemon */ + +/* rtm_scope + + Really it is not scope, but sort of distance to the destination. + NOWHERE are reserved for not existing destinations, HOST is our + local addresses, LINK are destinations, located on directly attached + link and UNIVERSE is everywhere in the Universe. + + Intermediate values are also possible f.e. interior routes + could be assigned a value between UNIVERSE and LINK. 
+*/ + +enum rt_scope_t { + RT_SCOPE_UNIVERSE=0, +/* User defined values */ + RT_SCOPE_SITE=200, + RT_SCOPE_LINK=253, + RT_SCOPE_HOST=254, + RT_SCOPE_NOWHERE=255 +}; + +/* rtm_flags */ + +#define RTM_F_NOTIFY 0x100 /* Notify user of route change */ +#define RTM_F_CLONED 0x200 /* This route is cloned */ +#define RTM_F_EQUALIZE 0x400 /* Multipath equalizer: NI */ +#define RTM_F_PREFIX 0x800 /* Prefix addresses */ + +/* Reserved table identifiers */ + +enum rt_class_t { + RT_TABLE_UNSPEC=0, +/* User defined values */ + RT_TABLE_COMPAT=252, + RT_TABLE_DEFAULT=253, + RT_TABLE_MAIN=254, + RT_TABLE_LOCAL=255, + RT_TABLE_MAX=0xFFFFFFFF +}; + + +/* Routing message attributes */ + +enum rtattr_type_t { + RTA_UNSPEC, + RTA_DST, + RTA_SRC, + RTA_IIF, + RTA_OIF, + RTA_GATEWAY, + RTA_PRIORITY, + RTA_PREFSRC, + RTA_METRICS, + RTA_MULTIPATH, + RTA_PROTOINFO, /* no longer used */ + RTA_FLOW, + RTA_CACHEINFO, + RTA_SESSION, /* no longer used */ + RTA_MP_ALGO, /* no longer used */ + RTA_TABLE, + RTA_MARK, + RTA_MFC_STATS, + __RTA_MAX +}; + +#define RTA_MAX (__RTA_MAX - 1) + +#define RTM_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct rtmsg)))) +#define RTM_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct rtmsg)) + +/* RTM_MULTIPATH --- array of struct rtnexthop. + * + * "struct rtnexthop" describes all necessary nexthop information, + * i.e. parameters of path to a destination via this nexthop. + * + * At the moment it is impossible to set different prefsrc, mtu, window + * and rtt for different paths from multipath. 
+ */ + +struct rtnexthop { + unsigned short rtnh_len; + unsigned char rtnh_flags; + unsigned char rtnh_hops; + int rtnh_ifindex; +}; + +/* rtnh_flags */ + +#define RTNH_F_DEAD 1 /* Nexthop is dead (used by multipath) */ +#define RTNH_F_PERVASIVE 2 /* Do recursive gateway lookup */ +#define RTNH_F_ONLINK 4 /* Gateway is forced on link */ + +/* Macros to handle hexthops */ + +#define RTNH_ALIGNTO 4 +#define RTNH_ALIGN(len) ( ((len)+RTNH_ALIGNTO-1) & ~(RTNH_ALIGNTO-1) ) +#define RTNH_OK(rtnh,len) ((rtnh)->rtnh_len >= sizeof(struct rtnexthop) && \ + ((int)(rtnh)->rtnh_len) <= (len)) +#define RTNH_NEXT(rtnh) ((struct rtnexthop*)(((char*)(rtnh)) + RTNH_ALIGN((rtnh)->rtnh_len))) +#define RTNH_LENGTH(len) (RTNH_ALIGN(sizeof(struct rtnexthop)) + (len)) +#define RTNH_SPACE(len) RTNH_ALIGN(RTNH_LENGTH(len)) +#define RTNH_DATA(rtnh) ((struct rtattr*)(((char*)(rtnh)) + RTNH_LENGTH(0))) + +/* RTM_CACHEINFO */ + +struct rta_cacheinfo { + __u32 rta_clntref; + __u32 rta_lastuse; + __s32 rta_expires; + __u32 rta_error; + __u32 rta_used; + +#define RTNETLINK_HAVE_PEERINFO 1 + __u32 rta_id; + __u32 rta_ts; + __u32 rta_tsage; +}; + +/* RTM_METRICS --- array of struct rtattr with types of RTAX_* */ + +enum { + RTAX_UNSPEC, +#define RTAX_UNSPEC RTAX_UNSPEC + RTAX_LOCK, +#define RTAX_LOCK RTAX_LOCK + RTAX_MTU, +#define RTAX_MTU RTAX_MTU + RTAX_WINDOW, +#define RTAX_WINDOW RTAX_WINDOW + RTAX_RTT, +#define RTAX_RTT RTAX_RTT + RTAX_RTTVAR, +#define RTAX_RTTVAR RTAX_RTTVAR + RTAX_SSTHRESH, +#define RTAX_SSTHRESH RTAX_SSTHRESH + RTAX_CWND, +#define RTAX_CWND RTAX_CWND + RTAX_ADVMSS, +#define RTAX_ADVMSS RTAX_ADVMSS + RTAX_REORDERING, +#define RTAX_REORDERING RTAX_REORDERING + RTAX_HOPLIMIT, +#define RTAX_HOPLIMIT RTAX_HOPLIMIT + RTAX_INITCWND, +#define RTAX_INITCWND RTAX_INITCWND + RTAX_FEATURES, +#define RTAX_FEATURES RTAX_FEATURES + RTAX_RTO_MIN, +#define RTAX_RTO_MIN RTAX_RTO_MIN + RTAX_INITRWND, +#define RTAX_INITRWND RTAX_INITRWND + RTAX_QUICKACK, +#define RTAX_QUICKACK RTAX_QUICKACK + 
__RTAX_MAX +}; + +#define RTAX_MAX (__RTAX_MAX - 1) + +#define RTAX_FEATURE_ECN 0x00000001 +#define RTAX_FEATURE_SACK 0x00000002 +#define RTAX_FEATURE_TIMESTAMP 0x00000004 +#define RTAX_FEATURE_ALLFRAG 0x00000008 + +struct rta_session { + __u8 proto; + __u8 pad1; + __u16 pad2; + + union { + struct { + __u16 sport; + __u16 dport; + } ports; + + struct { + __u8 type; + __u8 code; + __u16 ident; + } icmpt; + + __u32 spi; + } u; +}; + +struct rta_mfc_stats { + __u64 mfcs_packets; + __u64 mfcs_bytes; + __u64 mfcs_wrong_if; +}; + +/**** + * General form of address family dependent message. + ****/ + +struct rtgenmsg { + unsigned char rtgen_family; +}; + +/***************************************************************** + * Link layer specific messages. + ****/ + +/* struct ifinfomsg + * passes link level specific information, not dependent + * on network protocol. + */ + +struct ifinfomsg { + unsigned char ifi_family; + unsigned char __ifi_pad; + unsigned short ifi_type; /* ARPHRD_* */ + int ifi_index; /* Link index */ + unsigned ifi_flags; /* IFF_* flags */ + unsigned ifi_change; /* IFF_* change mask */ +}; + +/******************************************************************** + * prefix information + ****/ + +struct prefixmsg { + unsigned char prefix_family; + unsigned char prefix_pad1; + unsigned short prefix_pad2; + int prefix_ifindex; + unsigned char prefix_type; + unsigned char prefix_len; + unsigned char prefix_flags; + unsigned char prefix_pad3; +}; + +enum +{ + PREFIX_UNSPEC, + PREFIX_ADDRESS, + PREFIX_CACHEINFO, + __PREFIX_MAX +}; + +#define PREFIX_MAX (__PREFIX_MAX - 1) + +struct prefix_cacheinfo { + __u32 preferred_time; + __u32 valid_time; +}; + + +/***************************************************************** + * Traffic control messages. 
+ ****/ + +struct tcmsg { + unsigned char tcm_family; + unsigned char tcm__pad1; + unsigned short tcm__pad2; + int tcm_ifindex; + __u32 tcm_handle; + __u32 tcm_parent; + __u32 tcm_info; +}; + +enum { + TCA_UNSPEC, + TCA_KIND, + TCA_OPTIONS, + TCA_STATS, + TCA_XSTATS, + TCA_RATE, + TCA_FCNT, + TCA_STATS2, + TCA_STAB, + __TCA_MAX +}; + +#define TCA_MAX (__TCA_MAX - 1) + +#define TCA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcmsg)))) +#define TCA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcmsg)) + +/******************************************************************** + * Neighbor Discovery userland options + ****/ + +struct nduseroptmsg { + unsigned char nduseropt_family; + unsigned char nduseropt_pad1; + unsigned short nduseropt_opts_len; /* Total length of options */ + int nduseropt_ifindex; + __u8 nduseropt_icmp_type; + __u8 nduseropt_icmp_code; + unsigned short nduseropt_pad2; + unsigned int nduseropt_pad3; + /* Followed by one or more ND options */ +}; + +enum { + NDUSEROPT_UNSPEC, + NDUSEROPT_SRCADDR, + __NDUSEROPT_MAX +}; + +#define NDUSEROPT_MAX (__NDUSEROPT_MAX - 1) + +/* RTnetlink multicast groups - backwards compatibility for userspace */ +#define RTMGRP_LINK 1 +#define RTMGRP_NOTIFY 2 +#define RTMGRP_NEIGH 4 +#define RTMGRP_TC 8 + +#define RTMGRP_IPV4_IFADDR 0x10 +#define RTMGRP_IPV4_MROUTE 0x20 +#define RTMGRP_IPV4_ROUTE 0x40 +#define RTMGRP_IPV4_RULE 0x80 + +#define RTMGRP_IPV6_IFADDR 0x100 +#define RTMGRP_IPV6_MROUTE 0x200 +#define RTMGRP_IPV6_ROUTE 0x400 +#define RTMGRP_IPV6_IFINFO 0x800 + +#define RTMGRP_DECnet_IFADDR 0x1000 +#define RTMGRP_DECnet_ROUTE 0x4000 + +#define RTMGRP_IPV6_PREFIX 0x20000 + +/* RTnetlink multicast groups */ +enum rtnetlink_groups { + RTNLGRP_NONE, +#define RTNLGRP_NONE RTNLGRP_NONE + RTNLGRP_LINK, +#define RTNLGRP_LINK RTNLGRP_LINK + RTNLGRP_NOTIFY, +#define RTNLGRP_NOTIFY RTNLGRP_NOTIFY + RTNLGRP_NEIGH, +#define RTNLGRP_NEIGH RTNLGRP_NEIGH + RTNLGRP_TC, +#define RTNLGRP_TC RTNLGRP_TC + 
RTNLGRP_IPV4_IFADDR, +#define RTNLGRP_IPV4_IFADDR RTNLGRP_IPV4_IFADDR + RTNLGRP_IPV4_MROUTE, +#define RTNLGRP_IPV4_MROUTE RTNLGRP_IPV4_MROUTE + RTNLGRP_IPV4_ROUTE, +#define RTNLGRP_IPV4_ROUTE RTNLGRP_IPV4_ROUTE + RTNLGRP_IPV4_RULE, +#define RTNLGRP_IPV4_RULE RTNLGRP_IPV4_RULE + RTNLGRP_IPV6_IFADDR, +#define RTNLGRP_IPV6_IFADDR RTNLGRP_IPV6_IFADDR + RTNLGRP_IPV6_MROUTE, +#define RTNLGRP_IPV6_MROUTE RTNLGRP_IPV6_MROUTE + RTNLGRP_IPV6_ROUTE, +#define RTNLGRP_IPV6_ROUTE RTNLGRP_IPV6_ROUTE + RTNLGRP_IPV6_IFINFO, +#define RTNLGRP_IPV6_IFINFO RTNLGRP_IPV6_IFINFO + RTNLGRP_DECnet_IFADDR, +#define RTNLGRP_DECnet_IFADDR RTNLGRP_DECnet_IFADDR + RTNLGRP_NOP2, + RTNLGRP_DECnet_ROUTE, +#define RTNLGRP_DECnet_ROUTE RTNLGRP_DECnet_ROUTE + RTNLGRP_DECnet_RULE, +#define RTNLGRP_DECnet_RULE RTNLGRP_DECnet_RULE + RTNLGRP_NOP4, + RTNLGRP_IPV6_PREFIX, +#define RTNLGRP_IPV6_PREFIX RTNLGRP_IPV6_PREFIX + RTNLGRP_IPV6_RULE, +#define RTNLGRP_IPV6_RULE RTNLGRP_IPV6_RULE + RTNLGRP_ND_USEROPT, +#define RTNLGRP_ND_USEROPT RTNLGRP_ND_USEROPT + RTNLGRP_PHONET_IFADDR, +#define RTNLGRP_PHONET_IFADDR RTNLGRP_PHONET_IFADDR + RTNLGRP_PHONET_ROUTE, +#define RTNLGRP_PHONET_ROUTE RTNLGRP_PHONET_ROUTE + RTNLGRP_DCB, +#define RTNLGRP_DCB RTNLGRP_DCB + RTNLGRP_IPV4_NETCONF, +#define RTNLGRP_IPV4_NETCONF RTNLGRP_IPV4_NETCONF + RTNLGRP_IPV6_NETCONF, +#define RTNLGRP_IPV6_NETCONF RTNLGRP_IPV6_NETCONF + RTNLGRP_MDB, +#define RTNLGRP_MDB RTNLGRP_MDB + __RTNLGRP_MAX +}; +#define RTNLGRP_MAX (__RTNLGRP_MAX - 1) + +/* TC action piece */ +struct tcamsg { + unsigned char tca_family; + unsigned char tca__pad1; + unsigned short tca__pad2; +}; +#define TA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcamsg)))) +#define TA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcamsg)) +#define TCA_ACT_TAB 1 /* attr type must be >=1 */ +#define TCAA_MAX 1 + +/* New extended info filters for IFLA_EXT_MASK */ +#define RTEXT_FILTER_VF (1 << 0) +#define RTEXT_FILTER_BRVLAN (1 << 1) + +/* End of information exported to 
user level */ + + + +#endif /* __LINUX_RTNETLINK_H */