Trickle

Trickle is a small Linux kernel netfilter module I use for traffic control, very similar to Stochastic Fairness Queueing implemented by tc (man 8 tc-sfq). The difference is trickle works on the incoming traffic, as opposed to tc that works on the outgoing traffic. It is thus useful for workstations, limiting TCP session rates in such a way no session will take over and saturate a small DSL Internet access pipe.

The saturation usually happens when the user has a download or some media streaming going on. Trying to access a web page in this moment will be very slow. The solution is to limit the rate of the download or streaming session, so the user can still use her connection to surf the web.

The algorithm is fairly simple. Incoming TCP packets are first placed in 32 buckets based on source IP address, using a fast and simple hashing function. If multiple buckets exhibit traffic, the traffic is limited on each bucket to 60% of the maximum rate available. In the case when only one bucket is active, the rate limitation is removed and the full available bandwidth is used.

The end result is a better user experience. Whenever the user clicks a link in the browser, the download is trickled down temporarily until the web page is loaded.

The module consists of two files: Makefile and trickle.c.

Makefile

KDIR = /media/work/kernel/linux-3.2.51
obj-m = trickle.o
all: trickle.ko tmon

tmon: tmon.c
	gcc -O2 -o tmon tmon.c

trickle.ko: trickle.c
	make -C $(KDIR) M=$(CURDIR) modules

clean:
	make -C $(KDIR) M=$(CURDIR) clean
	rm -f *.so *.o modules.order
	rm -f tmon

start: all
	insmod trickle.ko
	sleep 2; cat /proc/trickle

stop:
	rmmod trickle

dist: all
	rm -fr trickle
	mkdir -p trickle
	cp Makefile trickle/.
	cp trickle.c trickle/.
	cp trickle.ko trickle/.
	cp tmon trickle/.
	cp tmon.c trickle/.
	tar -cjvf trickle.tar.bz2 trickle
	rm -fr trickle

trickle.c

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/ip.h>
#include <linux/udp.h>

MODULE_LICENSE("GPLv2");
#define TRICKLE_MAX_CLASSES 32

// timer
#define SAMPLES_MA 5 // 5 seconds for moving average
static int samples_ma;
static struct timer_list rate_timer; // periodic cycle timer
static int use_all = 1; // start without rate limiting
static uint64_t dropped = 0;
static uint32_t bytes_max_1s = 0;
static uint32_t bytes_max_cnt = 0;
static uint32_t rate_limit = 0;

// reporting
#define CLASS_ACTIVE_BYTES 512 // consider the class active
#define IP_ACTIVE_PKTS 20 // report IP address as active

// SMP support
static DEFINE_SPINLOCK(trickle_lock);

// trickle classes
typedef struct {
	uint32_t bytes_cnt;
	uint32_t bytes_ma[SAMPLES_MA]; // moving average
	uint32_t pkts_cnt;
	uint32_t pkts;
	uint32_t ip;
	uint32_t ip_storage;
} TClass;

static TClass tclass[TRICKLE_MAX_CLASSES];

// periodic timer
static void rate_timer_calc(unsigned long dummy) {
	int i;
	int allcnt = 0;
	spin_lock(&trickle_lock);

	for (i = 0; i < TRICKLE_MAX_CLASSES; i++) {
		TClass *tc = &tclass[i];
		if (tc->bytes_cnt >= CLASS_ACTIVE_BYTES)
			allcnt++;
		bytes_max_cnt += tc->bytes_cnt;
		tc->bytes_ma[samples_ma] = tc->bytes_cnt;
		tc->bytes_cnt = 0;
		tc->pkts = tc->pkts_cnt;
		tc->pkts_cnt = 0;
		tc->ip_storage = tc->ip;
		tc->ip = 0;
	}

	if (++samples_ma >= SAMPLES_MA)
		samples_ma = 0;
	bytes_max_1s = (bytes_max_1s > bytes_max_cnt)? bytes_max_1s: bytes_max_cnt;
	bytes_max_cnt = 0;
	rate_limit = (bytes_max_1s * 60) / 100;

	if (allcnt <= 1)
		use_all = 1;
	else
		use_all = 0;

	spin_unlock(&trickle_lock);

	// restart timer
	mod_timer(&rate_timer, jiffies + HZ);
}


static unsigned int hook_func(unsigned int hooknum,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *)) {

	struct iphdr _iph,*iph;
	TClass *tc;
	unsigned char *s;
	unsigned char hash;

	if (!skb)
		return NF_ACCEPT;

	iph = skb_header_pointer(skb, 0, sizeof(_iph), &_iph);

	if (!iph || iph->protocol != IPPROTO_TCP)
		return NF_ACCEPT;

	s = (unsigned char *) &iph->saddr;

	// do not count local traffic (10.0.0.0/8, 192.168.0.0/16, 172.16.0.0/12 )
	if (s[0] == 10 ||
	   (s[0] == 192 && s[1] == 168) ||
	   (s[0] == 172 && s[1] >= 16 && s[1] < 32))
		return NF_ACCEPT;

	hash = (s[0] ^ s[1] ^ s[2] ^ s[3]) % TRICKLE_MAX_CLASSES;
	tc = &tclass[hash];

	if (!use_all && tc->bytes_cnt > rate_limit)
		goto dropit;

	spin_lock(&trickle_lock);
	tc->bytes_cnt += ntohs(iph->tot_len);
	if (++tc->pkts_cnt >= IP_ACTIVE_PKTS)
		tc->ip = iph->saddr;
	spin_unlock(&trickle_lock);
	return NF_ACCEPT;

dropit:
	spin_lock(&trickle_lock);
	bytes_max_cnt += ntohs(iph->tot_len);
	dropped++;
	spin_unlock(&trickle_lock);
	return NF_DROP;

}


static struct nf_hook_ops nfho = {
	.hook     = hook_func,
	.hooknum  = 1, //NF_IP_LOCAL_IN,
	.pf       = PF_INET,
	.priority = NF_IP_PRI_FIRST
};

int trickle_read_procmem(char *buf, char **start, off_t offset, int count, int *eof, void *data) {
	int i;
	char *bufin = buf;

	sprintf(buf, "Dropped %llu, max rate %uKB/s, rate limit %uKB/s%s\n",
		dropped,
		bytes_max_1s/1024,
		rate_limit / 1024,
		(use_all)? ", using all bandwidth": "");
	buf += strlen(buf);

	for (i = 0; i < TRICKLE_MAX_CLASSES; i++) {
		uint64_t cnt = 0;
		uint64_t rate;
		TClass *tc = &tclass[i];
		int j;

		for (j = 0; j < SAMPLES_MA; j++)
			cnt += tc->bytes_ma[j];
		if (cnt == 0)
			continue;
		rate = cnt / SAMPLES_MA;

		if (rate >= 1024 * 1024)
			sprintf(buf, "%2d: %4llu MB/s, %u pkts",
				i, rate / (1024 * 1024), tc->pkts);
		else if (rate >= 1024)
			sprintf(buf, "%2d: %4llu KB/s, %u pkts",
				i, rate / 1024, tc->pkts);
		else
			sprintf(buf, "%2d: %4llu B/s,  %u pkts",
				i, rate, tc->pkts);
		buf += strlen(buf);

		if (tc->ip_storage) {
			unsigned char *s = (unsigned char *) &tc->ip_storage;
			sprintf(buf, ", %u.%u.%u.%u\n", 
				s[0], s[1], s[2], s[3]);
		}
		else
			sprintf(buf, "\n");
		buf += strlen(buf);
	}

	return strlen(bufin);
}


static int __init init_main(void) {
	if (nf_register_hook(&nfho) < 0) {
		printk(KERN_INFO "trickle: failed initializing module.\n");
		return 1;
	}

	printk(KERN_INFO "trickle: initializing module.\n");

	create_proc_read_entry("trickle", 0, NULL, trickle_read_procmem, NULL);
	setup_timer(&rate_timer, rate_timer_calc, 0);
	mod_timer(&rate_timer, jiffies + HZ);

	return 0;
}


static void __exit cleanup_main(void) {
	nf_unregister_hook(&nfho);

	printk(KERN_INFO "trickle: removing module.\n");
	remove_proc_entry("trickle", NULL);
	del_timer_sync(&rate_timer);
}


module_init(init_main);
module_exit(cleanup_main);

Place the files in an empty directory, modify KDIR in Makefile to point to a directory where you have a full compilation of your Linux kernel, and build the module. Use insmod trickle.ko to start it, and rmmod trickle to stop it.

Run-time status for the module is stored in /proc/trickle. I also use a small monitoring program, tmon to print the status every one second.

tmon.c

#include <stdio.h>

#define MAXBUF 10240 // 10K
static char buf[MAXBUF + 1];

static FILE *read_nohost(FILE *fp) {
	int len = fread(buf, 1, MAXBUF, fp);
	if (len > 0) {
		buf[len] = '\0';
		printf("%s\n", buf);
		rewind(fp);
	}
	else {
		fclose(fp);
		fp = NULL;
	}
	
	return fp;
}

int main() {
	FILE *fp = NULL;

	while (1) {
		sleep(1);
		printf("\033[2J\033[;H");
		if (!fp)
			fp = fopen("/proc/trickle", "r");

		if (!fp) {
			printf("Error: tickle module inactive\n");
				continue;
		}
		
		fp = read_nohost(fp);
	}
	
	fclose(fp);
	return 0;
}

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s