openwrt-owl/obsolete-buildroot/sources/openwrt-linux-sch_htb.patch

571 lines
17 KiB
Diff

--- src/linux/linux/net/sched/sch_htb.c 2003-10-14 01:09:35.000000000 -0700
+++ src/linux/linux.2.4.26/net/sched/sch_htb.c 2004-05-10 00:05:51.000000000 -0700
@@ -9,6 +9,8 @@
* Authors: Martin Devera, <devik@cdi.cz>
*
* Credits (in time order) for older HTB versions:
+ * Stef Coene <stef.coene@docum.org>
+ * HTB support at LARTC mailing list
* Ondrej Kraus, <krauso@barr.cz>
* found missing INIT_QDISC(htb)
* Vladimir Smelhaus, Aamer Akhter, Bert Hubert
@@ -17,9 +19,13 @@
* code review and helpful comments on shaping
* Tomasz Wrona, <tw@eter.tym.pl>
* created test case so that I was able to fix nasty bug
+ * Wilfried Weissmann
+ * spotted bug in dequeue code and helped with fix
+ * Jiri Fojtasek
+ * fixed requeue routine
* and many others. thanks.
*
- * $Id: sch_htb.c,v 1.1.1.4 2003/10/14 08:09:35 sparq Exp $
+ * $Id: sch_htb.c,v 1.25 2003/12/07 11:08:25 devik Exp devik $
*/
#include <linux/config.h>
#include <linux/module.h>
@@ -71,16 +77,12 @@
#define HTB_HYSTERESIS 1/* whether to use mode hysteresis for speedup */
#define HTB_QLOCK(S) spin_lock_bh(&(S)->dev->queue_lock)
#define HTB_QUNLOCK(S) spin_unlock_bh(&(S)->dev->queue_lock)
-#define HTB_VER 0x30007 /* major must be matched with number suplied by TC as version */
+#define HTB_VER 0x30010 /* major must be matched with number suplied by TC as version */
#if HTB_VER >> 16 != TC_HTB_PROTOVER
#error "Mismatched sch_htb.c and pkt_sch.h"
#endif
-/* temporary debug defines to be removed after beta stage */
-#define DEVIK_MEND(N)
-#define DEVIK_MSTART(N)
-
/* debugging support; S is subsystem, these are defined:
0 - netlink messages
1 - enqueue
@@ -100,13 +102,16 @@
from LSB
*/
#ifdef HTB_DEBUG
-#define HTB_DBG(S,L,FMT,ARG...) if (((q->debug>>(2*S))&3) >= L) \
+#define HTB_DBG_COND(S,L) (((q->debug>>(2*S))&3) >= L)
+#define HTB_DBG(S,L,FMT,ARG...) if (HTB_DBG_COND(S,L)) \
printk(KERN_DEBUG FMT,##ARG)
#define HTB_CHCL(cl) BUG_TRAP((cl)->magic == HTB_CMAGIC)
#define HTB_PASSQ q,
#define HTB_ARGQ struct htb_sched *q,
#define static
+#undef __inline__
#define __inline__
+#undef inline
#define inline
#define HTB_CMAGIC 0xFEFAFEF1
#define htb_safe_rb_erase(N,R) do { BUG_TRAP((N)->rb_color != -1); \
@@ -114,6 +119,7 @@
rb_erase(N,R); \
(N)->rb_color = -1; } while (0)
#else
+#define HTB_DBG_COND(S,L) (0)
#define HTB_DBG(S,L,FMT,ARG...)
#define HTB_PASSQ
#define HTB_ARGQ
@@ -219,6 +225,9 @@
/* time of nearest event per level (row) */
unsigned long near_ev_cache[TC_HTB_MAXDEPTH];
+ /* cached value of jiffies in dequeue */
+ unsigned long jiffies;
+
/* whether we hit non-work conserving class during this dequeue; we use */
int nwc_hit; /* this to disable mindelay complaint in dequeue */
@@ -297,7 +306,7 @@
rules in it */
if (skb->priority == sch->handle)
return HTB_DIRECT; /* X:0 (direct flow) selected */
- if ((cl = htb_find(skb->priority,sch)) != NULL)
+ if ((cl = htb_find(skb->priority,sch)) != NULL && cl->level == 0)
return cl;
tcf = q->filter_list;
@@ -338,7 +347,7 @@
static void htb_debug_dump (struct htb_sched *q)
{
int i,p;
- printk(KERN_DEBUG "htb*g j=%lu\n",jiffies);
+ printk(KERN_DEBUG "htb*g j=%lu lj=%lu\n",jiffies,q->jiffies);
/* rows */
for (i=TC_HTB_MAXDEPTH-1;i>=0;i--) {
printk(KERN_DEBUG "htb*r%d m=%x",i,q->row_mask[i]);
@@ -421,26 +430,24 @@
if ((delay <= 0 || delay > cl->mbuffer) && net_ratelimit())
printk(KERN_ERR "HTB: suspicious delay in wait_tree d=%ld cl=%X h=%d\n",delay,cl->classid,debug_hint);
#endif
- DEVIK_MSTART(9);
- cl->pq_key = jiffies + PSCHED_US2JIFFIE(delay);
- if (cl->pq_key == jiffies)
+ cl->pq_key = q->jiffies + PSCHED_US2JIFFIE(delay);
+ if (cl->pq_key == q->jiffies)
cl->pq_key++;
/* update the nearest event cache */
- if (q->near_ev_cache[cl->level] - cl->pq_key < 0x80000000)
+ if (time_after(q->near_ev_cache[cl->level], cl->pq_key))
q->near_ev_cache[cl->level] = cl->pq_key;
while (*p) {
struct htb_class *c; parent = *p;
c = rb_entry(parent, struct htb_class, pq_node);
- if (cl->pq_key - c->pq_key < 0x80000000)
+ if (time_after_eq(cl->pq_key, c->pq_key))
p = &parent->rb_right;
else
p = &parent->rb_left;
}
rb_link_node(&cl->pq_node, parent, p);
rb_insert_color(&cl->pq_node, &q->wait_pq[cl->level]);
- DEVIK_MEND(9);
}
/**
@@ -453,12 +460,14 @@
{
rb_node_t *p;
if ((*n)->rb_right) {
+ /* child at right. use it or its leftmost ancestor */
*n = (*n)->rb_right;
while ((*n)->rb_left)
*n = (*n)->rb_left;
return;
}
while ((p = (*n)->rb_parent) != NULL) {
+ /* if we've arrived from left child then we have next node */
if (p->rb_left == *n) break;
*n = p;
}
@@ -602,7 +611,7 @@
long toks;
if ((toks = (cl->ctokens + *diff)) < (
-#ifdef HTB_HYSTERESIS
+#if HTB_HYSTERESIS
cl->cmode != HTB_CANT_SEND ? -cl->cbuffer :
#endif
0)) {
@@ -610,7 +619,7 @@
return HTB_CANT_SEND;
}
if ((toks = (cl->tokens + *diff)) >= (
-#ifdef HTB_HYSTERESIS
+#if HTB_HYSTERESIS
cl->cmode == HTB_CAN_SEND ? -cl->buffer :
#endif
0))
@@ -689,7 +698,6 @@
struct htb_sched *q = (struct htb_sched *)sch->data;
struct htb_class *cl = htb_classify(skb,sch);
- DEVIK_MSTART(0);
if (cl == HTB_DIRECT || !cl) {
/* enqueue to helper queue */
if (q->direct_queue.qlen < q->direct_qlen && cl) {
@@ -698,25 +706,20 @@
} else {
kfree_skb (skb);
sch->stats.drops++;
- DEVIK_MEND(0);
return NET_XMIT_DROP;
}
} else if (cl->un.leaf.q->enqueue(skb, cl->un.leaf.q) != NET_XMIT_SUCCESS) {
sch->stats.drops++;
cl->stats.drops++;
- DEVIK_MEND(0);
return NET_XMIT_DROP;
} else {
cl->stats.packets++; cl->stats.bytes += skb->len;
- DEVIK_MSTART(1);
htb_activate (q,cl);
- DEVIK_MEND(1);
}
sch->q.qlen++;
sch->stats.packets++; sch->stats.bytes += skb->len;
- HTB_DBG(1,1,"htb_enq_ok cl=%X skb=%p\n",cl?cl->classid:0,skb);
- DEVIK_MEND(0);
+ HTB_DBG(1,1,"htb_enq_ok cl=%X skb=%p\n",(cl && cl != HTB_DIRECT)?cl->classid:0,skb);
return NET_XMIT_SUCCESS;
}
@@ -725,16 +728,18 @@
{
struct htb_sched *q = (struct htb_sched *)sch->data;
struct htb_class *cl = htb_classify(skb,sch);
+ struct sk_buff *tskb;
if (cl == HTB_DIRECT || !cl) {
/* enqueue to helper queue */
if (q->direct_queue.qlen < q->direct_qlen && cl) {
- __skb_queue_tail(&q->direct_queue, skb);
- q->direct_pkts++;
+ __skb_queue_head(&q->direct_queue, skb);
} else {
- kfree_skb (skb);
- sch->stats.drops++;
- return NET_XMIT_DROP;
+ __skb_queue_head(&q->direct_queue, skb);
+ tskb = __skb_dequeue_tail(&q->direct_queue);
+ kfree_skb (tskb);
+ sch->stats.drops++;
+ return NET_XMIT_CN;
}
} else if (cl->un.leaf.q->ops->requeue(skb, cl->un.leaf.q) != NET_XMIT_SUCCESS) {
sch->stats.drops++;
@@ -744,7 +749,7 @@
htb_activate (q,cl);
sch->q.qlen++;
- HTB_DBG(1,1,"htb_req_ok cl=%X skb=%p\n",cl?cl->classid:0,skb);
+ HTB_DBG(1,1,"htb_req_ok cl=%X skb=%p\n",(cl && cl != HTB_DIRECT)?cl->classid:0,skb);
return NET_XMIT_SUCCESS;
}
@@ -819,7 +824,7 @@
cl->classid, diff,
(unsigned long long) q->now,
(unsigned long long) cl->t_c,
- jiffies);
+ q->jiffies);
diff = 1000;
}
#endif
@@ -862,6 +867,7 @@
*
* Scans event queue for pending events and applies them. Returns jiffies to
* next pending event (0 for no event in pq).
+ * Note: Aplied are events whose have cl->pq_key <= jiffies.
*/
static long htb_do_events(struct htb_sched *q,int level)
{
@@ -876,9 +882,9 @@
while (p->rb_left) p = p->rb_left;
cl = rb_entry(p, struct htb_class, pq_node);
- if (cl->pq_key - (jiffies+1) < 0x80000000) {
- HTB_DBG(8,3,"htb_do_ev_ret delay=%ld\n",cl->pq_key - jiffies);
- return cl->pq_key - jiffies;
+ if (time_after(cl->pq_key, q->jiffies)) {
+ HTB_DBG(8,3,"htb_do_ev_ret delay=%ld\n",cl->pq_key - q->jiffies);
+ return cl->pq_key - q->jiffies;
}
htb_safe_rb_erase(p,q->wait_pq+level);
diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32)cl->mbuffer, 0);
@@ -889,7 +895,7 @@
cl->classid, diff,
(unsigned long long) q->now,
(unsigned long long) cl->t_c,
- jiffies);
+ q->jiffies);
diff = 1000;
}
#endif
@@ -916,6 +922,7 @@
rb_node_t **pptr;
} stk[TC_HTB_MAXDEPTH],*sp = stk;
+ BUG_TRAP(tree->rb_node);
sp->root = tree->rb_node;
sp->pptr = pptr;
@@ -949,16 +956,36 @@
htb_dequeue_tree(struct htb_sched *q,int prio,int level)
{
struct sk_buff *skb = NULL;
- //struct htb_sched *q = (struct htb_sched *)sch->data;
struct htb_class *cl,*start;
/* look initial class up in the row */
- DEVIK_MSTART(6);
start = cl = htb_lookup_leaf (q->row[level]+prio,prio,q->ptr[level]+prio);
do {
- BUG_TRAP(cl && cl->un.leaf.q->q.qlen); if (!cl) return NULL;
+next:
+ BUG_TRAP(cl);
+ if (!cl) return NULL;
HTB_DBG(4,1,"htb_deq_tr prio=%d lev=%d cl=%X defic=%d\n",
prio,level,cl->classid,cl->un.leaf.deficit[level]);
+
+ /* class can be empty - it is unlikely but can be true if leaf
+ qdisc drops packets in enqueue routine or if someone used
+ graft operation on the leaf since last dequeue;
+ simply deactivate and skip such class */
+ if (unlikely(cl->un.leaf.q->q.qlen == 0)) {
+ struct htb_class *next;
+ htb_deactivate(q,cl);
+
+ /* row/level might become empty */
+ if ((q->row_mask[level] & (1 << prio)) == 0)
+ return NULL;
+
+ next = htb_lookup_leaf (q->row[level]+prio,
+ prio,q->ptr[level]+prio);
+ if (cl == start) /* fix start if we just deleted it */
+ start = next;
+ cl = next;
+ goto next;
+ }
if (likely((skb = cl->un.leaf.q->dequeue(cl->un.leaf.q)) != NULL))
break;
@@ -971,8 +998,6 @@
cl = htb_lookup_leaf (q->row[level]+prio,prio,q->ptr[level]+prio);
} while (cl != start);
- DEVIK_MEND(6);
- DEVIK_MSTART(7);
if (likely(skb != NULL)) {
if ((cl->un.leaf.deficit[level] -= skb->len) < 0) {
HTB_DBG(4,2,"htb_next_cl oldptr=%p quant_add=%d\n",
@@ -984,11 +1009,8 @@
gives us slightly better performance */
if (!cl->un.leaf.q->q.qlen)
htb_deactivate (q,cl);
- DEVIK_MSTART(8);
htb_charge_class (q,cl,level,skb->len);
- DEVIK_MEND(8);
}
- DEVIK_MEND(7);
return skb;
}
@@ -1002,9 +1024,8 @@
printk(KERN_INFO "HTB delay %ld > 5sec\n", delay);
delay = 5*HZ;
}
- del_timer(&q->timer);
- q->timer.expires = jiffies + delay;
- add_timer(&q->timer);
+ /* why don't use jiffies here ? because expires can be in past */
+ mod_timer(&q->timer, q->jiffies + delay);
sch->flags |= TCQ_F_THROTTLED;
sch->stats.overlimits++;
HTB_DBG(3,1,"htb_deq t_delay=%ld\n",delay);
@@ -1016,7 +1037,11 @@
struct htb_sched *q = (struct htb_sched *)sch->data;
int level;
long min_delay;
+#ifdef HTB_DEBUG
+ int evs_used = 0;
+#endif
+ q->jiffies = jiffies;
HTB_DBG(3,1,"htb_deq dircnt=%d qlen=%d\n",skb_queue_len(&q->direct_queue),
sch->q.qlen);
@@ -1027,27 +1052,26 @@
return skb;
}
- DEVIK_MSTART(2);
if (!sch->q.qlen) goto fin;
PSCHED_GET_TIME(q->now);
- min_delay = HZ*5;
+ min_delay = LONG_MAX;
q->nwc_hit = 0;
for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
/* common case optimization - skip event handler quickly */
int m;
long delay;
- DEVIK_MSTART(3);
- if (jiffies - q->near_ev_cache[level] < 0x80000000 || 0) {
+ if (time_after_eq(q->jiffies, q->near_ev_cache[level])) {
delay = htb_do_events(q,level);
- q->near_ev_cache[level] += delay ? delay : HZ;
+ q->near_ev_cache[level] = q->jiffies + (delay ? delay : HZ);
+#ifdef HTB_DEBUG
+ evs_used++;
+#endif
} else
- delay = q->near_ev_cache[level] - jiffies;
+ delay = q->near_ev_cache[level] - q->jiffies;
if (delay && min_delay > delay)
min_delay = delay;
- DEVIK_MEND(3);
- DEVIK_MSTART(5);
m = ~q->row_mask[level];
while (m != (int)(-1)) {
int prio = ffz (m);
@@ -1056,29 +1080,29 @@
if (likely(skb != NULL)) {
sch->q.qlen--;
sch->flags &= ~TCQ_F_THROTTLED;
- DEVIK_MEND(5);
goto fin;
}
}
- DEVIK_MEND(5);
}
- DEVIK_MSTART(4);
#ifdef HTB_DEBUG
- if (!q->nwc_hit && min_delay >= 5*HZ && net_ratelimit()) {
- printk(KERN_ERR "HTB: mindelay=%ld, report it please !\n",min_delay);
- htb_debug_dump(q);
+ if (!q->nwc_hit && min_delay >= 10*HZ && net_ratelimit()) {
+ if (min_delay == LONG_MAX) {
+ printk(KERN_ERR "HTB: dequeue bug (%d,%lu,%lu), report it please !\n",
+ evs_used,q->jiffies,jiffies);
+ htb_debug_dump(q);
+ } else
+ printk(KERN_WARNING "HTB: mindelay=%ld, some class has "
+ "too small rate\n",min_delay);
}
#endif
- htb_delay_by (sch,min_delay);
- DEVIK_MEND(4);
+ htb_delay_by (sch,min_delay > 5*HZ ? 5*HZ : min_delay);
fin:
- HTB_DBG(3,1,"htb_deq_end %s j=%lu skb=%p\n",sch->dev->name,jiffies,skb);
- DEVIK_MEND(2);
+ HTB_DBG(3,1,"htb_deq_end %s j=%lu skb=%p\n",sch->dev->name,q->jiffies,skb);
return skb;
}
/* try to drop from each class (by prio) until one succeed */
-static int htb_drop(struct Qdisc* sch)
+static unsigned int htb_drop(struct Qdisc* sch)
{
struct htb_sched *q = (struct htb_sched *)sch->data;
int prio;
@@ -1086,14 +1110,15 @@
for (prio = TC_HTB_NUMPRIO - 1; prio >= 0; prio--) {
struct list_head *p;
list_for_each (p,q->drops+prio) {
- struct htb_class *cl = list_entry(p,struct htb_class,
- un.leaf.drop_list);
+ struct htb_class *cl = list_entry(p, struct htb_class,
+ un.leaf.drop_list);
+ unsigned int len;
if (cl->un.leaf.q->ops->drop &&
- cl->un.leaf.q->ops->drop(cl->un.leaf.q)) {
+ (len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) {
sch->q.qlen--;
if (!cl->un.leaf.q->q.qlen)
htb_deactivate (q,cl);
- return 1;
+ return len;
}
}
}
@@ -1208,7 +1233,8 @@
gopt.direct_pkts = q->direct_pkts;
#ifdef HTB_DEBUG
- htb_debug_dump(q);
+ if (HTB_DBG_COND(0,2))
+ htb_debug_dump(q);
#endif
gopt.version = HTB_VER;
gopt.rate2quantum = q->rate2quantum;
@@ -1289,6 +1315,9 @@
return -ENOBUFS;
sch_tree_lock(sch);
if ((*old = xchg(&cl->un.leaf.q, new)) != NULL) {
+ if (cl->prio_activity)
+ htb_deactivate ((struct htb_sched*)sch->data,cl);
+
/* TODO: is it correct ? Why CBQ doesn't do it ? */
sch->q.qlen -= (*old)->q.qlen;
qdisc_reset(*old);
@@ -1323,7 +1352,7 @@
while ((tp = *fl) != NULL) {
*fl = tp->next;
- tp->ops->destroy(tp);
+ tcf_destroy(tp);
}
}
@@ -1371,11 +1400,16 @@
#ifdef HTB_RATECM
del_timer_sync (&q->rttim);
#endif
+ /* This line used to be after htb_destroy_class call below
+ and surprisingly it worked in 2.4. But it must precede it
+ because filter need its target class alive to be able to call
+ unbind_filter on it (without Oops). */
+ htb_destroy_filters(&q->filter_list);
+
while (!list_empty(&q->root))
htb_destroy_class (sch,list_entry(q->root.next,
struct htb_class,sibling));
- htb_destroy_filters(&q->filter_list);
__skb_queue_purge(&q->direct_queue);
MOD_DEC_USE_COUNT;
}
@@ -1438,12 +1472,13 @@
parent = parentid == TC_H_ROOT ? NULL : htb_find (parentid,sch);
hopt = RTA_DATA(tb[TCA_HTB_PARMS-1]);
- HTB_DBG(0,1,"htb_chg cl=%p, clid=%X, opt/prio=%d, rate=%u, buff=%d, quant=%d\n", cl,cl?cl->classid:0,(int)hopt->prio,hopt->rate.rate,hopt->buffer,hopt->quantum);
+ HTB_DBG(0,1,"htb_chg cl=%p(%X), clid=%X, parid=%X, opt/prio=%d, rate=%u, buff=%d, quant=%d\n", cl,cl?cl->classid:0,classid,parentid,(int)hopt->prio,hopt->rate.rate,hopt->buffer,hopt->quantum);
rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB-1]);
ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB-1]);
if (!rtab || !ctab) goto failure;
if (!cl) { /* new class */
+ struct Qdisc *new_q;
/* check for valid classid */
if (!classid || TC_H_MAJ(classid^sch->handle) || htb_find(classid,sch))
goto failure;
@@ -1467,6 +1502,10 @@
cl->magic = HTB_CMAGIC;
#endif
+ /* create leaf qdisc early because it uses kmalloc(GFP_KERNEL)
+ so that can't be used inside of sch_tree_lock
+ -- thanks to Karlis Peisenieks */
+ new_q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
sch_tree_lock(sch);
if (parent && !parent->level) {
/* turn parent into inner node */
@@ -1485,8 +1524,7 @@
memset (&parent->un.inner,0,sizeof(parent->un.inner));
}
/* leaf (we) needs elementary qdisc */
- if (!(cl->un.leaf.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)))
- cl->un.leaf.q = &noop_qdisc;
+ cl->un.leaf.q = new_q ? new_q : &noop_qdisc;
cl->classid = classid; cl->parent = parent;
@@ -1514,11 +1552,11 @@
if (!cl->level) {
cl->un.leaf.quantum = rtab->rate.rate / q->rate2quantum;
if (!hopt->quantum && cl->un.leaf.quantum < 1000) {
- printk(KERN_WARNING "HTB: quantum of class %X is small. Consider r2q change.", cl->classid);
+ printk(KERN_WARNING "HTB: quantum of class %X is small. Consider r2q change.\n", cl->classid);
cl->un.leaf.quantum = 1000;
}
if (!hopt->quantum && cl->un.leaf.quantum > 200000) {
- printk(KERN_WARNING "HTB: quantum of class %X is big. Consider r2q change.", cl->classid);
+ printk(KERN_WARNING "HTB: quantum of class %X is big. Consider r2q change.\n", cl->classid);
cl->un.leaf.quantum = 200000;
}
if (hopt->quantum)
--- src/linux/linux/include/net/pkt_cls.h 2003-07-04 01:12:28.000000000 -0700
+++ src/linux/linux.2.4.26/include/net/pkt_cls.h 2004-05-10 22:21:40.000000000 -0700
@@ -77,7 +77,11 @@
return -1;
}
-
+static inline void tcf_destroy(struct tcf_proto *tp)
+{
+ tp->ops->destroy(tp);
+ kfree(tp);
+}
extern int register_tcf_proto_ops(struct tcf_proto_ops *ops);
extern int unregister_tcf_proto_ops(struct tcf_proto_ops *ops);