From: Felix Fietkau Date: Sat, 16 Mar 2019 17:57:38 +0100 Subject: [PATCH] mac80211: calculate hash for fq without holding fq->lock in itxq enqueue Reduces lock contention on enqueue/dequeue of iTXQ packets Signed-off-by: Felix Fietkau --- --- a/include/net/fq_impl.h +++ b/include/net/fq_impl.h @@ -107,29 +107,31 @@ begin: return skb; } -static struct fq_flow *fq_flow_classify(struct fq *fq, - struct fq_tin *tin, - struct sk_buff *skb, - fq_flow_get_default_t get_default_func) +static u32 fq_flow_idx(struct fq *fq, struct sk_buff *skb) { - struct fq_flow *flow; - u32 hash; - u32 idx; - - lockdep_assert_held(&fq->lock); - #if LINUX_VERSION_IS_GEQ(5,3,10) || \ LINUX_VERSION_IN_RANGE(4,19,83, 4,20,0) || \ LINUX_VERSION_IN_RANGE(4,14,153, 4,15,0) || \ LINUX_VERSION_IN_RANGE(4,9,200, 4,10,0) || \ LINUX_VERSION_IN_RANGE(4,4,200, 4,5,0) - hash = skb_get_hash_perturb(skb, &fq->perturbation); + u32 hash = skb_get_hash_perturb(skb, &fq->perturbation); #else - hash = skb_get_hash_perturb(skb, fq->perturbation); + u32 hash = skb_get_hash_perturb(skb, fq->perturbation); #endif - idx = reciprocal_scale(hash, fq->flows_cnt); - flow = &fq->flows[idx]; + return reciprocal_scale(hash, fq->flows_cnt); +} + +static struct fq_flow *fq_flow_classify(struct fq *fq, + struct fq_tin *tin, u32 idx, + struct sk_buff *skb, + fq_flow_get_default_t get_default_func) +{ + struct fq_flow *flow; + + lockdep_assert_held(&fq->lock); + + flow = &fq->flows[idx]; if (flow->tin && flow->tin != tin) { flow = get_default_func(fq, tin, idx, skb); tin->collisions++; @@ -161,7 +163,7 @@ static void fq_recalc_backlog(struct fq } static void fq_tin_enqueue(struct fq *fq, - struct fq_tin *tin, + struct fq_tin *tin, u32 idx, struct sk_buff *skb, fq_skb_free_t free_func, fq_flow_get_default_t get_default_func) @@ -171,7 +173,7 @@ static void fq_tin_enqueue(struct fq *fq lockdep_assert_held(&fq->lock); - flow = fq_flow_classify(fq, tin, skb, get_default_func); + flow = fq_flow_classify(fq, tin, idx, skb, get_default_func); flow->tin = tin; flow->backlog += skb->len; --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1390,11 +1390,15 @@ static void ieee80211_txq_enqueue(struct { struct fq *fq = &local->fq; struct fq_tin *tin = &txqi->tin; + u32 flow_idx = fq_flow_idx(fq, skb); ieee80211_set_skb_enqueue_time(skb); - fq_tin_enqueue(fq, tin, skb, + + spin_lock_bh(&fq->lock); + fq_tin_enqueue(fq, tin, flow_idx, skb, fq_skb_free_func, fq_flow_get_default_func); + spin_unlock_bh(&fq->lock); } static bool fq_vlan_filter_func(struct fq *fq, struct fq_tin *tin, @@ -1564,7 +1568,6 @@ static bool ieee80211_queue_skb(struct i struct sta_info *sta, struct sk_buff *skb) { - struct fq *fq = &local->fq; struct ieee80211_vif *vif; struct txq_info *txqi; @@ -1582,9 +1585,7 @@ static bool ieee80211_queue_skb(struct i if (!txqi) return false; - spin_lock_bh(&fq->lock); ieee80211_txq_enqueue(local, txqi, skb); - spin_unlock_bh(&fq->lock); schedule_and_wake_txq(local, txqi); @@ -3198,6 +3199,7 @@ static bool ieee80211_amsdu_aggregate(st u8 max_subframes = sta->sta.max_amsdu_subframes; int max_frags = local->hw.max_tx_fragments; int max_amsdu_len = sta->sta.max_amsdu_len; + u32 flow_idx; int orig_truesize; __be16 len; void *data; @@ -3220,6 +3222,8 @@ static bool ieee80211_amsdu_aggregate(st max_amsdu_len = min_t(int, max_amsdu_len, sta->sta.max_rc_amsdu_len); + flow_idx = fq_flow_idx(fq, skb); + spin_lock_bh(&fq->lock); /* TODO: Ideally aggregation should be done on dequeue to remain @@ -3227,7 +3231,8 @@ static bool ieee80211_amsdu_aggregate(st */ tin = &txqi->tin; - flow = fq_flow_classify(fq, tin, skb, fq_flow_get_default_func); + flow = fq_flow_classify(fq, tin, flow_idx, skb, + fq_flow_get_default_func); head = skb_peek_tail(&flow->queue); if (!head) goto out;