--- a/drivers/net/wireless/ath/ath9k/xmit.c
+++ b/drivers/net/wireless/ath/ath9k/xmit.c
@@ -104,6 +104,29 @@ static int ath_max_4ms_framelen[4][32] =
 /* Aggregation logic */
 /*********************/
 
+static void ath_txq_lock(struct ath_softc *sc, struct ath_txq *txq)
+{
+	spin_lock_bh(&txq->axq_lock);
+}
+
+static void ath_txq_unlock(struct ath_softc *sc, struct ath_txq *txq)
+{
+	spin_unlock_bh(&txq->axq_lock);
+}
+
+static void ath_txq_unlock_complete(struct ath_softc *sc, struct ath_txq *txq)
+{
+	struct sk_buff_head q;
+	struct sk_buff *skb;
+
+	__skb_queue_head_init(&q);
+	skb_queue_splice_init(&txq->complete_q, &q);
+	spin_unlock_bh(&txq->axq_lock);
+
+	while ((skb = __skb_dequeue(&q)))
+		ieee80211_tx_status(sc->hw, skb);
+}
+
 static void ath_tx_queue_tid(struct ath_txq *txq, struct ath_atx_tid *tid)
 {
 	struct ath_atx_ac *ac = tid->ac;
@@ -130,7 +153,7 @@ static void ath_tx_resume_tid(struct ath
 
 	WARN_ON(!tid->paused);
 
-	spin_lock_bh(&txq->axq_lock);
+	ath_txq_lock(sc, txq);
 	tid->paused = false;
 
 	if (skb_queue_empty(&tid->buf_q))
@@ -139,7 +162,7 @@ static void ath_tx_resume_tid(struct ath
 	ath_tx_queue_tid(txq, tid);
 	ath_txq_schedule(sc, txq);
 unlock:
-	spin_unlock_bh(&txq->axq_lock);
+	ath_txq_unlock_complete(sc, txq);
 }
 
 static struct ath_frame_info *get_frame_info(struct sk_buff *skb)
@@ -189,8 +212,11 @@ static void ath_tx_flush_tid(struct ath_
 		tid->state &= ~AGGR_CLEANUP;
 	}
 
-	if (sendbar)
+	if (sendbar) {
+		ath_txq_unlock(sc, txq);
 		ath_send_bar(tid, tid->seq_start);
+		ath_txq_lock(sc, txq);
+	}
 }
 
 static void ath_tx_update_baw(struct ath_softc *sc, struct ath_atx_tid *tid,
@@ -564,13 +590,6 @@ static void ath_tx_complete_aggr(struct 
 		bf = bf_next;
 	}
 
-	if (bar_index >= 0) {
-		u16 bar_seq = ATH_BA_INDEX2SEQ(seq_first, bar_index);
-		ath_send_bar(tid, ATH_BA_INDEX2SEQ(seq_first, bar_index + 1));
-		if (BAW_WITHIN(tid->seq_start, tid->baw_size, bar_seq))
-			tid->bar_index = ATH_BA_INDEX(tid->seq_start, bar_seq);
-	}
-
 	/* prepend un-acked frames to the beginning of the pending frame queue */
 	if (!skb_queue_empty(&bf_pending)) {
 		if (an->sleeping)
@@ -585,6 +604,17 @@ static void ath_tx_complete_aggr(struct 
 		}
 	}
 
+	if (bar_index >= 0) {
+		u16 bar_seq = ATH_BA_INDEX2SEQ(seq_first, bar_index);
+
+		if (BAW_WITHIN(tid->seq_start, tid->baw_size, bar_seq))
+			tid->bar_index = ATH_BA_INDEX(tid->seq_start, bar_seq);
+
+		ath_txq_unlock(sc, txq);
+		ath_send_bar(tid, ATH_BA_INDEX2SEQ(seq_first, bar_index + 1));
+		ath_txq_lock(sc, txq);
+	}
+
 	if (tid->state & AGGR_CLEANUP)
 		ath_tx_flush_tid(sc, tid);
 
@@ -1183,7 +1213,7 @@ void ath_tx_aggr_stop(struct ath_softc *
 		return;
 	}
 
-	spin_lock_bh(&txq->axq_lock);
+	ath_txq_lock(sc, txq);
 	txtid->paused = true;
 
 	/*
@@ -1198,7 +1228,7 @@ void ath_tx_aggr_stop(struct ath_softc *
 		txtid->state &= ~AGGR_ADDBA_COMPLETE;
 
 	ath_tx_flush_tid(sc, txtid);
-	spin_unlock_bh(&txq->axq_lock);
+	ath_txq_unlock_complete(sc, txq);
 }
 
 void ath_tx_aggr_sleep(struct ieee80211_sta *sta, struct ath_softc *sc,
@@ -1219,7 +1249,7 @@ void ath_tx_aggr_sleep(struct ieee80211_
 		ac = tid->ac;
 		txq = ac->txq;
 
-		spin_lock_bh(&txq->axq_lock);
+		ath_txq_lock(sc, txq);
 
 		buffered = !skb_queue_empty(&tid->buf_q);
 
@@ -1231,7 +1261,7 @@ void ath_tx_aggr_sleep(struct ieee80211_
 			list_del(&ac->list);
 		}
 
-		spin_unlock_bh(&txq->axq_lock);
+		ath_txq_unlock(sc, txq);
 
 		ieee80211_sta_set_buffered(sta, tidno, buffered);
 	}
@@ -1250,7 +1280,7 @@ void ath_tx_aggr_wakeup(struct ath_softc
 		ac = tid->ac;
 		txq = ac->txq;
 
-		spin_lock_bh(&txq->axq_lock);
+		ath_txq_lock(sc, txq);
 		ac->clear_ps_filter = true;
 
 		if (!skb_queue_empty(&tid->buf_q) && !tid->paused) {
@@ -1258,7 +1288,7 @@ void ath_tx_aggr_wakeup(struct ath_softc
 			ath_txq_schedule(sc, txq);
 		}
 
-		spin_unlock_bh(&txq->axq_lock);
+		ath_txq_unlock_complete(sc, txq);
 	}
 }
 
@@ -1358,6 +1388,7 @@ struct ath_txq *ath_txq_setup(struct ath
 		txq->axq_qnum = axq_qnum;
 		txq->mac80211_qnum = -1;
 		txq->axq_link = NULL;
+		__skb_queue_head_init(&txq->complete_q);
 		INIT_LIST_HEAD(&txq->axq_q);
 		INIT_LIST_HEAD(&txq->axq_acq);
 		spin_lock_init(&txq->axq_lock);
@@ -1482,7 +1513,8 @@ static void ath_drain_txq_list(struct at
  */
 void ath_draintxq(struct ath_softc *sc, struct ath_txq *txq, bool retry_tx)
 {
-	spin_lock_bh(&txq->axq_lock);
+	ath_txq_lock(sc, txq);
+
 	if (sc->sc_ah->caps.hw_caps & ATH9K_HW_CAP_EDMA) {
 		int idx = txq->txq_tailidx;
 
@@ -1503,7 +1535,7 @@ void ath_draintxq(struct ath_softc *sc, 
 	if ((sc->sc_flags & SC_OP_TXAGGR) && !retry_tx)
 		ath_txq_drain_pending_buffers(sc, txq);
 
-	spin_unlock_bh(&txq->axq_lock);
+	ath_txq_unlock_complete(sc, txq);
 }
 
 bool ath_drain_all_txq(struct ath_softc *sc, bool retry_tx)
@@ -1947,7 +1979,7 @@ int ath_tx_start(struct ieee80211_hw *hw
 
 	q = skb_get_queue_mapping(skb);
 
-	spin_lock_bh(&txq->axq_lock);
+	ath_txq_lock(sc, txq);
 
 	if (txq == sc->tx.txq_map[q] &&
 	    ++txq->pending_frames > ATH_MAX_QDEPTH && !txq->stopped) {
@@ -1957,7 +1989,7 @@ int ath_tx_start(struct ieee80211_hw *hw
 
 	ath_tx_start_dma(sc, skb, txctl, tid);
 
-	spin_unlock_bh(&txq->axq_lock);
+	ath_txq_unlock(sc, txq);
 
 	return 0;
 }
@@ -1969,7 +2001,6 @@ int ath_tx_start(struct ieee80211_hw *hw
 static void ath_tx_complete(struct ath_softc *sc, struct sk_buff *skb,
 			    int tx_flags, struct ath_txq *txq)
 {
-	struct ieee80211_hw *hw = sc->hw;
 	struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(skb);
 	struct ath_common *common = ath9k_hw_common(sc->sc_ah);
 	struct ieee80211_hdr * hdr = (struct ieee80211_hdr *)skb->data;
@@ -2013,7 +2044,7 @@ static void ath_tx_complete(struct ath_s
 		}
 	}
 
-	ieee80211_tx_status(hw, skb);
+	__skb_queue_tail(&txq->complete_q, skb);
 }
 
 static void ath_tx_complete_buf(struct ath_softc *sc, struct ath_buf *bf,
@@ -2149,7 +2180,7 @@ static void ath_tx_processq(struct ath_s
 		txq->axq_qnum, ath9k_hw_gettxbuf(sc->sc_ah, txq->axq_qnum),
 		txq->axq_link);
 
-	spin_lock_bh(&txq->axq_lock);
+	ath_txq_lock(sc, txq);
 	for (;;) {
 		if (work_pending(&sc->hw_reset_work))
 			break;
@@ -2208,7 +2239,7 @@ static void ath_tx_processq(struct ath_s
 
 		ath_tx_process_buffer(sc, txq, &ts, bf, &bf_head);
 	}
-	spin_unlock_bh(&txq->axq_lock);
+	ath_txq_unlock_complete(sc, txq);
 }
 
 static void ath_tx_complete_poll_work(struct work_struct *work)
@@ -2225,17 +2256,17 @@ static void ath_tx_complete_poll_work(st
 	for (i = 0; i < ATH9K_NUM_TX_QUEUES; i++)
 		if (ATH_TXQ_SETUP(sc, i)) {
 			txq = &sc->tx.txq[i];
-			spin_lock_bh(&txq->axq_lock);
+			ath_txq_lock(sc, txq);
 			if (txq->axq_depth) {
 				if (txq->axq_tx_inprogress) {
 					needreset = true;
-					spin_unlock_bh(&txq->axq_lock);
+					ath_txq_unlock(sc, txq);
 					break;
 				} else {
 					txq->axq_tx_inprogress = true;
 				}
 			}
-			spin_unlock_bh(&txq->axq_lock);
+			ath_txq_unlock_complete(sc, txq);
 		}
 
 	if (needreset) {
@@ -2293,10 +2324,10 @@ void ath_tx_edma_tasklet(struct ath_soft
 
 		txq = &sc->tx.txq[ts.qid];
 
-		spin_lock_bh(&txq->axq_lock);
+		ath_txq_lock(sc, txq);
 
 		if (list_empty(&txq->txq_fifo[txq->txq_tailidx])) {
-			spin_unlock_bh(&txq->axq_lock);
+			ath_txq_unlock(sc, txq);
 			return;
 		}
 
@@ -2322,7 +2353,7 @@ void ath_tx_edma_tasklet(struct ath_soft
 		}
 
 		ath_tx_process_buffer(sc, txq, &ts, bf, &bf_head);
-		spin_unlock_bh(&txq->axq_lock);
+		ath_txq_unlock_complete(sc, txq);
 	}
 }
 
@@ -2460,7 +2491,7 @@ void ath_tx_node_cleanup(struct ath_soft
 		ac = tid->ac;
 		txq = ac->txq;
 
-		spin_lock_bh(&txq->axq_lock);
+		ath_txq_lock(sc, txq);
 
 		if (tid->sched) {
 			list_del(&tid->list);
@@ -2476,6 +2507,6 @@ void ath_tx_node_cleanup(struct ath_soft
 		tid->state &= ~AGGR_ADDBA_COMPLETE;
 		tid->state &= ~AGGR_CLEANUP;
 
-		spin_unlock_bh(&txq->axq_lock);
+		ath_txq_unlock(sc, txq);
 	}
 }
--- a/drivers/net/wireless/ath/ath9k/ath9k.h
+++ b/drivers/net/wireless/ath/ath9k/ath9k.h
@@ -196,6 +196,7 @@ struct ath_txq {
 	u8 txq_headidx;
 	u8 txq_tailidx;
 	int pending_frames;
+	struct sk_buff_head complete_q;
 };
 
 struct ath_atx_ac {