6.2 TCP滑动窗口

时间:2021-05-07 23:40:20

  TCP滑动窗口的功能是实现流量控制。数据接收方只接收seq落入窗口范围内的数据;发送方也不会发送窗口之外的数据,一旦发现窗口太小则会停止发送直到窗口变大,这样TCP数据接收方就能通过窗口通告来控制数据发送方发送数据的速度。窗口的值存储在TCP报文段的window字段中,大小为16bit,即窗口的最大值是65535。如果使用窗口扩大选项(后续讨论),则实际通告窗口的值为window字段的值左移“窗口扩大因子”位后的结果。

  关于滑动窗口其实只有三个问题需要明晰:

(1)窗口(包括初始窗口)如何生成(即生产)

(2)窗口信息如何更新(即维护)

(3)窗口如何被使用(即消费)

  下面一一解答。

6.2.1 窗口生产

  初始窗口的设置在发送SYN和SYN|ACK时进行:


 2752 void tcp_connect_init(struct sock *sk)
2753 {
2754     const struct dst_entry *dst = __sk_dst_get(sk);
2755     struct tcp_sock *tp = tcp_sk(sk);
2756     __u8 rcv_wscale;     
...
2789     tcp_select_initial_window(tcp_full_space(sk),
2790                   tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0), //MSS - 时间戳选项大小
2791                   &tp->rcv_wnd,
2792                   &tp->window_clamp,
2793                   sysctl_tcp_window_scaling,
2794                   &rcv_wscale,
2795                   dst_metric(dst, RTAX_INITRWND));
2796 
2797     tp->rx_opt.rcv_wscale = rcv_wscale;
2798     tp->rcv_ssthresh = tp->rcv_wnd; ... 
   构建SYN|ACK时:
 2654 struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
2655                 struct request_sock *req,
2656                 struct tcp_fastopen_cookie *foc)
2657 {
...
2692         /* tcp_full_space because it is guaranteed to be the first packet */
2693         tcp_select_initial_window(tcp_full_space(sk),
2694             mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0), //MSS - 时间戳选项大小
2695             &req->rcv_wnd,
2696             &req->window_clamp,
2697             ireq->wscale_ok,
2698             &rcv_wscale,
2699             dst_metric(dst, RTAX_INITRWND));
2700         ireq->rcv_wscale = rcv_wscale;
...
  这样看来,计算初始窗口大小的功能是由tcp_select_initial_window函数完成的:
 191 void tcp_select_initial_window(int __space, __u32 mss,
 192                    __u32 *rcv_wnd, __u32 *window_clamp,
 193                    int wscale_ok, __u8 *rcv_wscale,
 194                    __u32 init_rcv_wnd)
 195 {   
 196     unsigned int space = (__space < 0 ? 0 : __space); //__space是TCP根据接收缓存的大小计算出来的
 197         
 198     /* If no clamp set the clamp to the max possible scaled window */
 199     if (*window_clamp == 0)    //*window_clamp的值是通告给对端的最大的窗口值
 200         (*window_clamp) = (65535 << 14); //窗口扩大因子最大为14
 201     space = min(*window_clamp, space);
 202                   
 203     /* Quantize space offering to a multiple of mss if possible. */
 204     if (space > mss)
 205         space = (space / mss) * mss; //将space整理为mss的整数倍
...
 215     if (sysctl_tcp_workaround_signed_windows) //使用有符号的接收窗口
 216         (*rcv_wnd) = min(space, MAX_TCP_WINDOW);  //设置通告窗口小于32767,否则可能会导致一些不稳定的TCP协议实现的崩溃
 217     else
 218         (*rcv_wnd) = space; //将通告窗口的值设置为space
 219
 220     (*rcv_wscale) = 0;
 221     if (wscale_ok) {//开启窗口扩大选项
 222         /* Set window scaling on max possible window
 223          * See RFC1323 for an explanation of the limit to 14
 224          */
 225         space = max_t(u32, sysctl_tcp_rmem[2], sysctl_rmem_max); //接收缓存最大空间
 226         space = min_t(u32, space, *window_clamp);
 227         while (space > 65535 && (*rcv_wscale) < 14) { //计算窗口扩大因子
 228             space >>= 1;
 229             (*rcv_wscale)++;
 230         }
 231     }
 232
 233     /* Set initial window to a value enough for senders starting with
 234      * initial congestion window of TCP_DEFAULT_INIT_RCVWND. Place
 235      * a limit on the initial window when mss is larger than 1460.
 236      */
 237     if (mss > (1 << *rcv_wscale)) {
 238         int init_cwnd = TCP_DEFAULT_INIT_RCVWND;
 239         if (mss > 1460)
 240             init_cwnd =
 241             max_t(u32, (1460 * TCP_DEFAULT_INIT_RCVWND) / mss, 2);
 242         /* when initializing use the value from init_rcv_wnd
 243          * rather than the default from above
 244          */
 245         if (init_rcv_wnd)
 246             *rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss);
 247         else
 248             *rcv_wnd = min(*rcv_wnd, init_cwnd * mss);
 249     }
 250
 251     /* Set the clamp no higher than max representable value */
 252     (*window_clamp) = min(65535U << (*rcv_wscale), *window_clamp);
 253 }
   tcp_full_space函数返回可用空间的大小:
 1056 static inline int tcp_win_from_space(int space)
1057 {
1058     return sysctl_tcp_adv_win_scale<=0 ?
1059         (space>>(-sysctl_tcp_adv_win_scale)) :
1060         space - (space>>sysctl_tcp_adv_win_scale);
1061 }
...
1070 static inline int tcp_full_space(const struct sock *sk)
1071 {
1072     return tcp_win_from_space(sk->sk_rcvbuf);
1073 }
   可见,tcp_full_space函数在sysctl_tcp_adv_win_scale不为0的情况下会返回全部接收缓存空间的一部分的大小。
   tcp_transmit_skb函数在构建TCP首部字段时会根据包中是否有SYN标记位做区别处理:

828 static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 829                 gfp_t gfp_mask)
 830 {
 831     const struct inet_connection_sock *icsk = inet_csk(sk);
 832     struct inet_sock *inet;
 833     struct tcp_sock *tp;
 834     struct tcp_skb_cb *tcb;
 835     struct tcp_out_options opts;   
 836     unsigned int tcp_options_size, tcp_header_size;
 837     struct tcp_md5sig_key *md5;    
 838     struct tcphdr *th;
 839     int err;
...
 903     if (unlikely(tcb->tcp_flags & TCPHDR_SYN)) {
 904         /* RFC1323: The window in SYN & SYN/ACK segments
 905          * is never scaled.
 906          */
 907         th->window  = htons(min(tp->rcv_wnd, 65535U));
 908     } else {
 909         th->window  = htons(tcp_select_window(sk));
 910     }
...
   对于SYN包窗口会取tcp_select_initial_window设置的值(没有超出窗口最大值的情况下);

  连接建立完成后发送的报文的窗口会选取tcp_select_window计算的值:

 261 static u16 tcp_select_window(struct sock *sk)
 262 {
 263     struct tcp_sock *tp = tcp_sk(sk);
 264     u32 cur_win = tcp_receive_window(tp);//tp->rcv_wnd减去已经接收的数据长度为当前窗口大小
 265     u32 new_win = __tcp_select_window(sk);//获得新的窗口大小
 266
 267     /* Never shrink the offered window */
 268     if (new_win < cur_win) {//出现窗口左移的可能
 269         /* Danger Will Robinson!
 270          * Don't update rcv_wup/rcv_wnd here or else
 271          * we will not be able to advertise a zero
 272          * window in time.  --DaveM
 273          *
 274          * Relax Will Robinson.
 275          */
 276         new_win = ALIGN(cur_win, 1 << tp->rx_opt.rcv_wscale);//将cur_win以1 << tp->rx_opt.rcv_wscale的倍数对齐的结果作为新的窗口
 277     }
 278     tp->rcv_wnd = new_win;
 279     tp->rcv_wup = tp->rcv_nxt;
 280
 281     /* Make sure we do not exceed the maximum possible
 282      * scaled window.
 283      */
 284     if (!tp->rx_opt.rcv_wscale && sysctl_tcp_workaround_signed_windows)
 285         new_win = min(new_win, MAX_TCP_WINDOW); //有符号窗口不能超过32767
 286     else
 287         new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale)); //窗口不能超过最大值
 288
 289     /* RFC1323 scaling applied */
 290     new_win >>= tp->rx_opt.rcv_wscale; //使用窗口扩大因子
 291
 292     /* If we advertise zero window, disable fast path. */
 293     if (new_win == 0)
 294         tp->pred_flags = 0;
 295
 296     return new_win;
 297 }
   __tcp_select_window函数会根据接收缓存的信息计算新的窗口:
2111 u32 __tcp_select_window(struct sock *sk)
2112 {
2113     struct inet_connection_sock *icsk = inet_csk(sk);
2114     struct tcp_sock *tp = tcp_sk(sk);
2115     /* MSS for the peer's data.  Previous versions used mss_clamp
2116      * here.  I don't know if the value based on our guesses
2117      * of peer's MSS is better for the performance.  It's more correct
2118      * but may be worse for the performance because of rcv_mss
2119      * fluctuations.  --SAW  1998/11/1
2120      */
2121     int mss = icsk->icsk_ack.rcv_mss;
2122     int free_space = tcp_space(sk); //得到可用缓存空间大小
2123     int full_space = min_t(int, tp->window_clamp, tcp_full_space(sk));
2124     int window;
2125
2126     if (mss > full_space)
2127         mss = full_space;
2128
2129     if (free_space < (full_space >> 1)) { //接收缓存空余空间小于接收缓存总空间的一半
2130         icsk->icsk_ack.quick = 0;
2131
2132         if (sk_under_memory_pressure(sk)) //内核内存紧张
2133             tp->rcv_ssthresh = min(tp->rcv_ssthresh,
2134                            4U * tp->advmss); //减小当前最大窗口值
2135
2136         if (free_space < mss) //可用缓存太少,则返回0窗口
2137             return 0;
2138     }
2139
2140     if (free_space > tp->rcv_ssthresh)
2141         free_space = tp->rcv_ssthresh;
2142
2143     /* Don't do rounding if we are using window scaling, since the
2144      * scaled window will not line up with the MSS boundary anyway.
2145      */
2146     window = tp->rcv_wnd;
2147     if (tp->rx_opt.rcv_wscale) {//开启了窗口扩大选项
2148         window = free_space; //直接使用可用缓存大小
2149
2150         /* Advertise enough space so that it won't get scaled away.
2151          * Import case: prevent zero window announcement if
2152          * 1<<rcv_wscale > mss.
2153          */
2154         if (((window >> tp->rx_opt.rcv_wscale) << tp->rx_opt.rcv_wscale) != window)
2155             window = (((window >> tp->rx_opt.rcv_wscale) + 1) //+ 1是为了保证当 1<<rcv_wscale > mss时对端至少可以发送一个报文段
2156                   << tp->rx_opt.rcv_wscale);
2157     } else {
2158         /* Get the largest window that is a nice multiple of mss.
2159          * Window clamp already applied above.
2160          * If our current window offering is within 1 mss of the
2161          * free space we just keep it. This prevents the divide
2162          * and multiply from happening most of the time.
2163          * We also don't do any window rounding when the free space
2164          * is too small.
2165          */
2166         if (window <= free_space - mss || window > free_space) //旧的通告窗口过大或过小
2167             window = (free_space / mss) * mss; //调整窗口大小为对端MSS的整数倍
2168         else if (mss == full_space && //可用空间仅仅为一个MSS
2169              free_space > window + (full_space >> 1)) //旧的通告窗口远小于可用空间大小
2170             window = free_space;
2171     }
2172
2173     return window;
2174 }
  综上可知,TCP数据接收端通告给对端的窗口是其接收缓存大小的一种反映。

6.2.2 窗口维护

  接收到SYN|ACK和三次握手的ACK时TCP还会调用tcp_init_buffer_space更新一下窗口信息。收到SYN|ACK时:

5373 static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
5374                      const struct tcphdr *th, unsigned int len)
5375 {
...
5480         tcp_finish_connect(sk, skb);
...

5291 void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
5292 {
...
5315     tcp_init_buffer_space(sk);
...
  收到ACK时:
5600 int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
5601               const struct tcphdr *th, unsigned int len)
5602 {
...
5682         case TCP_SYN_RECV:
...
5700                     tcp_init_buffer_space(sk);
...
  tcp_init_buffer_space函数用于更新window_clamp信息:
 376 void tcp_init_buffer_space(struct sock *sk)
 377 {
 378     struct tcp_sock *tp = tcp_sk(sk);
 379     int maxwin;
 380 
 381     if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK))
 382         tcp_fixup_rcvbuf(sk);
 383     if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK))
 384         tcp_fixup_sndbuf(sk);
 385 
 386     tp->rcvq_space.space = tp->rcv_wnd;
 387 
 388     maxwin = tcp_full_space(sk);
 389 
 390     if (tp->window_clamp >= maxwin) {
 391         tp->window_clamp = maxwin;
 392 
 393         if (sysctl_tcp_app_win && maxwin > 4 * tp->advmss)
 394             tp->window_clamp = max(maxwin -
 395                            (maxwin >> sysctl_tcp_app_win),
 396                            4 * tp->advmss);
 397     }
 398 
 399     /* Force reservation of one segment. */
 400     if (sysctl_tcp_app_win &&
 401         tp->window_clamp > 2 * tp->advmss &&
 402         tp->window_clamp + tp->advmss > maxwin)
 403         tp->window_clamp = max(2 * tp->advmss, maxwin - tp->advmss);
 404 
 405     tp->rcv_ssthresh = min(tp->rcv_ssthresh, tp->window_clamp);
 406     tp->snd_cwnd_stamp = tcp_time_stamp;
 407 }
   在“建立”状态下接收窗口的更新情况有:

(1)收到数据调用tcp_event_data_recv函数时:

 584 static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
 585 {
...
 626     if (skb->len >= 128)
 627         tcp_grow_window(sk, skb);
 628 }
  tcp_grow_window函数用于更新当前最大通告窗口值tp->rcv_ssthresh:
 305 static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
 306 {
 307     struct tcp_sock *tp = tcp_sk(sk);
 308     /* Optimize this! */
 309     int truesize = tcp_win_from_space(skb->truesize) >> 1;
 310     int window = tcp_win_from_space(sysctl_tcp_rmem[2]) >> 1; //允许增加的最大window大小
 311 
 312     while (tp->rcv_ssthresh <= window) {
 313         if (truesize <= skb->len)
 314             return 2 * inet_csk(sk)->icsk_ack.rcv_mss;
 315 
 316         truesize >>= 1;
 317         window >>= 1;
 318     }
 319     return 0; //不增大rcv_ssthresh
 320 }
 321  
322 static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
 323 {
 324     struct tcp_sock *tp = tcp_sk(sk);
 325 
 326     /* Check #1 */
 327     if (tp->rcv_ssthresh < tp->window_clamp && //rcv_ssthresh小于上限
 328         (int)tp->rcv_ssthresh < tcp_space(sk) && //rcv_ssthresh小于可用空间大小
 329         !sk_under_memory_pressure(sk)) { //内存不紧张
 330         int incr;
 331   //可以增大
 332         /* Check #2. Increase window, if skb with such overhead
 333          * will fit to rcvbuf in future.
 334          */
 335         if (tcp_win_from_space(skb->truesize) <= skb->len) //如果只增加skb->truesize大小的空间,但其映射出来的窗口值小于skb中数据的长度
 336             incr = 2 * tp->advmss; //增加2个MSS大小
 337         else
 338             incr = __tcp_grow_window(sk, skb);
 339 
 340         if (incr) {
 341             incr = max_t(int, incr, 2 * skb->len);
 342             tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr,
 343                            tp->window_clamp); //增大rcv_ssthresh
 344             inet_csk(sk)->icsk_ack.quick |= 1; //允许快速回复ACK
 345         }
 346     }
 347 }
(2)TCP调用tcp_try_rmem_schedule整理接收缓存时:

4061 static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb,
4062                  unsigned int size)
4063 {
4064     if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || //接收缓存分配超量
4065         !sk_rmem_schedule(sk, skb, size)) { //全局缓存或接收缓存空间达到上限
4066 
4067         if (tcp_prune_queue(sk) < 0) //释放一部分缓存,必要时会清空乱序队列
...
  tcp_prune_queue函数:

4623 static int tcp_prune_queue(struct sock *sk)
4624 {
...
4631     if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) //接收缓存紧张
4632         tcp_clamp_window(sk);
4633     else if (sk_under_memory_pressure(sk)) //内核内存紧张
4634         tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss); //缩小rcv_ssthresh
  tcp_clamp_window函数会更新rcv_ssthresh:

 410 static void tcp_clamp_window(struct sock *sk)
 411 {
 412     struct tcp_sock *tp = tcp_sk(sk);
 413     struct inet_connection_sock *icsk = inet_csk(sk);
 414 
 415     icsk->icsk_ack.quick = 0;
 416 
 417     if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
 418         !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
 419         !sk_under_memory_pressure(sk) &&
 420         sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) {
 421         sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
 422                     sysctl_tcp_rmem[2]);
 423     }
 424     if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
 425         tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss);
 426 }

(3)应用进程通过系统调用copy完数据后,调用tcp_rcv_space_adjust调整接收缓存空间:

522 void tcp_rcv_space_adjust(struct sock *sk)
 523 {
...
 535     space = 2 * (tp->copied_seq - tp->rcvq_space.seq);
 536 
 537     space = max(tp->rcvq_space.space, space);
 538 
 539     if (tp->rcvq_space.space != space) {
...
 544         if (sysctl_tcp_moderate_rcvbuf &&
 545             !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
 546             int new_clamp = space;
 547 
 548             /* Receive space grows, normalize in order to
 549              * take into account packet headers and sk_buff
 550              * structure overhead.
 551              */
 552             space /= tp->advmss; //space转换为最大报文段个数
 553             if (!space)
 554                 space = 1;
 555             rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER);
 556             while (tcp_win_from_space(rcvmem) < tp->advmss)
 557                 rcvmem += 128;
 558             space *= rcvmem;
 559             space = min(space, sysctl_tcp_rmem[2]);
 560             if (space > sk->sk_rcvbuf) {
 561                 sk->sk_rcvbuf = space;
 562 
 563                 /* Make the window clamp follow along.  */
 564                 tp->window_clamp = new_clamp; //更新最大通告窗口值
...

  数据接收方发送的通告窗口被数据发送方接收到后,就称为发送窗口。对于发送窗口,发包方收到ACK后会调用tcp_ack函数进行处理。tcp_ack函数在慢速处理路径会调用tcp_ack_update_window函数更新发送窗口(在快速处理路径下通告窗口不变,发送窗口也就不变):

3218 static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32 ack,
3219                  u32 ack_seq)
3220 {
3221     struct tcp_sock *tp = tcp_sk(sk);
3222     int flag = 0;
3223     u32 nwin = ntohs(tcp_hdr(skb)->window); //得到通告窗口
3224
3225     if (likely(!tcp_hdr(skb)->syn))    //非SYN包
3226         nwin <<= tp->rx_opt.snd_wscale; //得到真正的窗口大小
3227
3228     if (tcp_may_update_window(tp, ack, ack_seq, nwin)) { //可以进行窗口更新
3229         flag |= FLAG_WIN_UPDATE;       
3230         tcp_update_wl(tp, ack_seq); //记录更新窗口时的ack_seq
3231
3232         if (tp->snd_wnd != nwin) {         //通告窗口变化
3233             tp->snd_wnd = nwin; //更新发送窗口
3234
3235             /* Note, it is the only place, where
3236              * fast path is recovered for sending TCP.
3237              */
3238             tp->pred_flags = 0;            
3239             tcp_fast_path_check(sk); //试着开启快速处理路径
3240
3241             if (nwin > tp->max_window) {
3242                 tp->max_window = nwin; //更新最大发送窗口信息
3243                 tcp_sync_mss(sk, inet_csk(sk)->icsk_pmtu_cookie); //更新MSS信息
3244             }
3245         }
3246     }
3247
3248     tp->snd_una = ack;
3249
3250     return flag;
3251 }
   tcp_may_update_window函数判断是否允许更新窗口:
3204 static inline bool tcp_may_update_window(const struct tcp_sock *tp,
3205                     const u32 ack, const u32 ack_seq,
3206                     const u32 nwin)                
3207 {
3208     return  after(ack, tp->snd_una) || //确认了一部分数据
3209         after(ack_seq, tp->snd_wl1) || //从上次更新窗口到现在对端收到了新的数据
3210         (ack_seq == tp->snd_wl1 && nwin > tp->snd_wnd); //报文的seq和ack_seq都没有变化,但窗口变大了,即为窗口更新报文
3211 }
  3208-3209:有更多的新数据被确认时通常对端的接收缓存大小会改变,从而通告窗口会变化。即使窗口不变,旧数据被确认后窗口的右边缘右移,从而可以发送更多数据。

6.2.3 窗口消费

  窗口的值对于数据收发双方有着不同的意义:数据发送方利用窗口判断数据是否可以发送;接收方使用窗口来过滤接收到的数据。先看发送方:

 1811 static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
1812                int push_one, gfp_t gfp)
1813 {
1814     struct tcp_sock *tp = tcp_sk(sk);
1815     struct sk_buff *skb;
1816     unsigned int tso_segs, sent_pkts;
1817     int cwnd_quota;
1818     int result;
...
1832     while ((skb = tcp_send_head(sk))) {
1833         unsigned int limit;
...
1851         if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now))) //检查发送窗口是否允许发送数据
1852             break; //不允许则中止发送
   tcp_snd_wnd_test函数检查发送窗口:
1490 static bool tcp_snd_wnd_test(const struct tcp_sock *tp,
1491                  const struct sk_buff *skb,     
1492                  unsigned int cur_mss)
1493 {
1494     u32 end_seq = TCP_SKB_CB(skb)->end_seq;
1495
1496     if (skb->len > cur_mss)
1497         end_seq = TCP_SKB_CB(skb)->seq + cur_mss;
1498
1499     return !after(end_seq, tcp_wnd_end(tp));//tcp_wnd_end(tp)为tp->snd_una + tp->snd_wnd,即发送窗口允许发送的最高序列号
1500 }
  只有当end_seq <= snd_una + snd_wnd时发送的数据才能全部落入窗口之内。

  再来看数据接收端,慢速路径中在检查报文seq合法性时会调用tcp_sequence来检查数据是否在窗口之内:

3738 static inline bool tcp_sequence(const struct tcp_sock *tp, u32 seq, u32 end_seq)
3739 {
3740     return  !before(end_seq, tp->rcv_wup) && 
3741         !after(seq, tp->rcv_nxt + tcp_receive_window(tp));
3742 }
  3740:如果before(end_seq, rcv_wup)为真,则意味着全部数据段都在窗口左边,即全部为旧数据

  3741:如果after(seq, rcv_nxt + rcv_win)为真,则说明全部数据段位于窗口右边,即超出窗口

  tcp_receive_window的功能是计算接收窗口(即对端的发送窗口):

 651 static inline u32 tcp_receive_window(const struct tcp_sock *tp)
 652 {
 653     s32 win = tp->rcv_wup + tp->rcv_wnd - tp->rcv_nxt;
 654 
 655     if (win < 0)
 656         win = 0;
 657     return (u32) win;
 658 }
  653:rcv_wup为最后一次发送窗口通告时的rcv_nxt,rcv_wnd为最后一次发送窗口通告时的窗口值,rcv_nxt - rcv_wup为最后一次发送窗口通告到现在所收到的数据长度(不包括乱序数据),当前接收窗口的大小为:最后一次通告的窗口 - 当前已经接收的数据长度 = rcv_wnd - (rcv_nxt - rcv_wup) = rcv_wup + rcv_wnd - rcv_nxt。
  tcp_data_queue中会进行更严格的检查:
4300 static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
4301 {
...
4321     if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
4322         if (tcp_receive_window(tp) == 0) //0窗口,不能接收数据
4323             goto out_of_window;
...
4385 out_of_window:
4386         tcp_enter_quickack_mode(sk);
4387         inet_csk_schedule_ack(sk);
4388 drop:
4389         __kfree_skb(skb);
4390         return;
4391     }
...
4392 
4393     /* Out of window. F.e. zero window probe. */
4394     if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt + tcp_receive_window(tp))) //数据全部在窗口之外
4395         goto out_of_window;
..
4399     if (before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
4400         /* Partial packet, seq < rcv_next < end_seq */
4401         SOCK_DEBUG(sk, "partial packet: rcv_next %X seq %X - %X\n",
4402                tp->rcv_nxt, TCP_SKB_CB(skb)->seq,
4403                TCP_SKB_CB(skb)->end_seq);
4404 
4405         tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, tp->rcv_nxt);
4406 
4407         /* If window is closed, drop tail of packet. But after
4408          * remembering D-SACK for its head made in previous line.
4409          */
4410         if (!tcp_receive_window(tp)) //0窗口,乱序数据也不能收
4411             goto out_of_window;
...
  4394-4395:能到达tcp_data_queue的包必然通过了tcp_sequence的检查,即seq <= rcv_nxt + rcv_win成立。但如果seq == rcv_nxt + rcv_win为真,则在此处也会被判断为“out of window”,被丢弃,也就是说tcp_data_queue的检查比tcp_sequence多了一个“相等”的情况。为什么不在tcp_sequence中做这个严格的检查呢?我认为对于seq == rcv_nxt + rcv_win为真的包,其ack_seq可能是合法的,tcp_ack函数需要用这个包头中的ack_seq和窗口等信息;由于tcp_data_queue只对数据感兴趣,这种“没有一字节数据在窗口内”的包自然就没有用了。

  至此,对TCP滑动窗口机制的探究就告一段落了,我们还会在后续的旅程中再次看到它的身影。