查看问题详情

编号项目分类查看权限最后更新
0000281Anolis OS 8- cloud kernel 4.19public2021-09-17 10:47
报告员maqiao_mq 分派给 
优先级normal严重性minor出现频率always
状态 new处理状况open 
平台x86_64操作系统Anolis OS操作系统版本8
产品版本8.2-rc1 
标题0000281: UDP 类型 IPVS RS 变更时丢包问题
描述UDP 协议的 RS(Real Server)下线后,Pod 通过 IPVS 模块访问 UDP Service 时,如果源端口被复用,会命中 IPVS 连接表中仍指向已下线 RS 的五元组条目,导致首包被 DROP。
在 CoreDNS 所在 ECS 节点重启或 CoreDNS 容器更新时,该问题会导致集群内出现长达 5 分钟的概率性域名解析超时。
参考https://github.com/torvalds/linux/commit/35dfb013149f74c2be1ff9c78f14e6a3cd1539d1
问题重现步骤1. 构建、安装 IPVS 模块

替换 ip_vs.ko 到 /lib/modules/4.19.91-23.al7.x86_64/kernel/net/netfilter/ipvs/ip_vs.ko 后重启

通过 sysctl -w net.ipv4.vs.debug_level=13 打开全部日志

2. 测试过程
[root@iZbp135xgdpstsq0sgxrq2Z ~]# ipvsadm -Lcn | grep -i udp | grep 39053
UDP 04:27 UDP 172.20.1.25:39053 172.21.0.10:53 172.20.1.32:53

# 删除 172.20.1.32 后端后,复用源端口 39053 进行测试
[root@iZbp135xgdpstsq0sgxrq2Z ~]# nsenter -n/var/run/docker/netns/792eb10013d4 dig -b 172.20.1.25#39053 redis-master.default.svc.cluster.local @172.21.0.10

; <<>> DiG 9.11.4-P2-RedHat-9.11.4-26.P2.1.al7.4 <<>> -b 172.20.1.25#39053 redis-master.default.svc.cluster.local @172.21.0.10
;; global options: +cmd
;; Got answer:
;; WARNING: .local is reserved for Multicast DNS
;; You are currently testing what happens when an mDNS query is leaked to DNS
;; ->>HEADER<<- opcode: QUERY, status: NOERROR, id: 14694
;; flags: qr aa rd; QUERY: 1, ANSWER: 1, AUTHORITY: 0, ADDITIONAL: 1
;; WARNING: recursion requested but not available

;; OPT PSEUDOSECTION:
; EDNS: version: 0, flags:; udp: 4096
;; QUESTION SECTION:
;redis-master.default.svc.cluster.local. IN A

;; ANSWER SECTION:
redis-master.default.svc.cluster.local. 5 IN A 172.21.6.185

;; Query time: 0 msec
;; SERVER: 172.21.0.10#53(172.21.0.10)
;; WHEN: Fri Aug 27 16:33:17 CST 2021
;; MSG SIZE rcvd: 121

[root@iZbp135xgdpstsq0sgxrq2Z ~]# ipvsadm -Lcn | grep -i udp | grep 39053
UDP 04:58 UDP 172.20.1.25:39053 172.21.0.10:53 172.20.0.25:53

3. IPVS 模块日志
# 172.20.1.32 仍存在时的访问
[ 5897.768810] IPVS: lookup/out UDP 172.20.1.25:39053->172.21.0.10:53 not hit
[ 5897.768813] IPVS: lookup/in UDP 172.20.1.25:39053->172.21.0.10:53 hit
[ 5897.768815] IPVS: Enter: ip_vs_nat_xmit, extra//ip_vs_xmit.c line 742
[ 5897.768818] IPVS: ip_vs_update_conntrack: Updating conntrack ct=00000000c15f80aa, status=0x100, ctinfo=2, old reply=172.21.0.10:53->172.20.1.25:39053/17
[ 5897.768819] IPVS: ip_vs_update_conntrack: Updating conntrack ct=00000000c15f80aa, status=0x100, ctinfo=2, new reply=172.20.1.32:53->172.20.1.25:39053/17
[ 5897.768821] IPVS: ip_vs_update_conntrack: Updated conntrack ct=00000000c15f80aa for cp=172.20.1.25:39053->172.21.0.10:53->172.20.1.32:53/17:0
[ 5897.768831] IPVS: Leave: ip_vs_nat_xmit, extra//ip_vs_xmit.c line 813
[ 5897.769016] IPVS: lookup/out UDP 172.20.1.32:53->172.20.1.25:39053 hit
[ 5897.769020] IPVS: Leave: handle_response, extra//ip_vs_core.c line 1308

# 172.20.1.32 被删除后的访问(首包命中旧连接,被丢弃)
[ 5937.376994] IPVS: lookup/out UDP 172.20.1.25:39053->172.21.0.10:53 not hit
[ 5937.376996] IPVS: lookup/in UDP 172.20.1.25:39053->172.21.0.10:53 hit
[ 5937.376996] IPVS: DEST1
[ 5937.376996] IPVS: DEST3
[ 5937.376996] IPVS: DEST4
[ 5937.377536] IPVS: ip_vs_conn_drop_conntrack: dropping conntrack for conn 172.20.1.25:39053->172.21.0.10:53->172.20.1.32:53/17:0
[ 5937.377539] IPVS: ip_vs_conn_drop_conntrack: no conntrack for tuple=172.20.1.25:39053->172.21.0.10:53/17
[ 5937.377542] IPVS: Unbind-dest UDP c:172.20.1.25:39053 v:172.21.0.10:53 d:172.20.1.32:53 fwd:M s:0 conn->flags:10100 conn->refcnt:0 dest->refcnt:76

# /etc/resolv.conf 中 timeout 设置为 2 秒,因此 2 秒后重发请求
[ 5939.376782] IPVS: lookup/out UDP 172.20.1.25:39053->172.21.0.10:53 not hit
[ 5939.376785] IPVS: lookup/in UDP 172.20.1.25:39053->172.21.0.10:53 not hit
[ 5939.376786] IPVS: lookup service: fwm 0 UDP 172.21.0.10:53 hit
[ 5939.376790] IPVS: Bind-dest UDP c:172.20.1.25:39053 v:172.21.0.10:53 d:172.20.0.25:53 fwd:M s:0 conn->flags:100 conn->refcnt:1 dest->refcnt:9
[ 5939.376791] IPVS: Schedule fwd:M c:172.20.1.25:39053 v:172.21.0.10:53 d:172.20.0.25:53 conn->flags:10140 conn->refcnt:2
[ 5939.376793] IPVS: Enter: ip_vs_nat_xmit, extra//ip_vs_xmit.c line 742
[ 5939.376797] IPVS: ip_vs_update_conntrack: Updating conntrack ct=00000000438bfb8d, status=0x100, ctinfo=2, old reply=172.21.0.10:53->172.20.1.25:39053/17
[ 5939.376798] IPVS: ip_vs_update_conntrack: Updating conntrack ct=00000000438bfb8d, status=0x100, ctinfo=2, new reply=172.20.0.25:53->172.20.1.25:39053/17
[ 5939.376799] IPVS: ip_vs_update_conntrack: Updated conntrack ct=00000000438bfb8d for cp=172.20.1.25:39053->172.21.0.10:53->172.20.0.25:53/17:0
[ 5939.376827] IPVS: Leave: ip_vs_nat_xmit, extra//ip_vs_xmit.c line 813
[ 5939.377458] IPVS: lookup/out UDP 172.20.0.25:53->172.20.1.25:39053 hit
[ 5939.377462] IPVS: Leave: handle_response, extra//ip_vs_core.c line 1308

4. 丢包点位
// net/netfilter/ipvs/ip_vs_core.c
static unsigned int
ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int af)
{
    ...
        IP_VS_DBG_PKT(11, af, pp, skb, iph.off, "Incoming packet");

        /* Check the server status */
        if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) {
                /* the destination server is not available */

                __u32 flags = cp->flags;

                /* when timer already started, silently drop the packet.*/
                if (timer_pending(&cp->timer)) {
                        IP_VS_DBG_BUF(7, "DEST1");// 标记 1
                        __ip_vs_conn_put(cp);
                }
                else {
                        IP_VS_DBG_BUF(7, "DEST2");// 标记 2
                        ip_vs_conn_put(cp);
                }

                if (sysctl_expire_nodest_conn(ipvs) &&
                    !(flags & IP_VS_CONN_F_ONE_PACKET)) {
                        IP_VS_DBG_BUF(7, "DEST3");// 标记 3
                        /* try to expire the connection immediately */
                        ip_vs_conn_expire_now(cp);
                }

                IP_VS_DBG_BUF(7, "DEST4");// 标记 4
                return NF_DROP; // 丢包点位
        }
    ...
}
附注Aone id: 36606734
标签没加标签.

活动

这个问题没有注释信息

问题历史

日期 用户名 字段 更改
2021-09-17 10:47 maqiao_mq 新建问题