[Service]
Type=notify
# the default is not to use systemd for cgroups because the delegate issues still
# exists and systemd currently does not support the cgroup feature set required
# for containers run by docker
# --bip sets the docker0 bridge address/subnet; --mtu lowers the interface MTU
# (1450 leaves headroom for overlay/encapsulation overhead on a 1500-MTU link)
ExecStart=/usr/bin/dockerd -H fd:// --containerd=/run/containerd/containerd.sock --bip=10.244.12.1/24 --mtu=1450
# Reload daemon configuration without restarting the service
ExecReload=/bin/kill -s HUP $MAINPID
# Startup may be slow while containers are restored; never time out
TimeoutStartSec=0
# Always restart the daemon, waiting 2 seconds between attempts
RestartSec=2
Restart=always
// CmdArgs captures all the arguments passed in to the plugin
// via both env vars and stdin
type CmdArgs struct {
	ContainerID   string // CNI_CONTAINERID: ID of the container being set up
	Netns         string // CNI_NETNS: path to the container's network namespace
	IfName        string // CNI_IFNAME: interface name to create inside the netns
	Args          string // CNI_ARGS: extra key=value pairs, semicolon-separated
	Path          string // CNI_PATH: search path for plugin executables
	NetnsOverride string
	StdinData     []byte // raw network configuration JSON read from stdin
}
# Build the CNI plugin binary, then invoke it once by hand with the
# environment variables the container runtime would normally provide.
go build -o example .
echo "Ready to call the cni program and create resources"
# ADD command against the pre-created netns ns1; config is fed on stdin.
sudo CNI_COMMAND=ADD CNI_CONTAINERID=ns1 CNI_NETNS=/var/run/netns/ns1 CNI_IFNAME=eth10 CNI_PATH=`pwd` ./example < config
echo "The CNI has been called, see the following results"
得到以下输出:
1 2 3 4 5 6 7 8 9 10
[root@master knet]# ./run.sh Ready to call the cni program and create resources interfance Name: eth10 netns path: /var/run/netns/ns1 the config data: { "name": "mynet", "BridgeName": "test", "IP": "192.0.2.1/24" } The CNI has been called, see the following results
当把配置文件中的数据转化为代码中的数据结构之后,接下来我们就需要使用这些数据调用内核接口创建对应的内核资源。单从目前的实现目标——创建 Linux 网桥——来说,可以通过原始的 os.Exec 调用系统命令来创建,不过这样就需要深入到不同操作系统和内核的功能实现上。为了屏蔽这部分的复杂性,我们直接借用开源库的调用来实现这部分的功能
// init pins the calling goroutine to its current OS thread.
//
// This ensures that main runs only on the main thread (thread group
// leader). Namespace operations (unshare, setns) apply to a single OS
// thread, so the goroutine must not be rescheduled onto another thread
// while it is switching network namespaces.
func init() {
	runtime.LockOSThread()
}
通过添加上述的代码,我们可以成功给多个 ns 容器提供自己的 veth 设备并接入网桥,获得了二层连通性。然而每个容器在 L3 依旧是不通的,它们并没有可以唯一标识彼此的 IP 地址。 接下来,我们进一步实验,看看如何将 IP 地址分配给各自的容器。
IP 地址管理在集群中往往是结合 IPAM 插件实现相关的功能,通过接入 k8s apiserver 来同步和获取当前集群内的 IP 地址划分情况,以及这些 IP 地址所分配到的节点地址。 这里的节点地址是提供给 k8s 建立集群的内网地址,也就是说在跨节点通信的情况下,通过这样的方式做 IP 封装(encapsulation)时使用的是节点地址;此时如果节点之间二层不可通,那么容器之间通过 PodIP 就无法找到彼此。
// Located in addr.go
// Addr represents an IP address from netlink. Netlink ip addresses
// include a mask, so it stores the address as a net.IPNet.
type Addr struct {
	*net.IPNet
	Label       string     // interface label (e.g. "eth0:1")
	Flags       int        // IFA_FLAGS attribute bits
	Scope       int        // address scope (host, link, global, ...)
	Peer        *net.IPNet // peer address for point-to-point links
	Broadcast   net.IP     // broadcast address
	PreferedLft int        // preferred lifetime in seconds (upstream spelling)
	ValidLft    int        // valid lifetime in seconds
	LinkIndex   int        // index of the link this address belongs to
}
// 位于 addr_linux.go // AddrAdd will add an IP address to a link device. // // Equivalent to: `ip addr add $addr dev $link` // // If `addr` is an IPv4 address and the broadcast address is not given, it // will be automatically computed based on the IP mask if /30 or larger. funcAddrAdd(link Link, addr *Addr)error { return pkgHandle.AddrAdd(link, addr) }
// AddrAdd will add an IP address to a link device. // // Equivalent to: `ip addr add $addr dev $link` // // If `addr` is an IPv4 address and the broadcast address is not given, it // will be automatically computed based on the IP mask if /30 or larger. func(h *Handle) AddrAdd(link Link, addr *Addr) error { req := h.newNetlinkRequest(unix.RTM_NEWADDR, unix.NLM_F_CREATE|unix.NLM_F_EXCL|unix.NLM_F_ACK) return h.addrHandle(link, addr, req) }
我们使用 golang 提供的 net 包来生成 net.IPNet 类型和它的CIDR形式(IP地址和Mask),然后通过 net.ParseCIDR 来解析配置文件中获取的IP字符串并返回一个 net.IPNet 的指针。而这几步都需要在创建对应网络资源的时候完成绑定,所以我们需要修改前面的处理程序,在创建 veth 时分配 IP 地址。由于从 net.ParseCIDR 得到的 net.IPNet 对象是子网而不是真正的 IP 地址,接下来需要依据此子网生成合适的 IP 地址重新分配。
# Tear down resources left over from the previous run.
sudo ip netns del ns1
sudo ifconfig test down
sudo brctl delbr test

# Re-create ns1 to emulate a container.
sudo ip netns add ns1

go build -o example .

# Invoke the CNI plugin to create the veth pair and assign an address.
echo "Ready to call the cni to create ip for ns1"
sudo CNI_COMMAND=ADD CNI_CONTAINERID=ns1 CNI_NETNS=/var/run/netns/ns1 CNI_IFNAME=eth10 CNI_PATH=`pwd` ./example < config
echo "The CNI has been called, see the following results"
echo "The bridge and the veth have been attached to"
sudo brctl show test
echo "The interface in the netns"
sudo ip netns exec ns1 ifconfig -a
[root@master knet]# ./run.sh Cannot remove namespace file "/var/run/netns/ns1": No such file or directory test: ERROR while getting interface flags: No such device bridge test doesn't exist; can't delete it Ready to call the cni to create ip for ns1 {test 192.0.2.15/24} The CNI has been called, see the following results The bridge and the veth has been attatch to bridge name bridge id STP enabled interfaces test 8000.b6e6090625de no veth2a9d8a3d The interface in the netns eth10: flags=4163<UP,BROADCAST,RUNNING,MULTICAST> mtu 1500 inet 192.0.2.15 netmask 255.255.255.0 broadcast 192.0.2.255 inet6 fe80::477:7aff:fee3:a9b8 prefixlen 64 scopeid 0x20<link> ether 06:77:7a:e3:a9:b8 txqueuelen 0 (Ethernet) RX packets 1 bytes 90 (90.0 B) RX errors 0 dropped 0 overruns 0 frame 0 TX packets 1 bytes 90 (90.0 B) TX errors 0 dropped 0 overruns 0 carrier 0 collisions 0