0%

容器实现-构造实现run命令版本的容器

经过namespace,cgroup和overlay2的简单认识后,该文章将实现一个run命令版本的容器,该程序能够识别命令行参数,运行namespace隔离的程序,并且能挂载/proc目录。

实现效果

先来看看实现效果吧。

使用说明界面,主要使用了github.com/urfave/cli库,以提供方便的命令行参数操作。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
[root@staight chmdocker]# ./chmdocker 
NAME:
chmdocker - chmdocker is a simple container runtime implementation.
Enjoy it, just for fun.

USAGE:
chmdocker [global options] command [command options] [arguments...]

VERSION:
0.0.1

COMMANDS:
init Init container process run user's process in container. Do not call it outside
run Create a container with namespace and cgroups limit
mydocker run -ti [command]
help, h Shows a list of commands or help for one command

GLOBAL OPTIONS:
--help, -h show help
--version, -v print the version

run子命令的使用说明:

1
2
3
4
5
6
7
8
9
10
[root@staight chmdocker]# ./chmdocker help run
NAME:
chmdocker run - Create a container with namespace and cgroups limit
mydocker run -ti [command]

USAGE:
chmdocker run [command options] [arguments...]

OPTIONS:
--ti enable tty

接下来run一个容器:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
[root@staight chmdocker]# ./chmdocker run --ti /bin/sh
{"level":"info","msg":"init come on","time":"2019-10-07T11:39:57+08:00"}
{"level":"info","msg":"command /bin/sh","time":"2019-10-07T11:39:57+08:00"}
{"level":"info","msg":"command /bin/sh","time":"2019-10-07T11:39:57+08:00"}
sh-4.2# ip addr
1: lo: <LOOPBACK> mtu 65536 qdisc noop state DOWN group default qlen 1000
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
sh-4.2# ps -elf
F S UID PID PPID C PRI NI ADDR SZ WCHAN STIME TTY TIME CMD
4 S root 1 0 0 80 0 - 28893 do_wai 11:51 pts/1 00:00:00 /bin/sh
0 R root 6 1 0 80 0 - 38841 - 11:52 pts/1 00:00:00 ps -elf
sh-4.2# ls /proc
1 buddyinfo cmdline crypto dma fb interrupts irq keys kpagecount locks misc mtrr partitions scsi softirqs sys timer_list uptime vmstat
5 bus consoles devices driver filesystems iomem kallsyms key-users kpageflags mdstat modules net sched_debug self stat sysrq-trigger timer_stats version zoneinfo
acpi cgroups cpuinfo diskstats execdomains fs ioports kcore kmsg loadavg meminfo mounts pagetypeinfo schedstat slabinfo swaps sysvipc tty vmallocinfo
sh-4.2# exit
exit

代码分析

先来看看程序入口:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
package main

import (
log "github.com/Sirupsen/logrus"
"github.com/urfave/cli"
"os"
)

// usage is the description text assigned to the cli application's Usage field.
const usage = `chmdocker is a simple container runtime implementation.
Enjoy it, just for fun.`

// main builds the chmdocker command-line application, registers its
// subcommands and runs it against the process arguments. Any error returned
// by the application is logged as fatal.
func main() {
	chmdocker := cli.NewApp()
	chmdocker.Name = "chmdocker"
	chmdocker.Usage = usage
	chmdocker.Version = "0.0.1"

	// Hook executed before the application runs; it only configures logging:
	// JSON-formatted entries (instead of the default ASCII formatter) written
	// to stdout.
	chmdocker.Before = func(_ *cli.Context) error {
		log.SetOutput(os.Stdout)
		log.SetFormatter(&log.JSONFormatter{})
		return nil
	}

	// Register the subcommands: init and run.
	chmdocker.Commands = []cli.Command{initCommand, runCommand}

	// Run the application.
	err := chmdocker.Run(os.Args)
	if err != nil {
		log.Fatal(err)
	}
}

该段代码负责初始化cli,并加载子命令initCommand和runCommand:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
// runCommand defines the "run" subcommand. It accepts a boolean "ti" flag and
// requires at least one positional argument, which is handed to Run as the
// command to start.
var runCommand = cli.Command{
	Name: "run",
	Usage: `Create a container with namespace and cgroups limit
mydocker run -ti [command]`,
	Flags: []cli.Flag{
		cli.BoolFlag{Name: "ti", Usage: "enable tty"},
	},
	// Action runs once the arguments have been parsed.
	Action: func(ctx *cli.Context) error {
		if ctx.NArg() == 0 {
			return fmt.Errorf("Missing container command")
		}
		// Only the first positional argument is used as the command.
		Run(ctx.Bool("ti"), ctx.Args().First())
		return nil
	},
}

// initCommand defines the "init" subcommand. It takes the first positional
// argument as the command and passes it to container.RunContainerInitProcess,
// returning whatever error that call reports.
//
// The action fails with an error when no command is given, instead of passing
// an empty command on to the init process.
var initCommand = cli.Command{
	Name:  "init",
	Usage: "Init container process run user's process in container. Do not call it outside",
	Action: func(context *cli.Context) error {
		log.Infof("init come on")
		// Validate before calling into the container package: the init
		// process changes mounts before it execs, so an empty command must be
		// rejected up front rather than discovered when the exec fails.
		if len(context.Args()) < 1 {
			return fmt.Errorf("missing container command")
		}
		cmd := context.Args().Get(0)
		log.Infof("command %s", cmd)
		return container.RunContainerInitProcess(cmd, nil)
	},
}

runCommand执行了Run函数:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
package main

import (
"chmdocker/container"
log "github.com/Sirupsen/logrus"
"os"
)

// Run starts the process built by container.NewParentProcess for the given
// command and blocks until it exits.
//
// tty is forwarded to NewParentProcess unchanged. If the process cannot be
// started, or it finishes with an error, the error is logged and the program
// exits with status 1. When the process finishes successfully Run returns
// normally so the caller can complete with a zero exit status.
func Run(tty bool, command string) {
	// Build the command describing the new process environment.
	parent := container.NewParentProcess(tty, command)
	if err := parent.Start(); err != nil {
		log.Error(err)
		// Nothing was started, so there is nothing to wait for.
		os.Exit(1)
	}
	if err := parent.Wait(); err != nil {
		log.Error(err)
		os.Exit(1)
	}
}

Run函数启动了一个子进程并等待其退出。container包位于chmdocker/container文件夹中,其中的NewParentProcess函数负责初始化子进程的参数:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
package container

import (
"os"
"os/exec"
"syscall"
)

// NewParentProcess returns an unstarted command that re-executes the current
// binary through /proc/self/exe with the arguments "init" and command.
//
// The command is configured to be cloned with the UTS, PID, mount, network
// and IPC namespace flags. When tty is true the command's standard input,
// output and error are set to those of the current process; otherwise they
// are left unset.
func NewParentProcess(tty bool, command string) *exec.Cmd {
	const namespaceFlags = syscall.CLONE_NEWUTS |
		syscall.CLONE_NEWPID |
		syscall.CLONE_NEWNS |
		syscall.CLONE_NEWNET |
		syscall.CLONE_NEWIPC

	// /proc/self is a link to the calling process's own /proc/PID directory,
	// so /proc/self/exe refers to the currently running executable.
	child := exec.Command("/proc/self/exe", "init", command)
	child.SysProcAttr = &syscall.SysProcAttr{Cloneflags: namespaceFlags}

	if !tty {
		return child
	}
	child.Stdin, child.Stdout, child.Stderr = os.Stdin, os.Stdout, os.Stderr
	return child
}

可以看到,新创建的子进程运行在独立的命名空间中,它执行的正是程序自身(/proc/self/exe),并带上了init参数。

init子命令负责初始化容器内进程,比如说挂载/proc目录,以及以后使用cgroup均可在这里执行:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
package container

import (
"github.com/Sirupsen/logrus"
"os"
"syscall"
)

// RunContainerInitProcess prepares the mounts for the calling process and
// then replaces the process with command.
//
// It remounts / with MS_PRIVATE|MS_REC, mounts a proc filesystem on /proc
// with MS_NOEXEC|MS_NOSUID|MS_NODEV, and finally calls syscall.Exec with the
// current environment. argv contains only command; args is currently not
// used.
//
// On success the call does not return, because the process image has been
// replaced. If either mount or the exec fails, the failure is returned as an
// *os.PathError naming the operation and path, and no later step is attempted.
func RunContainerInitProcess(command string, args []string) error {
	logrus.Infof("command %s", command)

	// Change the propagation of / (recursively) to private before mounting
	// anything. The original author notes that the default is shared.
	if err := syscall.Mount("", "/", "", syscall.MS_PRIVATE|syscall.MS_REC, ""); err != nil {
		return &os.PathError{Op: "mount", Path: "/", Err: err}
	}

	// Mount /proc.
	defaultMountFlags := syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV
	if err := syscall.Mount("proc", "/proc", "proc", uintptr(defaultMountFlags), ""); err != nil {
		return &os.PathError{Op: "mount", Path: "/proc", Err: err}
	}

	// Execute the command; this only returns when the exec itself failed.
	argv := []string{command}
	if err := syscall.Exec(command, argv, os.Environ()); err != nil {
		return &os.PathError{Op: "exec", Path: command, Err: err}
	}
	return nil
}

syscall.Exec()函数并不会创建新的子进程,而是通过execve系统调用,用要执行的程序覆盖当前进程的镜像、内存和堆栈空间等信息,进程的PID保持不变。因此用户指定的命令会取代init进程,成为容器内的1号进程(见上文ps的输出)。到这里,容器运行就成功了。

大功告成~