Use CEPH as persistent storage for NOMAD

Summary:

Nomad is an increasingly popular choice for orchestrating containers, and it supports CSI plugins, so ceph-csi can be used directly.

We’ll see how to use Ceph (RBD) to store containers’ data.

Content

Requirements

  • Nomad cluster (v1.1.1)
  • Ceph cluster (octopus 15.2.10)
  • ceph-csi (3.3.1)
  • Docker (20.10.7)
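
To confirm that your environment roughly matches these versions, a few quick checks can help (a minimal sketch; the expected outputs are only indicative):

nomad version                                    # expect Nomad v1.1.1
ceph version                                     # expect ceph version 15.2.x (octopus)
docker version --format '{{.Server.Version}}'    # expect 20.10.x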

Configure NOMAD

We’ll use the Docker driver and need to allow containers to run in privileged mode.

Edit /etc/nomad.d/nomad.hcl and its plugin section.

plugin "docker" {
config {
allow_privileged = true
}
}

Restart Nomad: systemctl restart nomad
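
Before deploying the plugins, it may be worth checking that the agent came back cleanly (assuming a systemd-managed Nomad agent and a working CLI on the same host):

systemctl status nomad    # the unit should be active (running)
nomad node status         # client nodes should report "ready"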

Create Ceph CSI Controller and Plugin Node

The ceph-csi plugin requires two components:

  • The controller plugin communicates with the provider’s API.
  • The node plugin runs on each client and handles tasks such as managing mount points.

CEPH-CSI-PLUGIN-CONTROLLER.NOMAD

You need to replace:

  • e585be01-47aa-44bc-9a58-55415b3f4225 with your Ceph fsid
  • the monitor IP addresses with yours
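
Both values can be read directly from the Ceph cluster, for example:

ceph fsid        # prints the cluster fsid
ceph mon dump    # lists the monitor addresses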

job "ceph-csi-plugin-controller" {

datacenters = ["dc1"]

group "controller" {

network {

port "metrics" {}

}

task "ceph-controller" {

template {

data = <<EOF

[{

"clusterID": "e585be01-47aa-44bc-9a58-55415b3f4225",

"monitors": [

"192.168.112.5",

"192.168.112.6",

"192.168.112.7"

]

}]

EOF

destination = "local/config.json"

change_mode = "restart"

}

driver = "docker"

config {

image = "quay.io/cephcsi/cephcsi:v3.3.1"

volumes = [

"./local/config.json:/etc/ceph-csi-config/config.json"

]

mounts = [

{

type = "tmpfs"

target = "/tmp/csi/keys"

readonly = false

tmpfs_options = {

size = 1000000 # size in bytes

}

}

]

args = [

"--type=rbd",

"--controllerserver=true",

"--drivername=rbd.csi.ceph.com",

"--endpoint=unix://csi/csi.sock",

"--nodeid=${node.unique.name}",

"--instanceid=${node.unique.name}-controller",

"--pidlimit=-1",

"--logtostderr=true",

"--v=5",

"--metricsport=$${NOMAD_PORT_metrics}"

]

}

resources {

cpu = 500

memory = 256

}

service {

name = "ceph-csi-controller"

port = "metrics"

tags = [ "prometheus" ]

}

csi_plugin {

id = "ceph-csi"

type = "controller"

mount_dir = "/csi"

}

}

}

}

CEPH-CSI-PLUGIN-NODES.NOMAD

job "ceph-csi-plugin-nodes" {

datacenters = ["dc1"]

type = "system"

group "nodes" {

network {

port "metrics" {}

}

task "ceph-node" {

driver = "docker"

template {

data = <<EOF

[{

"clusterID": "e585be01-47aa-44bc-9a58-55415b3f4225",

"monitors": [

"192.168.112.5",

"192.168.112.6",

"192.168.112.7"

]

}]

EOF

destination = "local/config.json"

change_mode = "restart"

}

config {

image = "quay.io/cephcsi/cephcsi:v3.3.1"

volumes = [

"./local/config.json:/etc/ceph-csi-config/config.json"

]

mounts = [

{

type = "tmpfs"

target = "/tmp/csi/keys"

readonly = false

tmpfs_options = {

size = 1000000 # size in bytes

}

}

]

args = [

"--type=rbd",

"--drivername=rbd.csi.ceph.com",

"--nodeserver=true",

"--endpoint=unix://csi/csi.sock",

"--nodeid=${node.unique.name}",

"--instanceid=${node.unique.name}-nodes",

"--pidlimit=-1",

"--logtostderr=true",

"--v=5",

"--metricsport=$${NOMAD_PORT_metrics}"

]

privileged = true

}

resources {

cpu = 500

memory = 256

}

service {

name = "ceph-csi-nodes"

port = "metrics"

tags = [ "prometheus" ]

}

csi_plugin {

id = "ceph-csi"

type = "node"

mount_dir = "/csi"

}

}

}

}

We can now start both jobs:

nomad job run ceph-csi-plugin-controller.nomad

nomad job run ceph-csi-plugin-nodes.nomad

We can then check the plugin status. It can take a few minutes, as the image has to be downloaded first.

root@nomad112 ~/nomadceph $ nomad plugin status ceph-csi
ID                   = ceph-csi
Provider             = rbd.csi.ceph.com
Version              = 3.3.1
Controllers Healthy  = 1
Controllers Expected = 1
Nodes Healthy        = 1
Nodes Expected       = 1

Allocations
ID        Node ID   Task Group  Version  Desired  Status   Created    Modified
23b4db0c  a61ef171  nodes       4        run      running  3h26m ago  3h25m ago
fee74115  a61ef171  controller  6        run      running  3h26m ago  3h25m ago
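
If the plugin does not become healthy, the allocation logs are usually the first place to look, e.g. for the controller task defined above:

nomad alloc logs fee74115 ceph-controller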

To be able to use Ceph RBD, our hosts must have the rbd kernel module loaded.

sudo lsmod | grep rbd
rbd                    94208  2
libceph               364544  1 rbd

If that’s not the case, load it by running sudo modprobe rbd.
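
To keep the module loaded across reboots, it can be declared for systemd’s modules-load mechanism (a minimal sketch; the file name is arbitrary):

echo rbd | sudo tee /etc/modules-load.d/rbd.conf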

Since Nomad 1.1.0 we can create RBD images directly with Nomad.

CEPH-VOLUME.HCL

You need to replace:

  • e585be01-47aa-44bc-9a58-55415b3f4225 with your Ceph fsid
  • admin with your Ceph user
  • AQAVXIlgv6krBhAATxGcAZZgOwW7mW9HdSPJxQ== with that user’s key (see the lookup commands after this list)
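
The fsid and the key can be retrieved from the Ceph side, for example:

ceph fsid
ceph auth get-key client.admin

If the rbd pool does not exist yet, it can be created and initialized first (assuming default settings):

ceph osd pool create rbd
rbd pool init rbd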

id = "ceph-mysql"

name = "ceph-mysql"

type = "csi"

plugin_id = "ceph-csi"

capacity_max = "200G"

capacity_min = "100G"

capability {

access_mode = "single-node-writer"

attachment_mode = "file-system"

}

secrets {

userID = "admin"

userKey = "AQAVXIlgv6krBhAATxGcAZZgOwW7mW9HdSPJxQ=="

}

parameters {

clusterID = "e585be01-47aa-44bc-9a58-55415b3f4225"

pool = "rbd"

imageFeatures = "layering"

}

Then we create it.

nomad volume create ceph-volume.hcl

Created external volume 0001-0024-e585be01-47aa-44bc-9a58-55415b3f4225-0000000000000002-100fc248-d011-11eb-b9f0-0242ac110002 with ID ceph-mysql
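
The volume should now be visible on both sides: nomad volume status shows it as schedulable, and the backing image appears in the Ceph pool (the csi-vol-* name is generated by ceph-csi):

nomad volume status ceph-mysql
rbd ls rbd    # expect an image named csi-vol-<uuid>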

We’ll reuse the MySQL job from HashiCorp Learn, edited to use our RBD volume.

MYSQL.NOMAD

job "mysql-server" {

datacenters = ["dc1"]

type = "service"

group "mysql-server" {

count = 1

volume "ceph-mysql" {

type = "csi"

attachment_mode = "file-system"

access_mode = "single-node-writer"

read_only = false

source = "ceph-mysql"

}

network {

port "db" {

static = 3306

}

}

restart {

attempts = 10

interval = "5m"

delay = "25s"

mode = "delay"

}

task "mysql-server" {

driver = "docker"

volume_mount {

volume = "ceph-mysql"

destination = "/srv"

read_only = false

}

env {

MYSQL_ROOT_PASSWORD = "password"

}

config {

image = "hashicorp/mysql-portworx-demo:latest"

args = ["--datadir", "/srv/mysql"]

ports = ["db"]

}

resources {

cpu = 500

memory = 1024

}

service {

name = "mysql-server"

port = "db"

check {

type = "tcp"

interval = "10s"

timeout = "2s"

}

}

}

}

}

We can now start our job:

nomad job run mysql.nomad

Check the job status to get its allocation ID, then list the items table.

nomad job status mysql-server
...
Status = running
...

Allocations
ID        Node ID   Task Group    Version  Desired  Status   Created  Modified
38070da7  9ad01c63  mysql-server  0        run      running  6s ago   3s ago

nomad alloc exec 38070da7 sh

# mysql -u root -p -D itemcollection

Enter password:

...

mysql> select * from items;

+----+----------+
| id | name     |
+----+----------+
|  1 | bike     |
|  2 | baseball |
|  3 | chair    |
+----+----------+

Let’s add some items:

mysql> INSERT INTO items (name) VALUES ('glove');

mysql> INSERT INTO items (name) VALUES ('hat');

mysql> INSERT INTO items (name) VALUES ('keyboard');

mysql> select * from items;

+----+----------+
| id | name     |
+----+----------+
|  1 | bike     |
|  2 | baseball |
|  3 | chair    |
|  4 | glove    |
|  5 | hat      |
|  6 | keyboard |
+----+----------+

To ensure our DB is on persistent storage, we’ll delete the container and rebuild it with the same job.

nomad stop -purge mysql-server

nomad job run mysql.nomad
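
The new job gets a new allocation ID (d8a401a0 below); it can be read from the job status before opening a shell:

nomad job status mysql-server    # note the new allocation ID under "Allocations"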

We check that the data we added is still there.

nomad alloc exec d8a401a0 sh

mysql -u root -p -D itemcollection

mysql> select * from items;

+----+----------+
| id | name     |
+----+----------+
|  1 | bike     |
|  2 | baseball |
|  3 | chair    |
|  4 | glove    |
|  5 | hat      |
|  6 | keyboard |
+----+----------+

Since we created the RBD image with Nomad, we can also delete it with Nomad.

We first need to stop and purge the job.

nomad stop -purge mysql-server

Then we can delete the volume.

nomad volume delete ceph-mysql

Successfully deleted volume "ceph-mysql"!
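
On the Ceph side, the backing RBD image should be gone as well (assuming the rbd pool used above):

rbd ls rbd    # the csi-vol-* image should no longer be listed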

In this example we used RBD, but we could have used CephFS in the same way.

Resources