Clear History

This commit is contained in:
Niccolo Borgioli 2023-05-11 17:44:13 +02:00
commit 4acb9b9ae8
No known key found for this signature in database
GPG Key ID: D93C615F75EE4F0B
77 changed files with 5363 additions and 0 deletions

55
.github/workflows/cd.yaml vendored Normal file
View File

@ -0,0 +1,55 @@
# Continuous delivery: builds the thesis PDF with Tectonic on pull requests and
# on pushes to `main`, and publishes it as a GitHub release for `v*` tags.
name: 'CD'

on:
  pull_request:
  push:
    tags:
      - 'v*'
    branches:
      - main

jobs:
  build:
    runs-on: ubuntu-latest
    defaults:
      run:
        # Applies to `run` steps only; `uses` steps take paths relative to
        # the workspace root.
        working-directory: thesis
    steps:
      - name: Checkout
        uses: actions/checkout@v3
        with:
          lfs: true
      - uses: actions/setup-node@v3
        with:
          node-version: 18
      - name: Tectonic Cache
        uses: actions/cache@v3
        with:
          path: ~/.cache/Tectonic
          key: ${{ runner.os }}-tectonic-${{ hashFiles('**/*.tex') }}
          restore-keys: |
            ${{ runner.os }}-tectonic-
      - uses: wtfjoke/setup-tectonic@v2
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
      - name: Run Tectonic
        run: make build
      - name: Upload PDFs
        uses: actions/upload-artifact@v3
        with:
          name: PDFs
          path: 'thesis/Thesis.pdf'

  release:
    runs-on: ubuntu-latest
    if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v')
    needs:
      - build
    # Creating or updating a release needs write access to repository contents.
    permissions:
      contents: write
    steps:
      # Each job runs on a fresh runner, so the PDF produced by `build` has to
      # be fetched from the uploaded artifact before it can be attached.
      - name: Download PDFs
        uses: actions/download-artifact@v3
        with:
          name: PDFs
          path: thesis
      - name: Release
        uses: ncipollo/release-action@v1
        with:
          allowUpdates: true
          artifacts: 'thesis/Thesis.pdf'
          token: ${{ secrets.GITHUB_TOKEN }}

View File

@ -0,0 +1,16 @@
{
"folders": [
{
"path": "code"
},
{
"path": "thesis"
},
{
"path": "."
}
],
"settings": {
"cSpell.words": ["apiextensions", "iluzio", "nicco"]
}
}

5
code/.gitignore vendored Normal file
View File

@ -0,0 +1,5 @@
__pycache__
.venv
.mypy_cache
node_modules

21
code/LICENSE Normal file
View File

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2023 Niccolo Borgioli
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

50
code/Makefile Normal file
View File

@ -0,0 +1,50 @@
# Developer entry points for the local minikube-based simulator environment.

# Cluster
cluster-up::
	minikube start --cni calico

cluster-down::
	minikube delete

cluster-dash::
	minikube dashboard

# `$$` hands a literal `$(...)` to the shell. A single `$(minikube ...)` would
# be expanded by Make itself as an (undefined, hence empty) variable reference,
# leaving `eval` with nothing to evaluate.
# NOTE: every recipe line runs in its own shell, so the exported variables do
# not reach the shell that invoked `make`.
# minikube docker-env | source
cluster-env::
	eval $$(minikube docker-env --shell bash)

# Namespace
ns-up::
	kubectl create namespace simulator
	kubens simulator

ns-down::
	kubectl delete namespaces simulator

ns-reset:: ns-down ns-up

# Chaos Mesh
chaos-up::
	curl -sSL https://mirrors.chaos-mesh.org/v2.5.1/install.sh | bash

chaos-dash::
	minikube service chaos-dashboard -n chaos-mesh

# CRD
crd-up::
	kubectl apply -f ./crd/iluzio.yaml

# Docker
docker::
	./images/build.sh

# General
# Runs both dashboards in parallel.
dash:
	$(MAKE) -j 2 cluster-dash chaos-dash

start:: cluster-up ns-up chaos-up crd-up docker dash

reset:: cluster-down start

operator-up::
	poetry install
	poetry run kopf run -n simulator src/main.py

4
code/README.md Normal file
View File

@ -0,0 +1,4 @@
## Structure
- `images`: docker images shared for everything
- `src`: source of the simulator

10
code/Roadmap Normal file
View File

@ -0,0 +1,10 @@
First:
- api driven
- 2 nodes
- sidecar communication
- block connections with network policies
- ping each other
Second:
- crd driven
- kopf as operator

20
code/config.yaml Normal file
View File

@ -0,0 +1,20 @@
---
# Node definitions; each entry names an image and, optionally, the command to
# run in it.
nodes:
  iperf-server:
    image: test
    command:
      - 'iperf3'
      - '--server'
      - '--one-off'
  iperf-client:
    image: test
    command:
      - 'iperf3'
      - '--client'
      - 'iperf-server'
      - '--time'
      - '10'
  ping-server:
    image: test
  ping-client:
    image: test
    command:
      - 'ping'
      - 'ping-server'

# Events applied to the nodes above.
events:
  - type: network
    connect:
      - iperf-client
      - iperf-server

82
code/crd/iluzio.yaml Normal file
View File

@ -0,0 +1,82 @@
# Scenario
# Registers the namespaced `Scenario` kind in the `iluzio.nicco.io` API group.
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
  name: scenarios.iluzio.nicco.io
spec:
  group: iluzio.nicco.io
  scope: Namespaced
  names:
    kind: Scenario
    singular: scenario
    plural: scenarios
  versions:
    - name: v1
      served: true
      storage: true
      schema:
        openAPIV3Schema:
          type: object
          properties:
            # Both sections are free-form objects: unknown fields are kept
            # rather than pruned.
            spec:
              type: object
              x-kubernetes-preserve-unknown-fields: true
            status:
              type: object
              x-kubernetes-preserve-unknown-fields: true
---
# Node
# Registers the namespaced `Node` kind in the `iluzio.nicco.io` API group.
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
  name: nodes.iluzio.nicco.io
spec:
  group: iluzio.nicco.io
  scope: Namespaced
  names:
    kind: Node
    singular: node
    plural: nodes
  versions:
    - name: v1
      served: true
      storage: true
      schema:
        openAPIV3Schema:
          type: object
          properties:
            # Both sections are free-form objects: unknown fields are kept
            # rather than pruned.
            spec:
              type: object
              x-kubernetes-preserve-unknown-fields: true
            status:
              type: object
              x-kubernetes-preserve-unknown-fields: true
---
# Link
# Registers the namespaced `Link` kind in the `iluzio.nicco.io` API group.
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
  name: links.iluzio.nicco.io
spec:
  group: iluzio.nicco.io
  scope: Namespaced
  names:
    kind: Link
    singular: link
    plural: links
  versions:
    - name: v1
      served: true
      storage: true
      schema:
        openAPIV3Schema:
          type: object
          properties:
            # Both sections are free-form objects: unknown fields are kept
            # rather than pruned.
            spec:
              type: object
              x-kubernetes-preserve-unknown-fields: true
            status:
              type: object
              x-kubernetes-preserve-unknown-fields: true

16
code/crd/schema.yml Normal file
View File

@ -0,0 +1,16 @@
# OpenAPI description of the `Scenario` object.
# https://editor.swagger.io/
# https://swagger.io/docs/specification/data-models/data-types/
openapi: '3.0.3'
info:
  title: Foo
  version: '0.0.0'
paths: {}
components:
  schemas:
    Scenario:
      type: object
      properties:
        name:
          type: string

12
code/images/build.sh Executable file
View File

@ -0,0 +1,12 @@
#!/bin/bash
#
# Builds the `idle` and `sidecar` images in parallel against the Docker daemon
# exposed by minikube. Exits non-zero if either build fails.

set -euo pipefail

# Work relative to this script so the ./<image> build contexts resolve.
cd "$(dirname "${BASH_SOURCE[0]}")"

# Point the docker CLI at minikube's daemon.
eval "$(minikube docker-env --shell bash)"

# Builds the image whose build context is ./<name> and tags it <name>.
#   $1 - image name, which is also the directory name
build() {
  docker build -t "$1" "./$1"
}

build "idle" &
idle_pid=$!
build "sidecar" &
sidecar_pid=$!

# A bare `wait` always returns 0; waiting on each PID surfaces build failures.
# Both jobs are awaited before exiting so neither is left running.
status=0
wait "$idle_pid" || status=$?
wait "$sidecar_pid" || status=$?
exit "$status"

View File

@ -0,0 +1,5 @@
# Ubuntu image with basic network tooling that idles until stopped.
FROM ubuntu

# `netcat` is only a virtual package on current Ubuntu releases (apt reports
# "no installation candidate"), so the concrete implementation is named.
# Removing the apt lists keeps the layer small.
RUN apt-get update \
    && apt-get install -y iperf3 curl dnsutils iputils-ping netcat-openbsd python3 \
    && rm -rf /var/lib/apt/lists/*

CMD [ "sleep", "infinity" ]

View File

@ -0,0 +1,11 @@
# Stage 1: compile the Go sources in the build context.
FROM golang:1.20 AS builder
WORKDIR /app
COPY . .
RUN go build

# Stage 2: runtime image; `dnsutils` provides the `dig` binary.
FROM ubuntu
RUN apt-get update \
    && apt-get install -y dnsutils \
    && rm -rf /var/lib/apt/lists/*
WORKDIR /app
COPY --from=builder /app .
EXPOSE 42069
# Exec form runs the binary directly instead of wrapping it in `/bin/sh -c`,
# so stop signals are delivered to the process itself.
CMD ["/app/sidecar"]

View File

@ -0,0 +1,3 @@
module iluzio.nicco.io/sidecar
go 1.20

View File

@ -0,0 +1,44 @@
package main
import (
"encoding/json"
"fmt"
"log"
"net/http"
"os"
"os/exec"
"strings"
)
var SERVICE = os.Getenv("SERVICE")
// getIPs looks up SERVICE with `dig +short +search` and returns one entry per
// line of output. It returns an empty (non-nil) slice when dig prints nothing
// or when the lookup fails, so callers always get a value that encodes as a
// JSON array.
func getIPs() []string {
	out, err := exec.Command("dig", "+short", "+search", SERVICE).Output()
	if err != nil {
		// A failed lookup is reported and treated as "no peers". Exiting the
		// process here would take the whole HTTP server down with it.
		log.Printf("dig lookup for %q failed: %v", SERVICE, err)
		return []string{}
	}

	trimmed := strings.TrimSpace(string(out))
	if trimmed == "" {
		return []string{}
	}
	return strings.Split(trimmed, "\n")
}
// main serves GET /discoverable on port 42069. The response is the list
// returned by getIPs, encoded as JSON by default or as newline-separated text
// when the request carries `?format=plain`.
func main() {
	discoverable := func(w http.ResponseWriter, r *http.Request) {
		peers := getIPs()

		if r.URL.Query().Get("format") != "plain" {
			w.Header().Set("Content-Type", "application/json")
			json.NewEncoder(w).Encode(peers)
			return
		}

		fmt.Fprintf(w, "%s\n", strings.Join(peers, "\n"))
	}

	http.HandleFunc("/discoverable", discoverable)
	log.Fatal(http.ListenAndServe(":42069", nil))
}

1022
code/poetry.lock generated Normal file

File diff suppressed because it is too large Load Diff

2
code/poetry.toml Normal file
View File

@ -0,0 +1,2 @@
[virtualenvs]
in-project = true

21
code/pyproject.toml Normal file
View File

@ -0,0 +1,21 @@
[tool.poetry]
name = "sample"
version = "0.1.0"
description = ""
authors = ["Your Name <you@example.com>"]
readme = "README.md"
[tool.poetry.dependencies]
python = "^3.10"
kubernetes = "^25.3.0"
pyyaml = "^6.0"
kopf = "^1.36.0"
[tool.poetry.group.dev.dependencies]
mypy = "^0.991"
autopep8 = "^2.0.1"
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"

View File

@ -0,0 +1,10 @@
# Node 18 image that installs the dependencies declared in package.json and
# starts the package's entry point.
FROM node:18
WORKDIR /app

# The manifest is copied before the sources so the dependency layer is only
# rebuilt when package.json changes.
COPY package.json ./
RUN npm install

COPY index.js ./
EXPOSE 3000
CMD ["node", "."]

View File

@ -0,0 +1,20 @@
import Fastify from 'fastify'
// Fastify server listening on 0.0.0.0:3000 with two routes:
//   POST /transmit - logs the request body
//   GET  /time     - returns the current time as an ISO-8601 string
const app = Fastify({ logger: { base: false } })

app.post('/transmit', async (request) => {
  request.log.info({ data: request.body })
})

app.get('/time', async () => new Date().toISOString())

// Close the server when the process is asked to stop.
const shutdown = () => app.close()
for (const signal of ['SIGINT', 'SIGTERM']) {
  process.on(signal, shutdown)
}

try {
  await app.listen({ port: 3000, host: '0.0.0.0' })
} catch (err) {
  app.log.error(err)
  process.exit(1)
}

View File

@ -0,0 +1,7 @@
{
"main": "./index.js",
"type": "module",
"dependencies": {
"fastify": "^4.15.0"
}
}

View File

@ -0,0 +1,380 @@
lockfileVersion: 5.4
specifiers:
fastify: ^4.15.0
dependencies:
fastify: 4.15.0
packages:
/@fastify/ajv-compiler/3.5.0:
resolution: {integrity: sha512-ebbEtlI7dxXF5ziNdr05mOY8NnDiPB1XvAlLHctRt/Rc+C3LCOVW5imUVX+mhvUhnNzmPBHewUkOFgGlCxgdAA==}
dependencies:
ajv: 8.12.0
ajv-formats: 2.1.1
fast-uri: 2.2.0
dev: false
/@fastify/deepmerge/1.3.0:
resolution: {integrity: sha512-J8TOSBq3SoZbDhM9+R/u77hP93gz/rajSA+K2kGyijPpORPWUXHUpTaleoj+92As0S9uPRP7Oi8IqMf0u+ro6A==}
dev: false
/@fastify/error/3.2.0:
resolution: {integrity: sha512-KAfcLa+CnknwVi5fWogrLXgidLic+GXnLjijXdpl8pvkvbXU5BGa37iZO9FGvsh9ZL4y+oFi5cbHBm5UOG+dmQ==}
dev: false
/@fastify/fast-json-stringify-compiler/4.2.0:
resolution: {integrity: sha512-ypZynRvXA3dibfPykQN3RB5wBdEUgSGgny8Qc6k163wYPLD4mEGEDkACp+00YmqkGvIm8D/xYoHajwyEdWD/eg==}
dependencies:
fast-json-stringify: 5.6.2
dev: false
/abort-controller/3.0.0:
resolution: {integrity: sha512-h8lQ8tacZYnR3vNQTgibj+tODHI5/+l06Au2Pcriv/Gmet0eaj4TwWH41sO9wnHDiQsEj19q0drzdWdeAHtweg==}
engines: {node: '>=6.5'}
dependencies:
event-target-shim: 5.0.1
dev: false
/abstract-logging/2.0.1:
resolution: {integrity: sha512-2BjRTZxTPvheOvGbBslFSYOUkr+SjPtOnrLP33f+VIWLzezQpZcqVg7ja3L4dBXmzzgwT+a029jRx5PCi3JuiA==}
dev: false
/ajv-formats/2.1.1:
resolution: {integrity: sha512-Wx0Kx52hxE7C18hkMEggYlEifqWZtYaRgouJor+WMdPnQyEK13vgEWyVNup7SoeeoLMsr4kf5h6dOW11I15MUA==}
peerDependenciesMeta:
ajv:
optional: true
dependencies:
ajv: 8.12.0
dev: false
/ajv/8.12.0:
resolution: {integrity: sha512-sRu1kpcO9yLtYxBKvqfTeh9KzZEwO3STyX1HT+4CaDzC6HpTGYhIhPIzj9XuKU7KYDwnaeh5hcOwjy1QuJzBPA==}
dependencies:
fast-deep-equal: 3.1.3
json-schema-traverse: 1.0.0
require-from-string: 2.0.2
uri-js: 4.4.1
dev: false
/archy/1.0.0:
resolution: {integrity: sha512-Xg+9RwCg/0p32teKdGMPTPnVXKD0w3DfHnFTficozsAgsvq2XenPJq/MYpzzQ/v8zrOyJn6Ds39VA4JIDwFfqw==}
dev: false
/atomic-sleep/1.0.0:
resolution: {integrity: sha512-kNOjDqAh7px0XWNI+4QbzoiR/nTkHAWNud2uvnJquD1/x5a7EQZMJT0AczqK0Qn67oY/TTQ1LbUKajZpp3I9tQ==}
engines: {node: '>=8.0.0'}
dev: false
/avvio/8.2.1:
resolution: {integrity: sha512-TAlMYvOuwGyLK3PfBb5WKBXZmXz2fVCgv23d6zZFdle/q3gPjmxBaeuC0pY0Dzs5PWMSgfqqEZkrye19GlDTgw==}
dependencies:
archy: 1.0.0
debug: 4.3.4
fastq: 1.15.0
transitivePeerDependencies:
- supports-color
dev: false
/base64-js/1.5.1:
resolution: {integrity: sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==}
dev: false
/buffer/6.0.3:
resolution: {integrity: sha512-FTiCpNxtwiZZHEZbcbTIcZjERVICn9yq/pDFkTl95/AxzD1naBctN7YO68riM/gLSDY7sdrMby8hofADYuuqOA==}
dependencies:
base64-js: 1.5.1
ieee754: 1.2.1
dev: false
/cookie/0.5.0:
resolution: {integrity: sha512-YZ3GUyn/o8gfKJlnlX7g7xq4gyO6OSuhGPKaaGssGB2qgDUS0gPgtTvoyZLTt9Ab6dC4hfc9dV5arkvc/OCmrw==}
engines: {node: '>= 0.6'}
dev: false
/debug/4.3.4:
resolution: {integrity: sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ==}
engines: {node: '>=6.0'}
peerDependencies:
supports-color: '*'
peerDependenciesMeta:
supports-color:
optional: true
dependencies:
ms: 2.1.2
dev: false
/event-target-shim/5.0.1:
resolution: {integrity: sha512-i/2XbnSz/uxRCU6+NdVJgKWDTM427+MqYbkQzD321DuCQJUqOuJKIA0IM2+W2xtYHdKOmZ4dR6fExsd4SXL+WQ==}
engines: {node: '>=6'}
dev: false
/events/3.3.0:
resolution: {integrity: sha512-mQw+2fkQbALzQ7V0MY0IqdnXNOeTtP4r0lN9z7AAawCXgqea7bDii20AYrIBrFd/Hx0M2Ocz6S111CaFkUcb0Q==}
engines: {node: '>=0.8.x'}
dev: false
/fast-content-type-parse/1.0.0:
resolution: {integrity: sha512-Xbc4XcysUXcsP5aHUU7Nq3OwvHq97C+WnbkeIefpeYLX+ryzFJlU6OStFJhs6Ol0LkUGpcK+wL0JwfM+FCU5IA==}
dev: false
/fast-decode-uri-component/1.0.1:
resolution: {integrity: sha512-WKgKWg5eUxvRZGwW8FvfbaH7AXSh2cL+3j5fMGzUMCxWBJ3dV3a7Wz8y2f/uQ0e3B6WmodD3oS54jTQ9HVTIIg==}
dev: false
/fast-deep-equal/3.1.3:
resolution: {integrity: sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==}
dev: false
/fast-json-stringify/5.6.2:
resolution: {integrity: sha512-F6xkRrXvtGbAiDSEI5Rk7qk2P63Y9kc8bO6Dnsd3Rt6sBNr2QxNFWs0JbKftgiyOfGxnJaRoHe4SizCTqeAyrA==}
dependencies:
'@fastify/deepmerge': 1.3.0
ajv: 8.12.0
ajv-formats: 2.1.1
fast-deep-equal: 3.1.3
fast-uri: 2.2.0
rfdc: 1.3.0
dev: false
/fast-querystring/1.1.1:
resolution: {integrity: sha512-qR2r+e3HvhEFmpdHMv//U8FnFlnYjaC6QKDuaXALDkw2kvHO8WDjxH+f/rHGR4Me4pnk8p9JAkRNTjYHAKRn2Q==}
dependencies:
fast-decode-uri-component: 1.0.1
dev: false
/fast-redact/3.1.2:
resolution: {integrity: sha512-+0em+Iya9fKGfEQGcd62Yv6onjBmmhV1uh86XVfOU8VwAe6kaFdQCWI9s0/Nnugx5Vd9tdbZ7e6gE2tR9dzXdw==}
engines: {node: '>=6'}
dev: false
/fast-uri/2.2.0:
resolution: {integrity: sha512-cIusKBIt/R/oI6z/1nyfe2FvGKVTohVRfvkOhvx0nCEW+xf5NoCXjAHcWp93uOUBchzYcsvPlrapAdX1uW+YGg==}
dev: false
/fastify/4.15.0:
resolution: {integrity: sha512-m/CaRN8nf5uyYdrDe2qqq+0z3oGyE+A++qlKQoLJTI4WI0nWK9D6R3FxXQ3MVwt/md977GMR4F43pE9oqrS2zw==}
dependencies:
'@fastify/ajv-compiler': 3.5.0
'@fastify/error': 3.2.0
'@fastify/fast-json-stringify-compiler': 4.2.0
abstract-logging: 2.0.1
avvio: 8.2.1
fast-content-type-parse: 1.0.0
find-my-way: 7.6.0
light-my-request: 5.9.1
pino: 8.11.0
process-warning: 2.1.0
proxy-addr: 2.0.7
rfdc: 1.3.0
secure-json-parse: 2.7.0
semver: 7.3.8
tiny-lru: 10.3.0
transitivePeerDependencies:
- supports-color
dev: false
/fastq/1.15.0:
resolution: {integrity: sha512-wBrocU2LCXXa+lWBt8RoIRD89Fi8OdABODa/kEnyeyjS5aZO5/GNvI5sEINADqP/h8M29UHTHUb53sUu5Ihqdw==}
dependencies:
reusify: 1.0.4
dev: false
/find-my-way/7.6.0:
resolution: {integrity: sha512-H7berWdHJ+5CNVr4ilLWPai4ml7Y2qAsxjw3pfeBxPigZmaDTzF0wjJLj90xRCmGcWYcyt050yN+34OZDJm1eQ==}
engines: {node: '>=14'}
dependencies:
fast-deep-equal: 3.1.3
fast-querystring: 1.1.1
safe-regex2: 2.0.0
dev: false
/forwarded/0.2.0:
resolution: {integrity: sha512-buRG0fpBtRHSTCOASe6hD258tEubFoRLb4ZNA6NxMVHNw2gOcwHo9wyablzMzOA5z9xA9L1KNjk/Nt6MT9aYow==}
engines: {node: '>= 0.6'}
dev: false
/ieee754/1.2.1:
resolution: {integrity: sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==}
dev: false
/ipaddr.js/1.9.1:
resolution: {integrity: sha512-0KI/607xoxSToH7GjN1FfSbLoU0+btTicjsQSWQlh/hZykN8KpmMf7uYwPW3R+akZ6R/w18ZlXSHBYXiYUPO3g==}
engines: {node: '>= 0.10'}
dev: false
/json-schema-traverse/1.0.0:
resolution: {integrity: sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==}
dev: false
/light-my-request/5.9.1:
resolution: {integrity: sha512-UT7pUk8jNCR1wR7w3iWfIjx32DiB2f3hFdQSOwy3/EPQ3n3VocyipUxcyRZR0ahoev+fky69uA+GejPa9KuHKg==}
dependencies:
cookie: 0.5.0
process-warning: 2.1.0
set-cookie-parser: 2.6.0
dev: false
/lru-cache/6.0.0:
resolution: {integrity: sha512-Jo6dJ04CmSjuznwJSS3pUeWmd/H0ffTlkXXgwZi+eq1UCmqQwCh+eLsYOYCwY991i2Fah4h1BEMCx4qThGbsiA==}
engines: {node: '>=10'}
dependencies:
yallist: 4.0.0
dev: false
/ms/2.1.2:
resolution: {integrity: sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==}
dev: false
/on-exit-leak-free/2.1.0:
resolution: {integrity: sha512-VuCaZZAjReZ3vUwgOB8LxAosIurDiAW0s13rI1YwmaP++jvcxP77AWoQvenZebpCA2m8WC1/EosPYPMjnRAp/w==}
dev: false
/pino-abstract-transport/1.0.0:
resolution: {integrity: sha512-c7vo5OpW4wIS42hUVcT5REsL8ZljsUfBjqV/e2sFxmFEFZiq1XLUp5EYLtuDH6PEHq9W1egWqRbnLUP5FuZmOA==}
dependencies:
readable-stream: 4.3.0
split2: 4.2.0
dev: false
/pino-std-serializers/6.1.0:
resolution: {integrity: sha512-KO0m2f1HkrPe9S0ldjx7za9BJjeHqBku5Ch8JyxETxT8dEFGz1PwgrHaOQupVYitpzbFSYm7nnljxD8dik2c+g==}
dev: false
/pino/8.11.0:
resolution: {integrity: sha512-Z2eKSvlrl2rH8p5eveNUnTdd4AjJk8tAsLkHYZQKGHP4WTh2Gi1cOSOs3eWPqaj+niS3gj4UkoreoaWgF3ZWYg==}
hasBin: true
dependencies:
atomic-sleep: 1.0.0
fast-redact: 3.1.2
on-exit-leak-free: 2.1.0
pino-abstract-transport: 1.0.0
pino-std-serializers: 6.1.0
process-warning: 2.1.0
quick-format-unescaped: 4.0.4
real-require: 0.2.0
safe-stable-stringify: 2.4.3
sonic-boom: 3.3.0
thread-stream: 2.3.0
dev: false
/process-warning/2.1.0:
resolution: {integrity: sha512-9C20RLxrZU/rFnxWncDkuF6O999NdIf3E1ws4B0ZeY3sRVPzWBMsYDE2lxjxhiXxg464cQTgKUGm8/i6y2YGXg==}
dev: false
/process/0.11.10:
resolution: {integrity: sha512-cdGef/drWFoydD1JsMzuFf8100nZl+GT+yacc2bEced5f9Rjk4z+WtFUTBu9PhOi9j/jfmBPu0mMEY4wIdAF8A==}
engines: {node: '>= 0.6.0'}
dev: false
/proxy-addr/2.0.7:
resolution: {integrity: sha512-llQsMLSUDUPT44jdrU/O37qlnifitDP+ZwrmmZcoSKyLKvtZxpyV0n2/bD/N4tBAAZ/gJEdZU7KMraoK1+XYAg==}
engines: {node: '>= 0.10'}
dependencies:
forwarded: 0.2.0
ipaddr.js: 1.9.1
dev: false
/punycode/2.3.0:
resolution: {integrity: sha512-rRV+zQD8tVFys26lAGR9WUuS4iUAngJScM+ZRSKtvl5tKeZ2t5bvdNFdNHBW9FWR4guGHlgmsZ1G7BSm2wTbuA==}
engines: {node: '>=6'}
dev: false
/quick-format-unescaped/4.0.4:
resolution: {integrity: sha512-tYC1Q1hgyRuHgloV/YXs2w15unPVh8qfu/qCTfhTYamaw7fyhumKa2yGpdSo87vY32rIclj+4fWYQXUMs9EHvg==}
dev: false
/readable-stream/4.3.0:
resolution: {integrity: sha512-MuEnA0lbSi7JS8XM+WNJlWZkHAAdm7gETHdFK//Q/mChGyj2akEFtdLZh32jSdkWGbRwCW9pn6g3LWDdDeZnBQ==}
engines: {node: ^12.22.0 || ^14.17.0 || >=16.0.0}
dependencies:
abort-controller: 3.0.0
buffer: 6.0.3
events: 3.3.0
process: 0.11.10
dev: false
/real-require/0.2.0:
resolution: {integrity: sha512-57frrGM/OCTLqLOAh0mhVA9VBMHd+9U7Zb2THMGdBUoZVOtGbJzjxsYGDJ3A9AYYCP4hn6y1TVbaOfzWtm5GFg==}
engines: {node: '>= 12.13.0'}
dev: false
/require-from-string/2.0.2:
resolution: {integrity: sha512-Xf0nWe6RseziFMu+Ap9biiUbmplq6S9/p+7w7YXP/JBHhrUDDUhwa+vANyubuqfZWTveU//DYVGsDG7RKL/vEw==}
engines: {node: '>=0.10.0'}
dev: false
/ret/0.2.2:
resolution: {integrity: sha512-M0b3YWQs7R3Z917WRQy1HHA7Ba7D8hvZg6UE5mLykJxQVE2ju0IXbGlaHPPlkY+WN7wFP+wUMXmBFA0aV6vYGQ==}
engines: {node: '>=4'}
dev: false
/reusify/1.0.4:
resolution: {integrity: sha512-U9nH88a3fc/ekCF1l0/UP1IosiuIjyTh7hBvXVMHYgVcfGvt897Xguj2UOLDeI5BG2m7/uwyaLVT6fbtCwTyzw==}
engines: {iojs: '>=1.0.0', node: '>=0.10.0'}
dev: false
/rfdc/1.3.0:
resolution: {integrity: sha512-V2hovdzFbOi77/WajaSMXk2OLm+xNIeQdMMuB7icj7bk6zi2F8GGAxigcnDFpJHbNyNcgyJDiP+8nOrY5cZGrA==}
dev: false
/safe-regex2/2.0.0:
resolution: {integrity: sha512-PaUSFsUaNNuKwkBijoAPHAK6/eM6VirvyPWlZ7BAQy4D+hCvh4B6lIG+nPdhbFfIbP+gTGBcrdsOaUs0F+ZBOQ==}
dependencies:
ret: 0.2.2
dev: false
/safe-stable-stringify/2.4.3:
resolution: {integrity: sha512-e2bDA2WJT0wxseVd4lsDP4+3ONX6HpMXQa1ZhFQ7SU+GjvORCmShbCMltrtIDfkYhVHrOcPtj+KhmDBdPdZD1g==}
engines: {node: '>=10'}
dev: false
/secure-json-parse/2.7.0:
resolution: {integrity: sha512-6aU+Rwsezw7VR8/nyvKTx8QpWH9FrcYiXXlqC4z5d5XQBDRqtbfsRjnwGyqbi3gddNtWHuEk9OANUotL26qKUw==}
dev: false
/semver/7.3.8:
resolution: {integrity: sha512-NB1ctGL5rlHrPJtFDVIVzTyQylMLu9N9VICA6HSFJo8MCGVTMW6gfpicwKmmK/dAjTOrqu5l63JJOpDSrAis3A==}
engines: {node: '>=10'}
hasBin: true
dependencies:
lru-cache: 6.0.0
dev: false
/set-cookie-parser/2.6.0:
resolution: {integrity: sha512-RVnVQxTXuerk653XfuliOxBP81Sf0+qfQE73LIYKcyMYHG94AuH0kgrQpRDuTZnSmjpysHmzxJXKNfa6PjFhyQ==}
dev: false
/sonic-boom/3.3.0:
resolution: {integrity: sha512-LYxp34KlZ1a2Jb8ZQgFCK3niIHzibdwtwNUWKg0qQRzsDoJ3Gfgkf8KdBTFU3SkejDEIlWwnSnpVdOZIhFMl/g==}
dependencies:
atomic-sleep: 1.0.0
dev: false
/split2/4.2.0:
resolution: {integrity: sha512-UcjcJOWknrNkF6PLX83qcHM6KHgVKNkV62Y8a5uYDVv9ydGQVwAHMKqHdJje1VTWpljG0WYpCDhrCdAOYH4TWg==}
engines: {node: '>= 10.x'}
dev: false
/thread-stream/2.3.0:
resolution: {integrity: sha512-kaDqm1DET9pp3NXwR8382WHbnpXnRkN9xGN9dQt3B2+dmXiW8X1SOwmFOxAErEQ47ObhZ96J6yhZNXuyCOL7KA==}
dependencies:
real-require: 0.2.0
dev: false
/tiny-lru/10.3.0:
resolution: {integrity: sha512-vTKRT2AEO1sViFDWAIzZVpV8KURCaMtnHa4RZB3XqtYLbrTO/fLDXKPEX9kVWq9u+nZREkwakbcmzGgvJm8QKA==}
engines: {node: '>=12'}
dev: false
/uri-js/4.4.1:
resolution: {integrity: sha512-7rKUyy33Q1yc98pQ1DAmLtwX109F7TIfWlW1Ydo8Wl1ii1SeHieeh0HHfPeL2fMXK6z0s8ecKs9frCuLJvndBg==}
dependencies:
punycode: 2.3.0
dev: false
/yallist/4.0.0:
resolution: {integrity: sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A==}
dev: false

View File

@ -0,0 +1,9 @@
#!/bin/bash
#
# Builds the `base-station` and `satellite` images against the Docker daemon
# exposed by minikube, then applies the scenario manifest.

# Stop at the first failing command so the scenario is never applied on top of
# a failed image build.
set -euo pipefail

# Work relative to this script so ./base, ./sat and ./scenario.yaml resolve.
cd "$(dirname "${BASH_SOURCE[0]}")"

# Point the docker CLI at minikube's daemon.
eval "$(minikube docker-env --shell bash)"

docker build -t base-station ./base
docker build -t satellite ./sat

kubectl apply -f ./scenario.yaml

View File

@ -0,0 +1,10 @@
# Node 18 image that installs the dependencies declared in package.json and
# starts the package's entry point.
FROM node:18
WORKDIR /app

# The manifest is copied before the sources so the dependency layer is only
# rebuilt when package.json changes.
COPY package.json ./
RUN npm install

COPY index.js ./
STOPSIGNAL SIGTERM
CMD ["node", "."]

View File

@ -0,0 +1,31 @@
import pino from 'pino'
const logger = pino({ base: false })

// pino's log methods read their state from `this`. Passing `logger.error`
// directly as a callback detaches it from the logger, so it would throw inside
// the `.catch` handler and surface as an unhandled rejection. This wrapper
// keeps the receiver intact.
const logError = (err) => logger.error(err)

// Once per second: ask the local discovery endpoint for peers, then request
// the time from every peer and post a random observation to it.
const interval = setInterval(async () => {
  // Any discovery failure (network error, invalid JSON) counts as "no peers".
  const ips = await fetch('http://localhost:42069/discoverable')
    .then((res) => res.json())
    .catch(() => [])

  // Guard against a non-array payload as well as an empty list.
  if (!Array.isArray(ips) || !ips.length) {
    logger.info('no peers found')
    return
  }

  for (const ip of ips) {
    // Sync some data
    // Don't await on purpose
    fetch(`http://${ip}:3000/time`)
      .then((res) => res.text())
      .then((time) => logger.info({ peer: ip }, `time from peer: ${time}`))
      .catch(logError)
    fetch(`http://${ip}:3000/transmit`, { method: 'POST', body: `Observation: ${Math.random()}` }).catch(logError)
  }
}, 1000)

// Stops the polling loop and terminates the process.
function exit() {
  clearInterval(interval)
  process.exit(0)
}

process.on('SIGINT', exit)
process.on('SIGTERM', exit)

View File

@ -0,0 +1,7 @@
{
"main": "./index.js",
"type": "module",
"dependencies": {
"pino": "^8.11.0"
}
}

View File

@ -0,0 +1,133 @@
lockfileVersion: 5.4
specifiers:
pino: ^8.11.0
dependencies:
pino: 8.11.0
packages:
/abort-controller/3.0.0:
resolution: {integrity: sha512-h8lQ8tacZYnR3vNQTgibj+tODHI5/+l06Au2Pcriv/Gmet0eaj4TwWH41sO9wnHDiQsEj19q0drzdWdeAHtweg==}
engines: {node: '>=6.5'}
dependencies:
event-target-shim: 5.0.1
dev: false
/atomic-sleep/1.0.0:
resolution: {integrity: sha512-kNOjDqAh7px0XWNI+4QbzoiR/nTkHAWNud2uvnJquD1/x5a7EQZMJT0AczqK0Qn67oY/TTQ1LbUKajZpp3I9tQ==}
engines: {node: '>=8.0.0'}
dev: false
/base64-js/1.5.1:
resolution: {integrity: sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==}
dev: false
/buffer/6.0.3:
resolution: {integrity: sha512-FTiCpNxtwiZZHEZbcbTIcZjERVICn9yq/pDFkTl95/AxzD1naBctN7YO68riM/gLSDY7sdrMby8hofADYuuqOA==}
dependencies:
base64-js: 1.5.1
ieee754: 1.2.1
dev: false
/event-target-shim/5.0.1:
resolution: {integrity: sha512-i/2XbnSz/uxRCU6+NdVJgKWDTM427+MqYbkQzD321DuCQJUqOuJKIA0IM2+W2xtYHdKOmZ4dR6fExsd4SXL+WQ==}
engines: {node: '>=6'}
dev: false
/events/3.3.0:
resolution: {integrity: sha512-mQw+2fkQbALzQ7V0MY0IqdnXNOeTtP4r0lN9z7AAawCXgqea7bDii20AYrIBrFd/Hx0M2Ocz6S111CaFkUcb0Q==}
engines: {node: '>=0.8.x'}
dev: false
/fast-redact/3.1.2:
resolution: {integrity: sha512-+0em+Iya9fKGfEQGcd62Yv6onjBmmhV1uh86XVfOU8VwAe6kaFdQCWI9s0/Nnugx5Vd9tdbZ7e6gE2tR9dzXdw==}
engines: {node: '>=6'}
dev: false
/ieee754/1.2.1:
resolution: {integrity: sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==}
dev: false
/on-exit-leak-free/2.1.0:
resolution: {integrity: sha512-VuCaZZAjReZ3vUwgOB8LxAosIurDiAW0s13rI1YwmaP++jvcxP77AWoQvenZebpCA2m8WC1/EosPYPMjnRAp/w==}
dev: false
/pino-abstract-transport/1.0.0:
resolution: {integrity: sha512-c7vo5OpW4wIS42hUVcT5REsL8ZljsUfBjqV/e2sFxmFEFZiq1XLUp5EYLtuDH6PEHq9W1egWqRbnLUP5FuZmOA==}
dependencies:
readable-stream: 4.3.0
split2: 4.2.0
dev: false
/pino-std-serializers/6.1.0:
resolution: {integrity: sha512-KO0m2f1HkrPe9S0ldjx7za9BJjeHqBku5Ch8JyxETxT8dEFGz1PwgrHaOQupVYitpzbFSYm7nnljxD8dik2c+g==}
dev: false
/pino/8.11.0:
resolution: {integrity: sha512-Z2eKSvlrl2rH8p5eveNUnTdd4AjJk8tAsLkHYZQKGHP4WTh2Gi1cOSOs3eWPqaj+niS3gj4UkoreoaWgF3ZWYg==}
hasBin: true
dependencies:
atomic-sleep: 1.0.0
fast-redact: 3.1.2
on-exit-leak-free: 2.1.0
pino-abstract-transport: 1.0.0
pino-std-serializers: 6.1.0
process-warning: 2.1.0
quick-format-unescaped: 4.0.4
real-require: 0.2.0
safe-stable-stringify: 2.4.3
sonic-boom: 3.3.0
thread-stream: 2.3.0
dev: false
/process-warning/2.1.0:
resolution: {integrity: sha512-9C20RLxrZU/rFnxWncDkuF6O999NdIf3E1ws4B0ZeY3sRVPzWBMsYDE2lxjxhiXxg464cQTgKUGm8/i6y2YGXg==}
dev: false
/process/0.11.10:
resolution: {integrity: sha512-cdGef/drWFoydD1JsMzuFf8100nZl+GT+yacc2bEced5f9Rjk4z+WtFUTBu9PhOi9j/jfmBPu0mMEY4wIdAF8A==}
engines: {node: '>= 0.6.0'}
dev: false
/quick-format-unescaped/4.0.4:
resolution: {integrity: sha512-tYC1Q1hgyRuHgloV/YXs2w15unPVh8qfu/qCTfhTYamaw7fyhumKa2yGpdSo87vY32rIclj+4fWYQXUMs9EHvg==}
dev: false
/readable-stream/4.3.0:
resolution: {integrity: sha512-MuEnA0lbSi7JS8XM+WNJlWZkHAAdm7gETHdFK//Q/mChGyj2akEFtdLZh32jSdkWGbRwCW9pn6g3LWDdDeZnBQ==}
engines: {node: ^12.22.0 || ^14.17.0 || >=16.0.0}
dependencies:
abort-controller: 3.0.0
buffer: 6.0.3
events: 3.3.0
process: 0.11.10
dev: false
/real-require/0.2.0:
resolution: {integrity: sha512-57frrGM/OCTLqLOAh0mhVA9VBMHd+9U7Zb2THMGdBUoZVOtGbJzjxsYGDJ3A9AYYCP4hn6y1TVbaOfzWtm5GFg==}
engines: {node: '>= 12.13.0'}
dev: false
/safe-stable-stringify/2.4.3:
resolution: {integrity: sha512-e2bDA2WJT0wxseVd4lsDP4+3ONX6HpMXQa1ZhFQ7SU+GjvORCmShbCMltrtIDfkYhVHrOcPtj+KhmDBdPdZD1g==}
engines: {node: '>=10'}
dev: false
/sonic-boom/3.3.0:
resolution: {integrity: sha512-LYxp34KlZ1a2Jb8ZQgFCK3niIHzibdwtwNUWKg0qQRzsDoJ3Gfgkf8KdBTFU3SkejDEIlWwnSnpVdOZIhFMl/g==}
dependencies:
atomic-sleep: 1.0.0
dev: false
/split2/4.2.0:
resolution: {integrity: sha512-UcjcJOWknrNkF6PLX83qcHM6KHgVKNkV62Y8a5uYDVv9ydGQVwAHMKqHdJje1VTWpljG0WYpCDhrCdAOYH4TWg==}
engines: {node: '>= 10.x'}
dev: false
/thread-stream/2.3.0:
resolution: {integrity: sha512-kaDqm1DET9pp3NXwR8382WHbnpXnRkN9xGN9dQt3B2+dmXiW8X1SOwmFOxAErEQ47ObhZ96J6yhZNXuyCOL7KA==}
dependencies:
real-require: 0.2.0
dev: false

View File

@ -0,0 +1,72 @@
# Scenario with two base stations and one satellite. Links between each base
# station and the satellite are created and later removed.
# NOTE(review): `offset` looks like seconds since scenario start - confirm
# against the operator.
apiVersion: iluzio.nicco.io/v1
kind: Scenario
metadata:
  name: sat-base
spec:
  events:
    # Setup
    - offset: 0
      resource: node
      action: create
      id: base0
      spec:
        image: base-station
        airGapped: false

    - offset: 0
      resource: node
      action: create
      id: base1
      spec:
        image: base-station

    - offset: 0
      resource: node
      action: create
      id: sat0
      spec:
        image: satellite

    # Links
    - offset: 15
      resource: link
      action: create
      from: base0
      to: sat0
      direction: bi
      spec:
        bandwidth:
          rate: 10mbps
          limit: 2000000000
          buffer: 200000

    - offset: 45
      resource: link
      action: delete
      from: base0
      to: sat0
      direction: bi

    - offset: 30
      resource: link
      action: create
      from: base1
      to: sat0
      direction: bi
      spec:
        bandwidth:
          rate: 10mbps
          limit: 2000000000
          buffer: 200000

    - offset: 60
      resource: link
      action: delete
      from: base1
      to: sat0
      direction: bi

    # End
    - offset: 80
      resource: scenario
      action: end

8
code/src/k8s.py Normal file
View File

@ -0,0 +1,8 @@
from kubernetes import client, config

# Runs at import time: configures the Kubernetes client from the local
# kubeconfig file.
config.load_kube_config()

# Module-level API clients, created once and shared by every importer.
core = client.CoreV1Api()  # core/v1 API
apps = client.AppsV1Api()  # apps/v1 API
crd = client.CustomObjectsApi()  # custom resources
networking = client.NetworkingV1Api()  # networking.k8s.io/v1 API

99
code/src/kinds/link.py Normal file
View File

@ -0,0 +1,99 @@
import kopf
import logging
import templates
import k8s
import utils
RESOURCE = 'links.' + utils.GROUP
def get_link_metadata(body):
    """Extract the fields that identify a link from its resource body.

    Args:
        body: Mapping holding ``metadata.name``, ``metadata.namespace``,
            ``spec.from``, ``spec.to`` and ``spec.direction``.

    Returns:
        Tuple ``(namespace, name, sender, receiver, unidirectional)`` where
        ``unidirectional`` is True for direction ``'uni'`` and False for
        ``'bi'``.

    Raises:
        kopf.PermanentError: If a required key is missing or the direction is
            neither ``'uni'`` nor ``'bi'``.
    """
    # The name is resolved separately because the later error messages embed
    # it. Looking it up inside the same try block as the other keys would leave
    # it unbound in the handler when the name itself is what is missing.
    try:
        name = body['metadata']['name']
    except KeyError as e:
        raise kopf.PermanentError('Invalid link: "metadata.name" is missing') from e

    try:
        namespace = body['metadata']['namespace']
        sender = body['spec']['from']
        receiver = body['spec']['to']
        direction = body['spec']['direction']
    except KeyError as e:
        raise kopf.PermanentError(f'Invalid link "{name}"') from e

    if direction not in ('uni', 'bi'):
        raise kopf.PermanentError(f'Invalid direction "{direction}" for link "{name}"')

    return namespace, name, sender, receiver, direction == 'uni'
def build_labels(node, value):
    """Build a metadata patch that sets both link labels for ``node``.

    Args:
        node: Name of the peer node the labels refer to.
        value: Value assigned to both labels.

    Returns:
        A dict ``{"metadata": {"labels": {...}}}`` holding the
        ``send-node-<node>`` and ``receive-node-<node>`` labels.
    """
    labels = {}
    for role in ("send", "receive"):
        labels[f"{role}-node-{node}"] = value
    return {"metadata": {"labels": labels}}
@kopf.on.create(RESOURCE)
def create(body, **kwargs) -> None:
    """Handle creation of a link resource.

    Builds a network-chaos object from the link spec and creates it only when
    the spec contains a fault (``bandwidth``, or any of ``delay``, ``loss``,
    ``duplicate``, ``corrupt``). Then labels the pods of both endpoints with
    ``"enabled"`` link labels.

    Args:
        body: The link resource body.
        **kwargs: Remaining handler arguments; unused.
    """
    namespace, name, sender, receiver, unidirectional = get_link_metadata(body)
    logging.info(f'Link "{name}" ({sender} -> {receiver}) is created on namespace "{namespace}"')
    logging.debug(body)

    # Chaos experiment
    logging.info(
        f'Creating link "{name}" ({sender} -> {receiver}) on namespace "{namespace}" with parameters "{body.spec}"')
    chaos = templates.chaos_link(
        name=name,
        namespace=namespace,
        sender=sender,
        receiver=receiver,
        direction='to' if unidirectional else 'both',
    )

    # Check if there is a fault to apply
    if 'bandwidth' in body.spec:
        chaos['spec']['action'] = 'bandwidth'
        chaos['spec']['bandwidth'] = body.spec['bandwidth']
        has_fault = True
    else:
        chaos['spec']['action'] = 'netem'
        has_fault = False
        for fault in ('delay', 'loss', 'duplicate', 'corrupt'):
            if fault not in body.spec:
                continue
            chaos['spec'][fault] = body.spec[fault]
            has_fault = True

    # Only create the chaos experiment if there is a fault to apply
    if has_fault:
        group, version = chaos['apiVersion'].split('/')
        kopf.adopt(chaos)
        k8s.crd.create_namespaced_custom_object(group, version, namespace, 'networkchaos', chaos)

    def label_pods(node, peer, dropped_label):
        # Patch every pod of `node` with the labels for `peer`. A
        # unidirectional link leaves `dropped_label` out of the patch.
        pods = k8s.core.list_namespaced_pod(namespace=namespace, label_selector=f'node={node}')
        for pod in pods.items:
            patch = build_labels(peer, "enabled")
            if unidirectional:
                del patch["metadata"]["labels"][dropped_label]
            k8s.core.patch_namespaced_pod(pod.metadata.name, namespace, patch)

    # Label the pods to enable the service and exception to the network policy
    label_pods(sender, receiver, f"receive-node-{receiver}")
    label_pods(receiver, sender, f"send-node-{sender}")
@kopf.on.delete(RESOURCE)
def delete(body, **kwargs):
    """Handle deletion of a link resource.

    Patches the pods of both endpoints with the link labels set to ``None``.

    Args:
        body: The link resource body.
        **kwargs: Remaining handler arguments; unused.
    """
    namespace, name, sender, receiver, unidirectional = get_link_metadata(body)
    logging.info(f'Link "{name}" ({sender} -> {receiver}) is deleted on namespace "{namespace}"')
    logging.debug(body)

    def reset_pods(node, peer, skipped_label):
        # Patch every pod of `node` with the labels for `peer` set to None. A
        # unidirectional link leaves `skipped_label` out of the patch.
        pods = k8s.core.list_namespaced_pod(namespace=namespace, label_selector=f'node={node}')
        for pod in pods.items:
            patch = build_labels(peer, None)
            if unidirectional:
                del patch["metadata"]["labels"][skipped_label]
            k8s.core.patch_namespaced_pod(pod.metadata.name, namespace, patch)

    # Reset labels
    reset_pods(sender, receiver, f"receive-node-{receiver}")
    reset_pods(receiver, sender, f"send-node-{sender}")

55
code/src/kinds/node.py Normal file
View File

@ -0,0 +1,55 @@
import kopf
import logging
import json
import templates
import k8s
import utils
RESOURCE = 'nodes.' + utils.GROUP
@kopf.on.create(RESOURCE)
def create(body, **kwargs):
    """Handle creation of a node resource.

    Renders a deployment, a service and a network policy for the node, adopts
    them under the node resource and creates them in the node's namespace.
    ``spec.resources`` defaults to an empty object and ``spec.airGapped`` to
    True. A node that is not air-gapped gets an extra ingress and egress rule
    for 0.0.0.0/0 except 10.0.0.0/8.

    Args:
        body: The node resource body.
        **kwargs: Remaining handler arguments; unused.
    """
    metadata = body['metadata']
    name, namespace = metadata['name'], metadata['namespace']
    logging.info(f'Node "{name}" is created on namespace "{namespace}"')
    logging.debug(body)

    spec = body['spec']
    resources = json.dumps(spec['resources']) if 'resources' in spec else '{}'
    air_gapped = spec.get('airGapped', True) == True

    deployment, service, policy = templates.native_node(id=name, image=spec['image'], resources=resources)

    if not air_gapped:
        # One peer object shared by the ingress and the egress rule.
        outside = {"ipBlock": {
            "cidr": "0.0.0.0/0",
            "except": ["10.0.0.0/8"]
        }}
        policy['spec']['ingress'].append({'from': [outside]})
        policy['spec']['egress'].append({'to': [outside]})

    manifests = (deployment, service, policy)
    for manifest in manifests:
        logging.debug(manifest)
    for manifest in manifests:
        kopf.adopt(manifest)

    k8s.apps.create_namespaced_deployment(namespace, deployment)
    k8s.core.create_namespaced_service(namespace, service)
    k8s.networking.create_namespaced_network_policy(namespace, policy)
@kopf.on.delete(RESOURCE)
def delete(body, **kwargs):
    """Log the deletion of a node resource; performs no other work.

    Args:
        body: The node resource body.
        **kwargs: Remaining handler arguments; unused.
    """
    node_name = body['metadata']['name']
    logging.info(f'Node "{node_name}" is deleted')
    logging.debug(body)

141
code/src/kinds/scenario.py Normal file
View File

@ -0,0 +1,141 @@
import asyncio
import logging
import kopf
import k8s
import templates
import utils
# Fully qualified resource name (plural + API group) the handlers below are registered for.
RESOURCE = 'scenarios.' + utils.GROUP
@kopf.on.create(RESOURCE)
def create(body, patch, **kwargs):
    """Log the creation of a Scenario resource; the events are run by the daemon."""
    metadata = body['metadata']
    name, namespace = metadata['name'], metadata['namespace']
    logging.info(f'scenario "{name}" is created on namespace "{namespace}"')
@kopf.on.delete(RESOURCE)
def delete(body, **kwargs):
    """Log the deletion of a Scenario resource."""
    scenario_name = body['metadata']['name']
    logging.info(f'scenario "{scenario_name}" is deleted')
# Scenario daemon: walks through the events of a scenario and executes every
# event whose offset has been reached, then sleeps until the next one is due.
#
# Progress is stored in the status of the Scenario object:
#   status.started      timestamp (ms) written on the first iteration
#   status.events[i]    {'executed': True, 'timestamp': <ms>} per executed event
#   status.ended        timestamp (ms) written by an 'end' event
#
# NOTE(review): the loop re-reads `status` on every iteration and therefore
# relies on kopf keeping that object up to date after each patch - confirm.
# NOTE(review): the Kubernetes client calls below are synchronous and run
# inside this coroutine - confirm this is acceptable for the event loop.
@kopf.daemon(RESOURCE, cancellation_timeout=1)
async def daemon(meta, status, spec, **kwargs):
    try:
        while True:
            # Stop as soon as the scenario has been marked as ended.
            try:
                if status['ended']:
                    return
            except KeyError:
                pass
            name = meta['name']
            namespace = meta['namespace']

            # Write `body` into the status of this Scenario object.
            def patch(body):
                k8s.crd.patch_namespaced_custom_object(
                    utils.GROUP, utils.VERSION, namespace, 'scenarios', name, {'status': body})

            # Names of created nodes and links are prefixed with the scenario name.
            def with_prefix(id: str) -> str:
                return f'{name}-{id}'
            now = utils.timestamp_ms()
            # First iteration: record the start time and retry shortly after.
            try:
                started = status['started']
            except KeyError:
                patch({'started': now})
                logging.info('waiting for scenario to start...')
                await asyncio.sleep(0.1)
                continue
            # Time since the scenario started
            elapsed = now - started
            logging.info(f'elapsed: {elapsed}')
            logging.debug(status)
            # Calculate when the next run should be executed, based on future events
            next_run = None
            for i, event in enumerate(spec['events']):
                # The offset is multiplied by 1000 before it is compared with the
                # elapsed milliseconds, i.e. it is treated as seconds here.
                offset = (event['offset'] or 0) * 1000
                delta = offset - elapsed
                if delta > 0:
                    # Future event: only remember the smallest waiting time.
                    if next_run is None or delta < next_run:
                        next_run = delta
                    continue
                # The event is due; look up whether it has been executed already.
                try:
                    s = status['events'][str(i)]
                    executed = s['executed']
                except KeyError:
                    executed = False
                if not executed:
                    # Execute event
                    logging.info(f'executing event {i}')
                    # The event is marked as executed before its action runs.
                    patch({'events': {i: {'executed': True, 'timestamp': now}}})
                    action = event['action']
                    if action == 'end':
                        # End scenario
                        logging.info(f'ending scenario {event}')
                        patch({'ended': now})
                        return
                    # NODE
                    elif event['resource'] == 'node':
                        ID = with_prefix(event['id'])
                        if action == 'create':
                            # Create node
                            logging.info(f'creating node {event}')
                            body = templates.iluzio_node(id=ID)
                            # The spec of the event replaces the template spec as a whole.
                            body['spec'] = event['spec']
                            kopf.adopt(body)
                            k8s.crd.create_namespaced_custom_object(
                                utils.GROUP, utils.VERSION, namespace, 'nodes', body)
                        elif action == 'delete':
                            # Delete node
                            logging.info(f'deleting node {event}')
                            k8s.crd.delete_namespaced_custom_object(
                                utils.GROUP, utils.VERSION, namespace, 'nodes', ID)
                        else:
                            logging.error(f'unknown action {action}')
                    # LINK
                    elif event['resource'] == 'link':
                        # The link name is built from 'from', 'to' and 'direction'.
                        ID = '-'.join(map(lambda x: event[x], ['from', 'to', 'direction']))
                        ID = with_prefix(ID)
                        if action == 'create':
                            # Create links
                            logging.info(f'creating link {event}')
                            body = templates.iluzio_link(id=ID)
                            body['spec'].update(event['spec'])
                            # Endpoints refer to the prefixed node names.
                            body['spec']['from'] = with_prefix(event['from'])
                            body['spec']['to'] = with_prefix(event['to'])
                            body['spec']['direction'] = event['direction']
                            kopf.adopt(body)
                            k8s.crd.create_namespaced_custom_object(
                                utils.GROUP, utils.VERSION, namespace, 'links', body)
                        elif action == 'delete':
                            # Delete link
                            logging.info(f'deleting link {event}')
                            k8s.crd.delete_namespaced_custom_object(
                                utils.GROUP, utils.VERSION, namespace, 'links', ID)
                        else:
                            logging.error(f'unknown action {action}')
                    else:
                        logging.error(f'unknown resource {event["resource"]}')
            # Without any future event there is nothing left to wait for.
            if next_run is None:
                logging.error('no next run could be calculated')
                return
            logging.info(f'waiting for next run in {next_run} ms')
            await asyncio.sleep(next_run / 1000)
    except asyncio.CancelledError:
        # Cancellation is logged and the daemon returns normally.
        logging.info('cancellation requested')

3
code/src/main.py Normal file
View File

@ -0,0 +1,3 @@
import kinds.node
import kinds.link
import kinds.scenario

51
code/src/templates.py Normal file
View File

@ -0,0 +1,51 @@
from os import path
from string import Template
import yaml
# Directory named 'templates' located next to this module.
templates_dir = path.join(path.dirname(__file__), 'templates')
def load(name: str, variables):
    """
    Render a template file and parse the result as YAML.

    ``name`` is resolved relative to the templates directory and
    ``variables`` are substituted for the ``$``-placeholders of the file.
    Every YAML document of the rendered text is returned, as a list.
    """
    template_path = path.join(templates_dir, name)
    with open(template_path, 'r') as handle:
        raw = handle.read()
    rendered = Template(raw).substitute(**variables)
    return [document for document in yaml.safe_load_all(rendered)]
# Iluzio
def iluzio_node(*, id: str):
    """
    Render the iluzio node template for ``id``.

    Returns the first YAML document of the rendered template.
    """
    documents = load('iluzio/node.yaml', {'id': id})
    return documents[0]
def iluzio_link(*, id: str):
    """
    Render the iluzio link template for ``id``.

    Returns the first YAML document of the rendered template.
    """
    documents = load('iluzio/link.yaml', {'id': id})
    return documents[0]
# Native
def native_node(*, id: str, image: str, resources: str) -> list[dict]:
    """
    Render the native node template.

    ``resources`` is inserted into the template verbatim, so it has to be
    a string that is valid YAML on its own (the caller passes JSON).

    Returns all YAML documents of the rendered template as parsed objects,
    in template order. The caller unpacks them as deployment, service and
    network policy.
    """
    # The previous annotation promised a tuple of strings, but `load` returns
    # the list of parsed YAML documents, which callers index like mappings.
    return load('native/node.yaml', {'id': id, 'image': image, 'resources': resources})
# Chaos
def chaos_link(*, name: str, namespace: str, sender: str, receiver: str, direction: str):
    """
    Render the chaos link template.

    Returns the first YAML document of the rendered template.
    """
    variables = {
        'name': name,
        'namespace': namespace,
        'sender': sender,
        'receiver': receiver,
        'direction': direction,
    }
    documents = load('chaos/link.yaml', variables)
    return documents[0]

View File

@ -0,0 +1,19 @@
# Template for the NetworkChaos object of a link.
# The ${...} placeholders are substituted before the text is parsed as YAML.
# They are quoted so that a value such as `1` or `no` stays a string instead
# of being typed as a number or boolean by the YAML parser.
kind: NetworkChaos
apiVersion: chaos-mesh.org/v1alpha1
metadata:
  name: '${name}'
spec:
  # Pods of the sending node
  selector:
    namespaces:
      - '${namespace}'
    labelSelectors:
      node: '${sender}'
  mode: all
  # Pods of the receiving node
  target:
    selector:
      namespaces:
        - '${namespace}'
      labelSelectors:
        node: '${receiver}'
    mode: all
  direction: '${direction}'

View File

@ -0,0 +1,110 @@
# Manual test manifest: traffic shaping from node a to node b.
apiVersion: chaos-mesh.org/v1alpha1
kind: NetworkChaos
metadata:
  name: test
  namespace: simulator
spec:
  action: bandwidth
  direction: to
  mode: all
  # Source pods
  selector:
    namespaces:
      - simulator
    labelSelectors:
      node: a
  # Destination pods
  target:
    mode: all
    selector:
      namespaces:
        - simulator
      labelSelectors:
        node: b
  bandwidth:
    rate: 10mbps
    limit: 2000000000
    buffer: 200000
  # NOTE(review): with `action: bandwidth` these delay settings may not be
  # applied - confirm against the Chaos Mesh documentation.
  delay:
    latency: 50ms
    correlation: '25'
    jitter: 25ms
# loss:
# loss: '1'
# duplicate:
# duplicate: '1'
# corrupt:
# corrupt: '1'
# ---
# kind: NetworkChaos
# apiVersion: chaos-mesh.org/v1alpha1
# metadata:
# namespace: simulator
# name: link-a-b
# spec:
# selector:
# namespaces:
# - simulator
# labelSelectors:
# node: a
# mode: all
# action: partition
# direction: to
# target:
# selector:
# namespaces:
# - simulator
# labelSelectors:
# node: b
# mode: all
# ---
# kind: NetworkChaos
# apiVersion: chaos-mesh.org/v1alpha1
# metadata:
# namespace: simulator
# name: test
# spec:
# selector:
# namespaces:
# - simulator
# labelSelectors:
# node: a
# mode: all
# action: delay
# delay:
# latency: 10ms
# correlation: '0'
# jitter: 1ms
# direction: both
# target:
# selector:
# namespaces:
# - simulator
# labelSelectors:
# node: b
# mode: all
# ---
# kind: NetworkChaos
# apiVersion: chaos-mesh.org/v1alpha1
# metadata:
# namespace: simulator
# name: band-b
# spec:
# selector:
# namespaces:
# - simulator
# labelSelectors:
# node: a
# mode: all
# action: bandwidth
# bandwidth:
# rate: 10mbps
# limit: 2000000000
# buffer: 1500
# direction: both
# target:
# selector:
# namespaces:
# - simulator
# labelSelectors:
# node: b
# mode: all

View File

@ -0,0 +1,25 @@
# Example Link between the nodes a and b.
apiVersion: iluzio.nicco.io/v1
kind: Link
metadata:
  name: test-link
spec:
  from: 'a'
  to: 'b'
  # direction: uni
  direction: 'bi'
# bandwidth:
# rate: 1kbps
# # limit: 20971520
# limit: 10000
# buffer: 5000
# delay:
# latency: 100ms
# correlation: '0'
# jitter: 50ms
# loss:
# loss: '0.5'
# correlation: '100'
# duplicate:
# duplicate: '1'
# corrupt:
# corrupt: '1'

View File

@ -0,0 +1,11 @@
# Example Node named b with resource limits.
apiVersion: iluzio.nicco.io/v1
kind: Node
metadata:
  name: b
spec:
  image: idle
  resources:
    limits:
      cpu: '500m'
      memory: '128Mi'
      ephemeral-storage: '4Gi'

View File

@ -0,0 +1,25 @@
# Manual test manifest: bandwidth limit between node a and node b, both directions.
apiVersion: chaos-mesh.org/v1alpha1
kind: NetworkChaos
metadata:
  name: band-b
  namespace: simulator
spec:
  action: bandwidth
  direction: both
  mode: all
  bandwidth:
    rate: 10gbps
    limit: 2000000000
    buffer: 1500
  # One side of the link
  selector:
    namespaces:
      - simulator
    labelSelectors:
      node: a
  # Other side of the link
  target:
    mode: all
    selector:
      namespaces:
        - simulator
      labelSelectors:
        node: b

View File

@ -0,0 +1,47 @@
# Test workflow that runs a delay fault followed by a loss fault.
apiVersion: chaos-mesh.org/v1alpha1
kind: Workflow
metadata:
  name: test
spec:
  entry: entry
  templates:
    # Entry point: runs the children one after another
    - name: entry
      templateType: Serial
      children:
        - delay
        - loss
    - name: delay
      templateType: NetworkChaos
      networkChaos:
        action: delay
        selector:
          namespaces:
            - simulator
        mode: all
        target:
          selector:
            namespaces:
              - simulator
          mode: all
        direction: both
        delay:
          latency: '90ms'
          correlation: '25'
          jitter: '1ms'
    - name: loss
      templateType: NetworkChaos
      networkChaos:
        # Was `delay`, which does not match the `loss` parameters below.
        action: loss
        selector:
          namespaces:
            - simulator
        mode: all
        target:
          selector:
            namespaces:
              - simulator
          mode: all
        direction: both
        loss:
          loss: '25'
          correlation: '25'

View File

@ -0,0 +1,5 @@
# Template for a Link object; ${id} is substituted before the YAML is parsed.
apiVersion: iluzio.nicco.io/v1
kind: Link
metadata:
  # Quoted so that an id such as `1` or `no` is still parsed as a string.
  name: '${id}'
spec: {}

View File

@ -0,0 +1,5 @@
# Template for a Node object; ${id} is substituted before the YAML is parsed.
apiVersion: iluzio.nicco.io/v1
kind: Node
metadata:
  # Quoted so that an id such as `1` or `no` is still parsed as a string.
  name: '${id}'
spec: {}

View File

@ -0,0 +1,81 @@
# Template for the native Kubernetes objects of a simulation node.
# The ${...} placeholders are substituted before the text is parsed as YAML.
# Scalar placeholders are quoted so that values such as `1` or `no` are
# still parsed as strings.

# Simulation node with sidecar
apiVersion: apps/v1
kind: Deployment
metadata:
  name: '${id}'
spec:
  replicas: 1
  selector:
    matchLabels:
      node: '${id}'
  template:
    metadata:
      name: pod-${id}
      labels:
        node: '${id}'
    spec:
      dnsPolicy: ClusterFirst
      containers:
        # Image
        - name: app
          image: '${image}'
          imagePullPolicy: Never
          # Left unquoted on purpose: the substituted value is a JSON object
          # that has to be parsed as a mapping, not as a string.
          resources: ${resources}
        # Sidecar
        - name: sidecar
          image: sidecar
          imagePullPolicy: Never
          env:
            - name: SERVICE
              value: '${id}'
          resources: {}
---
# Service that makes other nodes discoverable to this node
apiVersion: v1
kind: Service
metadata:
  name: '${id}'
spec:
  clusterIP: None
  selector:
    receive-node-${id}: enabled
---
# This network policy denies all traffic from and to other namespaces
# https://github.com/ahmetb/kubernetes-network-policy-recipes/blob/master/04-deny-traffic-from-other-namespaces.md
# The exception is traffic to and from the kube-system namespace, which is needed for the DNS resolution of services.
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: '${id}'
spec:
  podSelector:
    matchLabels:
      node: '${id}'
  ingress:
    # Internal DNS
    - from:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: kube-system
          podSelector:
            matchLabels:
              k8s-app: kube-dns
    # Pods labelled as senders for this node
    - from:
        - podSelector:
            matchLabels:
              send-node-${id}: enabled
  egress:
    # Internal DNS
    - to:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: kube-system
          podSelector:
            matchLabels:
              k8s-app: kube-dns
    # Pods labelled as receivers for this node
    - to:
        - podSelector:
            matchLabels:
              receive-node-${id}: enabled

8
code/src/utils.py Normal file
View File

@ -0,0 +1,8 @@
import time
# API group and version of the iluzio custom resources.
GROUP = 'iluzio.nicco.io'
VERSION = 'v1'
def timestamp_ms() -> int:
    """Return the current time of ``time.time()`` in whole milliseconds."""
    seconds = time.time()
    return int(seconds * 1000)

2
defence/.gitattributes vendored Normal file
View File

@ -0,0 +1,2 @@
*.key filter=lfs diff=lfs merge=lfs -text
*.pdf filter=lfs diff=lfs merge=lfs -text

3
defence/Defence.pdf Normal file
View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b59b4395bf228a16b075e6bdf3027224a18f7d893ed2bf274e23efc4874d441c
size 2372569

4
thesis/.gitattributes vendored Normal file
View File

@ -0,0 +1,4 @@
*.zip filter=lfs diff=lfs merge=lfs -text
*.png filter=lfs diff=lfs merge=lfs -text
*.afdesign filter=lfs diff=lfs merge=lfs -text
*.pdf filter=lfs diff=lfs merge=lfs -text

17
thesis/.gitignore vendored Normal file
View File

@ -0,0 +1,17 @@
# Own
*.pdf
!images/*.pdf
# Common
.vscode
*.log
# Tex
*.aux
*.pyg
*.toc
*.lof
*.lot
*.xml
*.bbl
*-blx.bib

9
thesis/Makefile Normal file
View File

@ -0,0 +1,9 @@
# Build the thesis PDF. `fonts` and `graphs` are run first.
build:: fonts graphs
	tectonic -Z shell-escape src/main.tex
	mv src/main.pdf Thesis.pdf
# Install the fonts used by the document.
fonts::
	./fonts/install.sh
# Render every Mermaid diagram (*.mmd) below this directory to PDF.
graphs::
	find . -name "*.mmd" | xargs -I % npx -yp @mermaid-js/mermaid-cli mmdc -i % -f -e pdf

17
thesis/README.md Normal file
View File

@ -0,0 +1,17 @@
# Master Thesis
## Building
```sh
make -B build
```
If you don't have the right fonts installed run
```sh
make -B fonts
```
## Packages
- [TUD Script](https://mirror.foobar.to/CTAN/macros/latex/contrib/tudscr/doc/tudscr.pdf)

View File

@ -0,0 +1,24 @@
stateDiagram-v2
    %% Lifecycle of a link: Creation -> Running -> Deletion
    direction LR
    %% Creation: the NetworkChaos object comes first, then the labels
    state Creation {
        state "NetworkChaos" as nc0
        state "Labels" as l0
        [*] --> nc0: Create and adopt
        nc0 --> l0: Patch
        l0 --> [*]
    }
    [*] --> Creation
    Creation --> Running
    %% Deletion: the labels come first, then the NetworkChaos object
    state Deletion {
        state "NetworkChaos" as nc1
        state "Labels" as l1
        [*] --> l1: Cascade deletion
        l1 --> nc1: Patch
        nc1 --> [*]
    }
    Running --> Deletion
    Deletion --> [*]

24
thesis/diagrams/node.mmd Normal file
View File

@ -0,0 +1,24 @@
stateDiagram-v2
    %% Lifecycle of a node: Creation -> Running -> Deletion
    direction LR
    [*] --> Creation
    %% Creation: Deployment, Service and NetworkPolicy are shown in parallel
    state Creation {
        state f0 <<fork>>
        state j0 <<join>>
        [*] --> f0: Preparation
        f0 --> j0: Deployment
        f0 --> j0: Service
        f0 --> j0: NetworkPolicy
        j0 --> [*]: Ready
    }
    Creation --> Running
    Running --> Deletion
    %% Deletion: the same three objects are shown in parallel
    state Deletion {
        state f1 <<fork>>
        state j1 <<join>>
        [*] --> f1: Cascading deletion
        f1 --> j1: Deployment
        f1 --> j1: Service
        f1 --> j1: NetworkPolicy
        j1 --> [*]
    }
    Deletion --> [*]

View File

@ -0,0 +1,9 @@
stateDiagram-v2
    %% Lifecycle of a scenario: the daemon loops over events until an end event
    state "Daemon Running" as d {
        [*] --> Loop: Start daemon
        Loop --> Loop: Node event
        Loop --> Loop: Link event
        Loop --> [*]: End event and exit daemon
    }
    [*] --> d: CRD Created
    d --> [*]: Ended

View File

@ -0,0 +1,32 @@
stateDiagram-v2
    %% Status fields written by the scenario daemon while it loops over events
    state "Daemon Loop" as loop {
        state a <<choice>>
        [*] --> action
        action --> a
        state "Execute event" as exec
        %% The daemon stores a flag and a timestamp for every executed event
        note right of exec
            status.events[i].executed = true
            status.events[i].timestamp = timestamp
        end note
        a --> exec: node
        a --> exec: link
        a --> [*]: end
    }

    state "Created" as c
    %% The daemon writes the start time to status.started
    note left of c
        status.started = timestamp
    end note

    state "Finished" as done
    note left of done
        status.ended = timestamp
    end note

    [*] --> c
    c --> loop
    loop --> done
    done --> [*]

BIN
thesis/fonts/Open_Sans.zip (Stored with Git LFS) Normal file

Binary file not shown.

BIN
thesis/fonts/Roboto_Mono.zip (Stored with Git LFS) Normal file

Binary file not shown.

6
thesis/fonts/install.sh Executable file
View File

@ -0,0 +1,6 @@
#!/bin/sh
# Unpack the bundled font archives into the fonts directory of the user.
# The archive paths are relative, so the script has to be started from the
# directory that contains the `fonts` folder.

# Stop at the first failing command instead of reporting success anyway.
set -eu

TARGET="$HOME/.fonts"
# Quoted so that a home directory containing spaces is handled correctly.
mkdir -p "$TARGET"
unzip -o -d "$TARGET/roboto_mono" "./fonts/Roboto_Mono.zip"
unzip -o -d "$TARGET/open_sans" "./fonts/Open_Sans.zip"

BIN
thesis/images/Network Labels.pdf (Stored with Git LFS) Executable file

Binary file not shown.

BIN
thesis/images/Network.pdf (Stored with Git LFS) Executable file

Binary file not shown.

BIN
thesis/images/Node.pdf (Stored with Git LFS) Executable file

Binary file not shown.

BIN
thesis/images/Overview.pdf (Stored with Git LFS) Executable file

Binary file not shown.

BIN
thesis/images/Single Node.pdf (Stored with Git LFS) Executable file

Binary file not shown.

BIN
thesis/images/dtn-vs-tcp.png (Stored with Git LFS) Normal file

Binary file not shown.

View File

@ -0,0 +1,238 @@
\chapter{Concept}
This chapter will focus on the general concept and structure of the simulator and its subcomponents.
\section{General Overview}
At the most basic level, the simulator is split in two parts: controller and nodes.
The controller \ref{sim-controller} is the brain of the simulator and takes charge of all the logic, the simulation, node orchestration, logging and everything that the simulation needs to run.
Scenarios, Nodes \ref{sim-node} and Links \ref{sim-link} on the other hand, "exist" for the time they are running or needed. The creation, administration, and deletion of those components is taken care of by the controller.
The goal of Nodes is to be framework and language agnostic in order to retain as much flexibility as possible and not limit the user in what interfaces the simulator expects.
\begin{figure}[h]
\caption{Architecture overview}
\label{fig:concept-overview}
\centering
\includegraphics[width=0.75\textwidth]{Overview.pdf}
\end{figure}
\section{Components of the simulator}
\subsection{Controller \label{sim-controller}}
The controller is the brain of the simulator and keeps track of what actions need to be undertaken.
The basic principles of the simulator are events \ref{sim-event}.
These are the actions that modify a given scenario (e.g. creating a new node, editing link properties, etc.).
The controller also accepts a configuration file which has predefined events defined in it.
A configuration file is enough to run any kind of simulation and should be the preferred way to create a simulation. The API is exposed for the purpose of making the tool more extendible. Every event that is available in the configuration file should also be available through the API, making them feature complete and identical.
The simulation controller is modelled as a Kubernetes Operator\footnote{\url{https://kubernetes.io/docs/concepts/extend-kubernetes/operator/}}. Operators extend Kubernetes controllers such that every operator is a controller, but not the other way around. Operators are meant to complete domain specific operations that could not be achieved with general purpose tools inside of Kubernetes. Often this takes the form of automated operation of components, as this is the case for this work.
The operator should control each component of the simulation, such as nodes, links, and events and their respective lifecycles. By doing so, it will manage the execution of a given simulation and gather information about said simulation.
\subsection{Scenario \label{sim-scenario}}
A Scenario is the collection of all information the simulator needs to run a given simulation. It describes the sequence of everything that needs to happen inside a given simulation, or Scenario.
In this case, a Scenario is a collection of simulation nodes and events. They describe a simulation run through a series of events with which they can create, modify or delete nodes and links.
The Scenario is the only information needed for the controller to run a simulation. This is what a user would create and pass to the controller. The rest should be taken care of by the controller, which orchestrates all the individual items inside the scenario.
\subsection{Node \label{sim-node}}
A node (of the simulation, not Kubernetes) is defined as the smallest unit of communication. It can receive or send data from other nodes. A node can be dynamically created or removed from a given scenario at any moment by events. A node can have any number of links $l \in \mathbf{N}$ to other nodes inside a scenario.
\begin{table}[H]
\centering
\begin{tabular}{ l|l }
\label{table:properties-node}
Property & Overview \\
\hline
ID & Uniquely identifying string for the node \\
Image & A container image that will be used for the given node \\
Memory & The amount of computer memory available to the node \\
Storage & The amount of storage available to the node \\
CPU & The amount of CPU resources available to the node \\
\end{tabular}
\caption{Node Properties}
\end{table}
% TODO: Review
The node ID has to be unique, as it is used to reference the node inside the scenario, for example when adding links. As IDs are also used internally to structure the various Kubernetes resources required by a node, they are required to be valid DNS subdomain names, as specified by Kubernetes\footnote{\url{https://kubernetes.io/docs/concepts/overview/working-with-objects/names/\#dns-subdomain-names}} and the referenced RFC \cite{RFC1123}.
The image is a user-provided image, specifically a Docker image, but more generally an \ac{oci} image\footnote{\url{https://github.com/opencontainers/image-spec/blob/main/spec.md}}. This image provides the user code to the simulator. Each node can run a separate image and is provided with a runtime dictated by the specified properties.
The memory, storage, and CPU fields are properties to limit the compute power of a given system. They map natively to Kubernetes resource management\footnote{\url{https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/\#resource-units-in-kubernetes}}. For the CPU, a unit called \textit{millicpu}s is used, which represents a per mill unit on a thread. Memory is simply specified by a number followed by a unit, as is the storage.
\begin{figure}[H]
\caption{Node overview}
\label{fig:nodes-overview}
\centering
\includegraphics[width=0.9\textwidth]{Node.pdf}
\end{figure}
A simulation node is not to be confused with a Kubernetes Node\footnote{\url{https://kubernetes.io/docs/concepts/architecture/nodes/}}. Each simulation node maps to a Kubernetes Pod, the smallest available unit in Kubernetes. A Pod can contain multiple containers, which share the same resources and lifecycle. Shared resources include storage and network.
In this work, the Pod will include two containers: The image provided by the user for the simulation and a second container provided automatically as a Sidecar pattern\cite{azure-sidecar} by the controller.
More details on the Sidecar container are given later on (see \ref{sim-link}).
\subsubsection{Node Discovery}
Each node that wants to communicate with another party needs some kind of discovery mechanism. As communication partners are dynamic, meaning they can appear and cease to exist at any moment, a static configuration of our simulation node is not a viable solution.
While static discovery could be defined at a configuration level, dynamic configuration requires more sophisticated methods that require runtime interaction, either in the simulator or in the container. As every node will need to implement discovery, the aim is to provide this functionality "out of the box" in order to keep simulation images uncluttered and agnostic to the discovery method used.
The following solutions were considered to achieve dynamic node discovery at runtime:
\begin{enumerate}
\item Kubernetes Service\footnote{\url{https://kubernetes.io/docs/concepts/services-networking/service/}} \& environment variables\footnote{\url{https://en.wikipedia.org/wiki/Environment_variable}}
\item Sidecar container
\end{enumerate}
The first option would leverage native Kubernetes Services and a configured env variable for the container. The environment variable would contain the name of the Service.
Kubernetes natively supports similar functionality at its core, called Services. Generally they are used to load-balance, often in a round-robin fashion, making multiple Pods available to other nodes. In practice, this is implemented using \ac{dns} that returns a single, changing \ac{ip} address belonging to one of the Pods inside that service. While in most scenarios this is quite desirable, in this case the behaviour needs to be modified, as all the communication nodes should be reachable simultaneously instead of routing to a single pod addressed by such a service.
If unchanged, this would mean that the image provided to the simulator would resolve DNS on its own, given the service address via an environment variable. While virtually all operating systems and therefore container images are capable of doing so, it is not certain how long the resolved values are cached. This could lead to undesired behaviour and is therefore not the preferred solution. Furthermore, the default behaviour for resolving \ac{dns} returns one \ac{ip} address, which makes addressing all the available nodes not feasible.
The second option leverages much the same mechanisms, however the querying of \ac{dns} entries is handled by a Sidecar container \cite{azure-sidecar}.
By using a separate application for this task, we can make sure that no caches are being used and provide all the \ac{ip} addresses to the container, instead of a single one that keeps changing.
For each user container running in a Pod, a second container will be automatically added as a sidecar \cite{azure-sidecar}. The sidecar container then exposes a simple HTTP endpoint for retrieving the current IP addresses at any moment.
Whenever a request is made, the sidecar application will create a DNS lookup to the Kubernetes service and return the list to the container.
From an architecture perspective, this means that every simulation node is mapped to a Kubernetes Pod with its discovery sidecar, and a Kubernetes Service\footnote{\url{https://kubernetes.io/docs/concepts/services-networking/service/}} that selects all the nodes that should be visible to, and therefore connected to, the given node.
This goes against the intuitive way of using Kubernetes Services, where usually the companioning Service is used to make the Pod or Deployment available to other systems. Here the usage is reversed. In addition, each node will also be accompanied by a Kubernetes Network Policy to create network isolation between the nodes. This will be explored in more detail in the next section.
\subsection{Link \label{sim-link}}
A link is defined as the ability to communicate between two nodes.
Links, as nodes, can be established or destroyed at any moment, enabling or blocking communications between nodes.
By default, all communication is blocked, meaning that no communication is possible between nodes and to any network outside the simulation.
Links also have directionality, meaning that for a given link we can specify different properties for incoming and outgoing traffic.
This is useful for simulating asymmetric links, where the quality of the link is different in one direction to the other. In addition, it allows simulating unidirectional links, where a link can only communicate in one direction, with no reply possible.
\begin{table}[H]
\centering
\begin{tabular}{ l|l }
\label{table:properties-link}
Property & Description \\
\hline
From & Source node. Specified as ID \\
To & Destination node. Specified as ID \\
Direction & Direction of the link. Uni-, or bidirectional \\
Parameters & Parameters for link quality. \ref{link-quality} \\
\end{tabular}
\caption{Link Properties}
\end{table}
Manual, user-given IDs for links are not necessary, as we only create a single, unique link between two nodes. As source and destination IDs are both unique, their combination is unique as well. Internally the simulator will use this combination as the ID for the link and construct a unique name for the Kubernetes object composed of the from, to, and direction of a given link.
\subsubsection{Communication}
The foundation of communication between simulation nodes is carried out over \ac{ip}. The choice of what protocols are used on top of \ac{ip} is open to the user of the simulator. \ac{tcp} and \ac{udp} could be such protocols, for example, but it can be new protocols, or no protocol at all. The simulator is agnostic, and it's in the domain of the user provided container image to implement communication on top of \ac{ip}.
\subsubsection{Convergence Layer}
The Convergence layer, as described in \cite{RFC9171}, is a fundamental part of the Bundle Protocol, with \ac{ltp} being often considered as the most common protocol to implement such a convergence layer. Other alternatives exist, as mTCP \cite{ietf-dtn-mtcpcl-01} which uses TCP as the underlying protocol.
As the simulation nodes have access to the IP stack, they can provide their own convergence layer as needed.
The implementation of such a layer is not part of the simulator, but it's up to the container image to implement it. The simulator will provide the necessary tools to create and manage the links between nodes, but it's up to the container image to implement the convergence layer upon which the nodes communicate. However, this is an optional step, as the node can also simply communicate over \ac{ip} and any protocol built on top of it, such as (but not limited to) \ac{tcp} and \ac{udp}.
\subsubsection{Quality of a link} \label{link-quality}
A key part in the simulation is the ability to disrupt and degrade the quality of service on a given link. This is essential as it creates the actual difficulty for nodes to talk and communicate between each other.
The quality of a link is described by the parameters with which the link between two simulation nodes can be modelled and manipulated. They are divided into two categories, \textit{Base properties} and \textit{Faults}, as listed in Table~\ref{table:link-parameters}.
\begin{table}[H]
\centering
\begin{tabular}{|l|l|l|}
\hline
Parameter & Type & Description \\
\hline\hline
Bandwidth & Base & Bandwidth of the link \\
Latency & Base & The one-way latency of the link \\
Jitter & Fault & Variance in latency \\
Packet Loss & Fault & Probability of packet loss \\
Duplication & Fault & Probability of a packet to be duplicated \\
Reordering & Fault & Probability of packet to be reordered \\
Corruption & Fault & Probability of packet to be corrupted \\
\hline
\end{tabular}
\caption{Categories of link parameters}
\label{table:link-parameters}
\end{table}
Each parameter can be set on initialization and changed over time during the course of the simulation using events \ref{sim-event}. Details about single parameters will be explained in the next chapter \ref{chapter:implementation}.
\subsection{Event \label{sim-event}}
Events are the driving component of a scenario, as they dictate the actual change in the simulation. They are created statically and are therefore predefined at the start of a simulation as a part of the scenario.
An event is defined as a certain action that the operator \ref{sim-controller} has to take. Actions are responsible for altering the current state and therefore modifying links \ref{sim-link} and nodes \ref{sim-node}, or concluding a simulation run.
\begin{table}[H]
\centering
\begin{tabular}{ l|l }
\label{table:properties-event}
Property & Description \\
\hline
Offset & Offset in \si{\milli\second} relative to the start of the simulation \\
Resource & $node$, $link$ or $scenario$ \\
Action & Type of action taken on the resource $\{create, delete, set, end\}$ \\
Data & Data to affect the resource, will be dependent on the specific resource and action taken \\
\end{tabular}
\caption{Event Properties}
\end{table}
For each resource type, there are different actions that can be taken. Next, the different actions will be laid out for each resource. The exact data definitions will be specified in the chapter about the implementation \ref{chapter:implementation}.
\subsubsection{Scenario}
\begin{table}[H]
\centering
\begin{tabular}{|l|c|l|l|}
\hline
Resource & Action & Description & Data \\
\hline\hline
scenario & end & End the simulation & - \\
\hline
\end{tabular}
\caption{Scenario events}
\label{table:events-scenario}
\end{table}
\subsubsection{Node}
\begin{table}[H]
\centering
\begin{tabular}{|l|c|l|l|}
\hline
Resource & Action & Description & Data \\
\hline\hline
node & create & Create a new node & id, image and general pod resources \\
node & delete & Delete a node & id \\
\hline
\end{tabular}
\caption{Node events}
\label{table:events-node}
\end{table}
\subsubsection{Link}
\begin{table}[H]
\centering
\begin{tabular}{|l|c|l|l|}
\hline
Resource & Action & Description & Data \\
\hline\hline
link & create & Create a new link & from, to, direction and parameters \ref{table:link-parameters} \\
link & delete & Delete a link & from, to, direction \\
% link & set & Set link parameters & from, to and parameters \ref{table:link-parameters} \\
\hline
\end{tabular}
\caption{Link events}
\label{table:events-link}
\end{table}

View File

@ -0,0 +1,55 @@
\chapter{Documentation}
\label{chapter:documentation}
\section{Requirements}
\label{section:requirements}
The Simulator requires a few dependencies to run.
Firstly, it requires a Kubernetes cluster which the user has access to. This cluster has to support NetworkPolicies, which generally requires a \ac{cni} plugin that supports NetworkPolicies. The simulator has been tested with the following \ac{cni}: \verb|calico|.
Secondly, the cluster needs to have Chaos Mesh available and installed. The simulator has been tested with Chaos Mesh version \verb|2.5.1|. For more information on how to install Chaos Mesh, please refer to the official documentation\footnote{\url{https://chaos-mesh.org/docs/quick-start/}}.
\section{Quick Start}
For running the simulator locally, minikube\footnote{\url{https://minikube.sigs.k8s.io/docs/}} is required. Minikube is a tool that makes it easy to run a local Kubernetes cluster. It is available for Linux, macOS, and Windows. The user is required to have a Container or VM manager available, such as Docker or VirtualBox. For more details on how to install minikube, please refer to the official documentation\footnote{\url{https://minikube.sigs.k8s.io/docs/start/}}.
In addition to minikube, another tool called Poetry\footnote{\url{https://python-poetry.org/}} is required. Poetry is a dependency management tool for Python, and will take care of installing all required dependencies for the simulator. For more details on how to install Poetry, please refer to the official documentation\footnote{\url{https://python-poetry.org/docs/\#installation}}.
Once minikube and poetry are installed, the user can start a local cluster by running the following command:
\begin{minted}{bash}
make start
\end{minted}
This will execute a few bootstrap scripts that will install the required dependencies and start the local cluster. This process might take multiple minutes.
\begin{enumerate}
\item Create a minikube cluster with Calico as \ac{cni}.
\item Create a namespace \verb|simulator| to scope all resources.
\item Install Chaos Mesh.
\item Install the CRDs of the simulator.
\item Build the docker images of the simulator inside the cluster.
\item Open the dashboard of the cluster and Chaos Mesh.
\end{enumerate}
Once the cluster has been created and the dashboards are running, the operator can be started by running the following command:
\begin{minted}{bash}
make operator-up
\end{minted}
This will install all the poetry dependencies and start the operator. The operator will start listening for new scenarios and will start executing them.
\section{Run an example scenario}
To run an example scenario, we can use one provided in the \verb|examples| folder. First, the cluster is created, and the operator started. After that, the \verb|run.sh| script can be executed, which will build the docker images of the scenario and afterward apply the scenario resource to the cluster.
As mentioned in the previous chapter, it is recommended to change the DNS TTL time to a lower value.
\begin{minted}{bash}
make start
make operator-up
cd scenarios/one-sat-two-base
./run.sh
\end{minted}

View File

@ -0,0 +1,601 @@
\chapter{Evaluation}
This chapter will focus on evaluating the results of this work.
The first section will look at the basic building blocks of the simulator and how they behave.
\section{Basic building blocks}
The following basic functionality will be tested singularly:
\begin{enumerate}
\item Bandwidth
\item Delay
\item Loss
\item Jitter
\item Unidirectionality
\end{enumerate}
For each of these, a test will be performed to see how the simulator behaves. The tests will be performed on a single Kubernetes node, and between two simulation nodes (name=\verb|a|,\verb|b|). This minimizes the number of variables that could affect the results.
The tests were run on an Apple M1 Pro with 32GB of RAM on a local installation of Kubernetes in Minikube.
Additionally, the nodes request a full CPU thread to ensure enough computing power is available. The image used for the nodes is \verb|idle|, which is a minimal image with a few network tools installed, but no other functionality.
The configuration of the simulation nodes is as follows:
\begin{minted}{yaml}
apiVersion: iluzio.nicco.io/v1
kind: Node
metadata:
name: a
spec:
image: idle
resources:
requests:
memory: '256Mi'
cpu: '1000m'
\end{minted}
To avoid confusion in the write-up, the following abbreviations will be used:
\begin{itemize}
\item \verb|ip-a|: The IP address of the node with the name \verb|a|.
\item \verb|ip-b|: The IP address of the node with the name \verb|b|.
\end{itemize}
\subsection{Bandwidth}
\subsubsection{Methodology}
For this test, one node will be sending a stream of data to the other node. For measuring the bandwidth, \verb|iperf3|\footnote{\url{https://iperf.fr/}} was chosen, as it is a well-known tool for that purpose. The bandwidth will be measured in both directions, as the bandwidth is not necessarily the same in both directions.
On the receiving side, the following command will be used.
\begin{minted}{bash}
iperf3 -s
\end{minted}
On the sending side, the following command will be used.
The \verb|-Z| flag will enable the zero-copy mode, which will reduce the overhead of the network stack.
The \verb|--bidir| flag will make the test run in both directions.
The \verb|-t| flag will set the duration of the test to 30 seconds.
\begin{minted}{bash}
iperf3 -c ip-b -Z --bidir -t 30
\end{minted}
\subsubsection{Results}
% Table with rate, limit, buffer, a->b, b->a
\begin{table}[h]
\centering
\begin{tabular}{|l|r|r|c|c|c|}
\cline{4-6}
\multicolumn{3}{c|}{} & \multicolumn{2}{c|}{Bidirectional} & Unidirectional \\
\hline
Rate & Limit & Buffer & a→b & b→a & a→b \\
\hline\hline
1kbps & 20000 & 5000 & \textit{error} & \textit{error} & \SI{13,7}{\kibi\byte\per\second} \\ \hline
50kbps & 20000 & 5000 & \SI{63,4}{\kibi\byte\per\second} & \SI{62,25}{\kibi\byte\per\second} & \SI{55,4}{\kibi\byte\per\second} \\ \hline
500kbps & 20000 & 5000 & \SI{495,5}{\kibi\byte\per\second} & \SI{497}{\kibi\byte\per\second} & \SI{506,5}{\kibi\byte\per\second} \\ \hline
1mbps & 2000000000 & 100000 & \SI{1,016}{\mebi\byte\per\second} & \SI{1,001}{\mebi\byte\per\second} & \SI{1,13}{\mebi\byte\per\second} \\ \hline
50mbps & 2000000000 & 100000 & \SI{50,03}{\mebi\byte\per\second} & \SI{50,02}{\mebi\byte\per\second} & \SI{50,45}{\mebi\byte\per\second} \\ \hline
500mbps & 2000000000 & 100000 & \SI{498,0}{\mebi\byte\per\second} & \SI{497,5}{\mebi\byte\per\second} & \SI{498,5}{\mebi\byte\per\second} \\ \hline
1gbps & 2000000000 & 100000 & \SI{1,01}{\gibi\byte\per\second} & \SI{1,01}{\gibi\byte\per\second} & \SI{1,01}{\gibi\byte\per\second} \\ \hline
5gbps & 2000000000 & 100000 & \SI{4,32}{\gibi\byte\per\second} & \SI{4,34}{\gibi\byte\per\second} & \SI{4,68}{\gibi\byte\per\second} \\ \hline
10gbps & 2000000000 & 100000 & \SI{2,145}{\gibi\byte\per\second} & \SI{4,38}{\gibi\byte\per\second} & \SI{8,445}{\gibi\byte\per\second} \\ \hline
10gbps & 2000000000 & 100000000 & \SI{1,21}{\gibi\byte\per\second} & \SI{9,905}{\gibi\byte\per\second} & \SI{10,3}{\gibi\byte\per\second} \\ \hline
No limit & - & - & \SI{1,19}{\gibi\byte\per\second} & \SI{9,98}{\gibi\byte\per\second} & \SI{26,1}{\gibi\byte\per\second} \\ \hline
\end{tabular}
\caption{Bandwidth test results}
\label{table:evaluation-bandwidth}
\end{table}
Low values (50kbps and below) seem to cause trouble, especially the test run with 1kbps. This could be either due to \verb|iperf3| not handling low bit rates properly or the NetworkChaos implementation.
High values are limited by the CPU of the machine, as the CPU is unable to keep up with the high bandwidth and the connection starts to favor one direction over the other in the higher bandwidths.
For the other tests, the results seem very accurate and usable.
\subsection{Delay}
\subsubsection{Methodology}
The delay, or latency, of the link will be tested using the ping\footnote{\url{https://man.archlinux.org/man/ping.8.en}} utility.
Each run will use the \verb|-i| flag to set the interval between each packet to 1 second.
The \verb|-c| flag will set the number of packets to 10.
The \verb|-W| flag will set the timeout to 1 minute for the longer tests.
The command used to test the delay is as follows.
\begin{minted}{bash}
ping -i 1 -c 10 ip-b
\end{minted}
The link used for this test is as follows.
\begin{minted}{yaml}
apiVersion: iluzio.nicco.io/v1
kind: Link
metadata:
name: test-link
spec:
from: a
to: b
direction: bi
delay:
latency: 1ms
correlation: '0'
jitter: 0ms
\end{minted}
\subsubsection{Results}
% Table with latency and result
\begin{table}[h]
\centering
\begin{tabular}{|l|r|}
\hline
Latency & Results for RTT (min/avg/max/mdev) in \si{\milli\second} \\
\hline\hline
No limit & 0.035/0.052/0.088/0.017 \\ \hline
\SI{0,1}{\milli\second} & 0.305/0.384/0.609/0.109 \\ \hline
\SI{0,5}{\milli\second} & 1.111/1.173/1.283/0.062 \\ \hline
\SI{1}{\milli\second} & 2.095/2.152/2.263/0.058 \\ \hline
\SI{2}{\milli\second} & 4.083/4.166/4.538/0.126 \\ \hline
\SI{5}{\milli\second} & 10.102/10.811/11.572/0.657 \\ \hline
\SI{50}{\milli\second} & 100.086/100.379/100.956/0.360 \\ \hline
\SI{100}{\milli\second} & 200.143/200.970/201.766/0.676 \\ \hline
\SI{500}{\milli\second} & 1000.145/1001.746/1004.690/1.589 \\ \hline
\SI{1}{\second} & 2000.088/2000.782/2002.136/0.769 \\ \hline
\SI{30}{\second} & 60000.596/60002.181/60004.193/1.499 \\ \hline
\SI{1}{\minute} & 120000.491/120001.342/120002.965/1.148 \\ \hline
\end{tabular}
\caption{Delay test results}
\label{table:evaluation-delay}
\end{table}
\subsection{Packet loss}
\subsubsection{Methodology}
This test will be conducted using the ping utility, as it can measure packet loss.
The \verb|-i| flag will be used to set the interval between each packet to 0.02 seconds.
The \verb|-c| flag will be used to set the number of packets to 200.
The link will be unidirectional, as otherwise it would measure the probability of a packet being lost in both directions.
The command used to test the packet loss is as follows.
\begin{minted}{bash}
ping ip-b -i 0.02 -c 200
\end{minted}
The link used for this test is as follows.
\begin{minted}{yaml}
apiVersion: iluzio.nicco.io/v1
kind: Link
metadata:
name: test-link
spec:
from: a
to: b
direction: uni
loss:
loss: '1'
\end{minted}
\subsubsection{Results}
\begin{table}[h]
\centering
\begin{tabular}{|l|r|}
\hline
Loss Rate & Measured \\
\hline\hline
No loss rate & 0\% \\ \hline
1 & 1.5\% \\ \hline
2 & 3\% \\ \hline
5 & 6\% \\ \hline
10 & 12.5\% \\ \hline
25 & 25\% \\ \hline
50 & 43.5\% \\ \hline
75 & 74.5\% \\ \hline
100 & 100\% \\ \hline
\end{tabular}
\caption{Packet loss test results}
\label{table:evaluation-packet-loss}
\end{table}
\subsection{Jitter}
\subsubsection{Methodology}
This test will be conducted using the iperf3 utility.
When using the \verb|-u| flag, iperf3 will use \ac{udp} and measure the jitter.
This test will be conducted in both directions.
The command used to test the jitter is as follows.
\begin{minted}{bash}
# Receiving node
iperf3 -s
# Sending node
iperf3 -c ip-b -u -t 30
\end{minted}
The link used for this test is as follows.
\begin{minted}{yaml}
apiVersion: iluzio.nicco.io/v1
kind: Link
metadata:
name: test-link
spec:
from: a
to: b
direction: bi
delay:
latency: 10ms
jitter: 5ms
\end{minted}
\subsubsection{Results}
\begin{table}[h]
\centering
\begin{tabular}{|l|l|r|}
\hline
Latency & Jitter & Measured \\
\hline\hline
- & - & \SI{0.035}{\milli\second} \\ \hline
\SI{10}{\milli\second} & \SI{1}{\milli\second} & \SI{0.757}{\milli\second} \\ \hline
\SI{10}{\milli\second} & \SI{2}{\milli\second} & \SI{1.342}{\milli\second} \\ \hline
\SI{10}{\milli\second} & \SI{2}{\milli\second} & \SI{1.342}{\milli\second} \\ \hline
\SI{10}{\milli\second} & \SI{5}{\milli\second} & \SI{3.093}{\milli\second} \\ \hline
\SI{50}{\milli\second} & \SI{10}{\milli\second} & \SI{6.713}{\milli\second} \\ \hline
\SI{50}{\milli\second} & \SI{25}{\milli\second} & \SI{17.589}{\milli\second} \\ \hline
\SI{100}{\milli\second} & \SI{50}{\milli\second} & \SI{35.315}{\milli\second} \\ \hline
\end{tabular}
\caption{Jitter test results}
\label{table:evaluation-jitter}
\end{table}
Unfortunately, these results are not very accurate.
This means that when running simulations, jitter should be considered more of a guideline than an exact value.
\subsection{Unidirectionality}
\subsubsection{Methodology}
This test will be performed by connecting the two nodes to each other with a unidirectional link. Then, using the netcat utility, a stream of data will be sent from one node to the other. The stream will be using \ac{udp}, as \ac{tcp} requires a connection to be established, which is not possible with a unidirectional link.
The commands used to test the unidirectionality are as follows.
\begin{minted}{bash}
# Receiving node
nc -ul 1234
# Sending node
nc -u ip-b 1234
\end{minted}
The link used for this test is as follows.
\begin{minted}{yaml}
apiVersion: iluzio.nicco.io/v1
kind: Link
metadata:
name: test-link
spec:
from: a
to: b
direction: uni
\end{minted}
\subsubsection{Results}
Before creating the link, no packets can be sent or received by either node.
After creating the link, \verb|a| can send packets to \verb|b|, but not the other way around.
\section{Goals}
Requirements were introduced at the beginning of this work \ref{section:requirements}. Through running different scenarios, it could be seen which requirements were met, which not, and which were partially achieved.
The scenarios that were run were inspired by the use cases postulated in the previous section \ref{section:scenarios}. The actual scenarios were slightly different, to make testing quicker. In particular, the time frames were shortened, without affecting functionality.
\section{Procedure}
\subsection{Scenario A}
This scenario borrows from the first use case described in the requirements chapter \ref{section:scenarios}. It is a simple scenario, where two base stations are modelled, alongside a single satellite. The satellite is connected to the ground stations at different times, with some overlap.
The ground stations are supposed to gather data from the satellite, and provide an interface for the satellite to get the current time. The satellite is supposed to send a message to the ground stations about a measurement it took and then fetch the time. It will try to accomplish this every second.
Both ground stations share the same docker image \verb|base-station|, as they are functionally identical. The satellite uses a different image \verb|satellite|, as it has different functionality. Both images are built as simple Node\footnote{\url{https://nodejs.org/en}} applications for simplicity. The applications are then packaged into docker images, available for the cluster.
The full source code for the applications and the scenario resource can be found under \verb|scenarios/one-sat-two-base|. An excerpt of the ground station application is shown in \ref{listing:scenario-a-ground-station}.
\begin{listing}[H]
\begin{minted}{js}
app.post('/transmit', async (request) => {
request.log.info({ data: request.body })
})
app.get('/time', async () => {
return new Date().toISOString()
})
await app.listen({ port: 3000, host: '0.0.0.0' })
\end{minted}
\caption{Ground station application}
\label{listing:scenario-a-ground-station}
\end{listing}
Below is a snippet of the satellite application \ref{listing:scenario-a-satellite}. It makes use of the \verb|sidecar| to discover the ground stations. If it finds any, it will send a message, and fetch the time from said ground station. If no ground stations are found, it will idle.
\begin{listing}[H]
\begin{minted}{js}
const ips = await fetch('http://localhost:42069/discoverable')
.then((res) => res.json())
.catch(() => [])
if (!ips.length) {
logger.info('no peers found')
return
}
for (const ip of ips) {
fetch(`http://${ip}:3000/time`)
.then((res) => res.text())
.then((time) => logger.info({ peer: ip }, `time from peer: ${time}`))
.catch(logger.error)
fetch(`http://${ip}:3000/transmit`, {
method: 'POST',
body: `Observation: ${Math.random()
}` }).catch(logger.error)
}
\end{minted}
\caption{Satellite application}
\label{listing:scenario-a-satellite}
\end{listing}
For the timeline of the simulation, the following events were defined:
\begin{enumerate}
\item Create a base-station with the name \verb|base0| at time $t=0$.
\item Create a base-station with the name \verb|base1| at time $t=0$.
\item Create a satellite with the name \verb|sat0| at time $t=0$.
\item Create a bidirectional \SI{10}{\mebi\byte\per\second} link between \verb|sat0| and \verb|base0| at time $t=15$.
\item Create a bidirectional \SI{1}{\mebi\byte\per\second} link between \verb|sat0| and \verb|base1| at time $t=30$.
\item Delete the link between \verb|sat0| and \verb|base0| at time $t=45$.
\item Delete the link between \verb|sat0| and \verb|base1| at time $t=60$.
\item End the simulation at time $t=80$.
\end{enumerate}
The resulting scenario file is shown below \ref{listing:scenario-a-scenario}.
\begin{listing}[H]
\begin{minted}[fontsize=\scriptsize]{yaml}
apiVersion: iluzio.nicco.io/v1
kind: Scenario
metadata:
name: sat-base
spec:
events:
# Setup
- offset: 0
resource: node
action: create
id: base0
spec:
image: base-station
airGapped: false
- offset: 0
resource: node
action: create
id: base1
spec:
image: base-station
- offset: 0
resource: node
action: create
id: sat0
spec:
image: satellite
# Links
- offset: 15
resource: link
action: create
from: base0
to: sat0
direction: bi
spec:
bandwidth:
rate: 10mbps
limit: 2000000000
buffer: 200000
- offset: 45
resource: link
action: delete
from: base0
to: sat0
direction: bi
- offset: 30
resource: link
action: create
from: base1
to: sat0
direction: bi
spec:
bandwidth:
rate: 1mbps
limit: 2000000000
buffer: 200000
- offset: 60
resource: link
action: delete
from: base1
to: sat0
direction: bi
# End
- offset: 80
resource: scenario
action: end
\end{minted}
\caption{Scenario A}
\label{listing:scenario-a-scenario}
\end{listing}
The simulation was run with the following command:
\begin{minted}{bash}
kubectl apply -f scenarios/one-sat-two-base/scenario.yaml
\end{minted}
As soon as the resource is created, the controller starts the daemon and execution begins. As expected, all steps were executed as planned. The logs of the satellite are shown in \ref{listing:scenario-a-logs-sat}.
\begin{listing}[H]
\begin{minted}[fontsize=\scriptsize]{json}
{"level":30,"time":1680545308548,"msg":"no peers found"}
{"level":30,"time":1680545309415,"msg":"no peers found"}
// ...
{"level":30,"time":1680545319423,"msg":"no peers found"}
{"level":30,"time":1680545320432,"msg":"no peers found"}
// Connecting to base0
{"level":30,"time":1680545321430,"peer":"10.244.120.91","msg":"time from peer: 2023-04-03T18:08:41.426Z"}
{"level":30,"time":1680545322426,"peer":"10.244.120.91","msg":"time from peer: 2023-04-03T18:08:42.424Z"}
// ...
{"level":30,"time":1680545334448,"peer":"10.244.120.91","msg":"time from peer: 2023-04-03T18:08:54.447Z"}
{"level":30,"time":1680545335449,"peer":"10.244.120.91","msg":"time from peer: 2023-04-03T18:08:55.449Z"}
// Connecting to both ground stations
{"level":30,"time":1680545336456,"peer":"10.244.120.91","msg":"time from peer: 2023-04-03T18:08:56.454Z"}
{"level":30,"time":1680545336461,"peer":"10.244.120.89","msg":"time from peer: 2023-04-03T18:08:56.458Z"}
// ...
{"level":30,"time":1680545348478,"peer":"10.244.120.89","msg":"time from peer: 2023-04-03T18:09:08.476Z"}
{"level":30,"time":1680545348478,"peer":"10.244.120.91","msg":"time from peer: 2023-04-03T18:09:08.477Z"}
{"level":30,"time":1680545349481,"peer":"10.244.120.91","msg":"time from peer: 2023-04-03T18:09:09.480Z"}
// Disconnecting from one ground station base0
{"level":30,"time":1680545349481,"peer":"10.244.120.89","msg":"time from peer: 2023-04-03T18:09:09.480Z"}
{"level":30,"time":1680545350483,"peer":"10.244.120.89","msg":"time from peer: 2023-04-03T18:09:10.482Z"}
// ...
{"level":30,"time":1680545364510,"peer":"10.244.120.89","msg":"time from peer: 2023-04-03T18:09:24.509Z"}
{"level":30,"time":1680545365547,"peer":"10.244.120.89","msg":"time from peer: 2023-04-03T18:09:25.546Z"}
// Disconnecting from both
{"level":30,"time":1680545366509,"msg":"no peers found"}
{"level":30,"time":1680545367512,"msg":"no peers found"}
// ...
\end{minted}
\caption{Logs of the satellite}
\label{listing:scenario-a-logs-sat}
\end{listing}
A snippet of the logs of the ground station base0 is shown in \ref{listing:scenario-a-logs-base0}.
The ground station base1 is not shown here, but the logs are similar.
\begin{listing}[H]
\begin{minted}[fontsize=\scriptsize]{json}
{"level":30,"time":1680545307882,"msg":"Server listening at http://0.0.0.0:3000"}
{"level":30,"time":1680545321426,"reqId":"req-1","req":{
"method":"GET","url":"/time",
"hostname":"10.244.120.91:3000",
"remoteAddress":"10.244.120.90",
"remotePort":46546
},"msg":"incoming request"}
{"level":30,"time":1680545321429,"reqId":"req-1","res":{
"statusCode":200},
"responseTime":2.900707997381687,
"msg":"request completed"
}
{"level":30,"time":1680545321430,"reqId":"req-2","req":{
"method":"POST",
"url":"/transmit",
"hostname":"10.244.120.91:3000",
"remoteAddress":"10.244.120.90",
"remotePort":46558},
"msg":"incoming request"
}
{"level":30,"time":1680545321430,"reqId":"req-2","data":"Observation: 0.34461038383534204"}
{"level":30,"time":1680545321430,"reqId":"req-2","res":{
"statusCode":200},
"responseTime":0.9071250036358833,
"msg":"request completed"
}
\end{minted}
\caption{Logs of the ground station base0}
\label{listing:scenario-a-logs-base0}
\end{listing}
\section{Interpretation}
The scenario tested here is a simple one, but it shows the potential of the simulator.
Based on these results, the following table shows which requirements have been fulfilled.
"Yes" means that the requirement has been fully fulfilled, "Partially" means that the requirement has been fulfilled to a certain degree, and "No" means that the requirement has not been fulfilled.
\begin{table}[h]
\centering
\begin{tabular}{|l|c|}
\hline
Requirement & Fulfilled \\
\hline\hline
\reqitem{1} & Yes \\ \hline
\reqitem{2} & Yes \\ \hline
\reqitem{3} & Yes \\ \hline
\reqitem{4} & Yes \\ \hline
\reqitem{5} & Yes \\ \hline
\reqitem{6} & Yes \\ \hline
\reqitem{7} & Yes \\ \hline
\reqitem{8} & Partially \\ \hline
\reqitem{9} & Partially \\ \hline
\reqitem{10} & Yes \\ \hline
\reqitem{11} & Yes \\ \hline
\reqitem{12} & Yes \\ \hline
\end{tabular}
\caption{Requirements overview}
\label{table:interpretation-requirementes}
\end{table}
Most of the requirements have been fulfilled completely. Unfortunately, the requirements \reqitem{8} and \reqitem{9}, while having the bulk of the functionality, miss some features. These are discussed in the following section.
The controller managed to orchestrate all moving parts inside of the cluster successfully by using the event-driven architecture of the operator. Apart from some initial errors during development, the control loop quickly became very stable and reliable, giving it a high degree of confidence. It manages to handle multiple simulations at the same time, and can be easily extended to support more complex scenarios.
\section{Critical considerations}
While this work should act as a solid foundation for future work, there are some critical aspects that should be discussed and taken into consideration.
\subsection{Requirement \reqitem{8}}
The requirement \reqitem{8} states that it should be possible to create, delete or modify links while the simulation is running. Creation and deletion of links works as expected, however modifying links is not possible in a straightforward way. This is due to limitations of the NetworkChaos CRD of Chaos Mesh\footnote{\url{https://chaos-mesh.org/docs/run-a-chaos-experiment/\#update-chaos-experiments-using-commands}}.
This limitation, however, can be circumvented by deleting and recreating the link with the new parameters. While this approach is not ideal, it allows altering the properties of a link while running a scenario. The main downside is that the link will be down for a brief moment, or experience unexpected behaviour while one link is deleted and another one is created.
\subsection{Requirement \reqitem{9}}
This shortcoming might be the most critical one. The requirement \reqitem{9} requires the simulation to support bandwidth, latency, packet loss, and jitter. While the simulator supports all of these, it does not support them at the same time. Specifically, it is not possible to limit the bandwidth in combination with other faults. All non-bandwidth faults can be used simultaneously. For simulating real links this is a limitation. The root cause is a bug in the NetworkChaos framework\footnote{\url{https://github.com/chaos-mesh/chaos-mesh/issues/3631}} that overwrites the underlying tc rules when using bandwidth limits. It has been acknowledged by the Chaos Mesh team that this is not ideal behaviour, and should be addressed.
Time was spent trying to identify the issue, and while a possible culprit was found\footnote{\url{https://github.com/chaos-mesh/chaos-mesh/blob/14c3b515ce9eb52457cea83d93cb4697bc2aec8e/controllers/podnetworkchaos/controller.go\#L232-L256}}, it was ultimately outside the scope of this work to fix it.
\subsection{Logging}
It would be beneficial to have a unified logging interface for the simulator. This would make it easier to gather all information needed to evaluate a simulation.
One possible solution would be to add a logging endpoint to the sidecar.
This would allow for simple access to the running image, which then could easily integrate its own metrics and events into the logging system.
\section{Future work}
Some optional features that could be added to the simulator in the future are shown below.
\begin{itemize}
\item A potential online editor for creating the scenarios.
\item Support more fault types provided by Chaos Mesh.
\item Distribute the operator either as Python package, docker image or helm chart.
\end{itemize}

View File

@ -0,0 +1,33 @@
\chapter{Fundamentals}
In this chapter, the theoretical background of the topic will be discussed.
\section{Delay / Disruption-Tolerant Networking}
\ac{dtn} emerged in the early days of space exploration and became an apparent problem that needed to be tackled before launching the first satellites into orbit. While space is still the main area of interest for \ac{dtn}, it also has applications in other areas such as disaster and emergency response where infrastructure might be damaged or destroyed.
\ac{dtn} is an alternative approach to traditional computer networking protocols. \ac{dtn} focuses on allowing communication to happen between nodes in a loosely connected and ever-changing topology of the network. The idea is that the network does not follow a fixed topology, but rather a network that is changing over time.
These changes can be predictable, periodic, or chaotic and no assumptions are being made about the stability or reliability of links between nodes.
Most \ac{dtn} protocols rely on some form of store and forward routing, where the messages are not immediately passed on to the next node, but rather stored in a queue. This is due to the availability of the next node, which is not always known and can vary substantially.
Within the store and forward family of routing protocols, there is a distinction between protocols that replicate data and those that only forward information onward.
This is in stark contrast to the commonly used TCP and UDP protocols, where non-availability / reachability of the next node leads to degradation of service, as those protocols require a steady connection between the two communicating nodes to ensure correct functioning. See \ref{fig:dtn-vs-tcp} for a simplified visual comparison.
\begin{figure}[h]
\caption{Simplified message delivery comparison between DTN and TCP}
\label{fig:dtn-vs-tcp}
\centering
\includegraphics[width=0.75\textwidth]{dtn-vs-tcp.png}
\end{figure}
\section{Simulator}
Simulators are tools that enable us to mimic certain environments and behaviours to match another environment, often the real world. A simulator has two parts: the environment, and the agent that interacts with said environment.
The goal of a simulation is to replicate the target environment as closely and accurately as possible. As a result of that, we can say that the perfect simulator is the one where the agent acting inside it cannot differentiate between real and simulated “worlds”. They become indistinguishable from one another.
A perfect simulation is very hard, if not impossible, to achieve; therefore most simulators generally pick important features that are relevant to the application and try to mimic those as well as possible.
Another significant factor is the resolution or quality of a simulation. A simulator is only useful if it can emulate said environment with less effort and/or cost. Otherwise, it would be easier to not use it.
Simulators that are realised with computers have another very useful benefit for scientific work. As computer programs tend to be deterministic, simulators can inherit this property. Deterministic simulations are repeatable and are therefore adequate tools for comparing and measuring improvements or differences between different agents, leaving the "noise" and randomness out of the results, making them much more actionable and meaningful.

View File

@ -0,0 +1,726 @@
\chapter{Implementation}
\label{chapter:implementation}
This chapter will discuss the implementation details of the simulator. It will describe the different components and their interactions. It will also discuss the design decisions that were made during the development of the simulator, providing an insight into the reasoning behind the design.
\section{Naming}
The name of the simulator is \textit{Iluzio}, which means \textit{illusion} in Esperanto. The name was chosen because the simulator should aim to create an illusion of a real network.
As Custom Resource Definitions (CRDs) are a core part of the Kubernetes API, it is important to choose a name that is not already taken. This means that the name of the CRDs should be unique. Every CRD requires a unique \verb|group|, followed by a name for each CRD under that group. The chosen group is \verb|iluzio.nicco.io|. This group was chosen because \verb|nicco.io| is the domain name of the author of this work. This should ensure that the name is unique and will not conflict with other CRDs.
Under the group \verb|iluzio.nicco.io|, the following CRDs are defined (in the plural form):
\begin{itemize}
\item \verb|scenarios| - The main CRD that defines the simulation
\item \verb|nodes| - A node in the simulation
\item \verb|links| - A link between two nodes in the simulation
\end{itemize}
\section{Controller}
As discussed before, the controller is implemented as a Kubernetes Operator. While writing operators from scratch is possible, since Kubernetes exposes an API which one can use, it is not very maintainable in the long run. There are a few projects that try to tackle this issue and present themselves as ``Operator Frameworks'':
\begin{itemize}
\item Kube Builder \footnote{\url{https://kubebuilder.io/}}
\item Kopf \footnote{\url{https://github.com/nolar/kopf}}
\item Operator Framework \footnote{\url{https://operatorframework.io/}}
\item kube-rs \footnote{\url{https://kube.rs/}}
\item Java Operator SDK \footnote{\url{https://github.com/java-operator-sdk/java-operator-sdk}}
\item KubeOPS \footnote{\url{https://buehler.github.io/dotnet-operator-sdk/}}
\end{itemize}
Most of the frameworks listed above are very low-level and allow the user to get deep into the inner workings of Kubernetes, at a cost. Since the simulator does not need deep access to the low-level internals of Kubernetes, the framework deemed most readable, maintainable and easy to develop with was selected. In this case, Kopf seemed to be the framework that best fitted the requirements of this work. It follows an event-driven approach and abstracts away a lot of the required boilerplate code, such that the actual domain logic of the simulator is more readable and understandable. Kopf was originally developed at Zalando\footnote{\url{https://engineering.zalando.com/}} and later made open source. It is actively being maintained and developed at the time of writing (\today).
The controller can be run cluster-wide, or in a namespaced scope. The latter is the recommended way of running the controller, as it allows for better isolation.
The controller reacts to lifecycle methods of the Scenario CRD, which will be described below. Internally, it will create and delete other CRDs, such as Nodes and Links, and act upon them in an event-driven, asynchronous manner. This allows the controller to separate logic and concerns for the different subcomponents.
Another option is to create the nodes and links with their appropriate CRDs manually. The controller also listens to the creation and deletion of these CRDs, as they are used internally. This gives the user the option to create the nodes and links manually, should the scenario CRD be insufficient for the simulation.
\section{Scenario}
The Scenario is the heart of a simulation. It is the CRD that the user interacts with and defines the simulation. It is the only CRD that is not created by the controller, but by the user. The controller will then react to the creation of the CRD and start the simulation.
A scenario, at its core, is a list of events that trigger a change in the simulation. This change can create or delete either a node or a link. Additionally, there is an event for concluding the simulation. The events also have an \verb|offset| field, which defines the time offset in \si\ms from the start of the simulation, at which the event should be triggered.
Next, we will discuss how the scenario is implemented in the controller.
\subsection{Timers vs. Daemons}
By the nature of the simulation, there has to be a mechanism that will run the specified events at the specified time. In Kopf there are two possible ways of implementing this: timers\footnote{\url{https://kopf.readthedocs.io/en/stable/timers/}} and daemons\footnote{\url{https://kopf.readthedocs.io/en/stable/daemons/}}. Both options, in theory, can deliver the functionality required for the simulation. However, as the relative approach differs between the two, we will look at the advantages and disadvantages of each approach.
In \textit{timers}, the controller will periodically execute a given function. This interval is fixed and defined at startup. This means that the controller would need to define a fixed, minimum, frequency at which the timer would be executed. The controller could calculate the minimum required interval between runs before starting the timer, as all events are known beforehand. However, this would add complexity without much benefit. Timers seem best suited to regular, cron-like logic that needs to execute at a fixed interval.
On the other hand, \textit{daemons} are started when a given resource is created and share the same lifecycle as the resource they are responsible for. This means that the daemon runs continuously alongside the resource. This enables it to run arbitrary logic, at arbitrary intervals, as long as the resource exists. Daemons therefore inherently have a higher degree of flexibility. Like a \textit{timer}, a daemon needs to ``sleep'' between runs. However, the daemon has the liberty of choosing when, and for how long, to ``sleep''.
For the purpose of orchestrating a scenario, the daemon approach was deemed more fit for the use case. This means that the controller will start a daemon for each scenario that is being created. The daemon will then execute the events at the specified time offset. It achieves this by calculating the time delta between the current time and the time at which the next event is supposed to be executed, and sleeping for that duration. This allows the daemon to save resources, by not creating additional load on the system. An overview of the lifecycle of a scenario is shown in Figure \ref{fig:scenario-lifecycle}.
All the resources created during a scenario are assigned a Kubernetes OwnerReference\footnote{\url{https://kubernetes.io/docs/concepts/overview/working-with-objects/owners-dependents/}} with the scenario resource as the owner. This means that when the scenario is deleted, all the resources created by the scenario will be deleted alongside it. This makes cleaning up the simulation easier and ensures that no resources are left behind, existing in the cluster. This is done by using a utility function provided by the Kopf framework called \verb|kopf.adopt|\footnote{\url{https://kopf.readthedocs.io/en/stable/packages/kopf/\#kopf.adopt}}. This function will set the owner reference of the resource for us.
\begin{figure}[H]
\caption{Lifecycle of a Scenario}
\label{fig:scenario-lifecycle}
\centering
\includegraphics[width=0.5\textwidth]{scenario.mmd.pdf}
\end{figure}
\subsection{Events}
The core elements of a scenario are its events. They change the state of the running simulation.
Every event has at least two mandatory fields: \verb|resource| and \verb|offset|.
The offset is the time offset in \si\ms from the start of the simulation, not the previous event. The resource field is a string that defines the type of resource that is affected by the event.
There are 3 types of resources that can be affected: \verb|scenario|, \verb|node| and \verb|link|. Each available resource type will be explained below.
\subsubsection{Scenario}
For \verb|resource: scenario| there is only one \verb|action| available: \verb|end|. This action will end the simulation. This is done by setting the \verb|status.ended| field of the scenario CRD to the current time. This will cause the scenario daemon to stop executing events and the simulation will end. An example can be seen below.
\begin{minted}{yaml}
- offset: 80
resource: scenario
action: end
\end{minted}
\subsubsection{Node}
For the \verb|resource: node| there are two actions available: \verb|create| and \verb|delete|.
The \verb|create| action will create a node with the given \verb|id|. The id must be unique within the scenario. Additionally, the \verb|spec| field must be provided. This field contains the specification of the node (see Listing \ref{listing:node-spec-example}).
\begin{minted}{yaml}
- offset: 0
resource: node
action: create
id: base0
spec:
image: base-os
airGapped: false
\end{minted}
The \verb|delete| action will delete the node with the given \verb|id|. An example can be seen below.
\begin{minted}{yaml}
- offset: 10
resource: node
action: delete
id: base0
\end{minted}
\subsubsection{Link}
For the \verb|resource: link| there are two actions available: \verb|create| and \verb|delete|.
The \verb|create| action will create a link between the nodes with the given \verb|from| and \verb|to| \verb|id|s.
The \verb|direction| field specifies the directionality of the link. It can be either \verb|uni| or \verb|bi|.
Additionally, the \verb|spec| field must be provided. This field contains the specification of the link (see Listing \ref{listing:link-example-bandwidth}).
\begin{minted}{yaml}
- offset: 15
resource: link
action: create
from: base0
to: sat0
direction: bi
spec:
bandwidth:
rate: 10mbps
limit: 2000000000
buffer: 200000
\end{minted}
The \verb|delete| action will delete the link between the nodes with the given \verb|from| and \verb|to| \verb|id|s, given a \verb|direction|. An example can be seen below.
\begin{minted}{yaml}
- offset: 45
resource: link
action: delete
from: base0
to: sat0
direction: bi
\end{minted}
\subsection{Persistence}
In order to make the controller idempotent, a common architecture goal for Kubernetes operators, the controller will set specific data on the CRD it manages that represents the current state. This is usually done in the \verb|status| field of the CRD. The actual creation of these fields is handled by the respective daemon responsible for a specific CRD. This ensures that even if the daemon, or even the whole operator, is restarted, the state of the simulation is still preserved and the simulation can be resumed.
The following fields are used for the scenario CRD:
\begin{itemize}
\item \verb|status.started|: Unix timestamp of when the simulation was started, in \si\ms.
\item \verb|status.ended|: Unix timestamp of when the simulation ended, in \si\ms.
\item \verb|status.events[i].executed|: Unix timestamp of when the event was executed, in \si\ms. If set, the event was executed, otherwise empty.
\end{itemize}
\begin{figure}[H]
\caption{Status fields of a Scenario}
\label{fig:scenario-fields-lifecycle}
\centering
\includegraphics[width=0.75\textwidth]{scenario_fields.mmd.pdf}
\end{figure}
\subsection{Specification}
\subsubsection{CRD}
The Scenario CRD is defined as follows \ref{listing:crd-scenario}.
\begin{listing}[H]
\begin{minted}[fontsize=\scriptsize]{yaml}
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
name: scenarios.iluzio.nicco.io
spec:
scope: Namespaced
group: iluzio.nicco.io
names:
kind: Scenario
plural: scenarios
singular: scenario
versions:
- name: v1
served: true
storage: true
schema:
openAPIV3Schema:
type: object
properties:
spec:
type: object
x-kubernetes-preserve-unknown-fields: true
status:
type: object
x-kubernetes-preserve-unknown-fields: true
\end{minted}
\caption{Scenario CRD}
\label{listing:crd-scenario}
\end{listing}
\subsubsection{Example}
An example can be found in Listing \ref{listing:scenario-example-simple}. In the example, a scenario is created that will create two nodes, a base station and a satellite. After 15 seconds it will then create a link between the two nodes. After 45 seconds (from the start of the simulation) the link will be deleted. The scenario will then wait for another 15 seconds before terminating, lasting 60 seconds in total.
\begin{listing}[H]
\begin{minted}[fontsize=\scriptsize]{yaml}
apiVersion: iluzio.nicco.io/v1
kind: Scenario
metadata:
name: one-sat-two-base
spec:
events:
# Setup
- offset: 0
resource: node
action: create
id: base0
spec:
image: base-station
- offset: 0
resource: node
action: create
id: sat0
spec:
image: satellite
# Links
- offset: 15
resource: link
action: create
from: base0
to: sat0
direction: bi
spec:
bandwidth:
rate: 10mbps
limit: 2000000000
buffer: 200000
- offset: 45
resource: link
action: delete
from: base0
to: sat0
direction: bi
# End
- offset: 60
resource: scenario
action: end
\end{minted}
\caption{Example Scenario CRD}
\label{listing:scenario-example-simple}
\end{listing}
\section{Node}
A node is a container that is created by the simulator. It is the basic building block of a simulation. It can be controlled by events in a scenario. The user must specify an \ac{oci} compatible image, such as a docker image, that is used for the container. The image has to be available in the container registry of the Kubernetes cluster.
Each node must be assigned a unique ID. This ID is used to identify the node in the simulation. The ID has to conform to the Kubernetes naming schema\footnote{\url{https://kubernetes.io/docs/concepts/overview/working-with-objects/names/\#names}}. Each ID will be prefixed by the name of the scenario, to avoid name collisions.
Optionally, the user can specify other container spec fields\footnote{\url{https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.19/\#container-v1-core}}, such as limiting resources of the container, environment variables, etc.
The container itself lives inside a Kubernetes Pod alongside the sidecar container provided by the simulator.
A Kubernetes Deployment with only a single replica is created for that Pod, which is responsible for the lifecycle of the Pod.
In addition to the Deployment, the simulator also creates a Kubernetes Service for the Pod, which is used for making other nodes discoverable and a Network Policy that restricts the network traffic of the Pod by partitioning the network.
\begin{figure}[H]
\caption{Overview of a single node}
\label{fig:node-overview}
\centering
\includegraphics[width=0.5\textwidth]{Single Node.pdf}
\end{figure}
\subsection{Specification}
\subsubsection{CRD}
\begin{listing}[H]
\begin{minted}[fontsize=\scriptsize]{yaml}
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
name: nodes.iluzio.nicco.io
spec:
scope: Namespaced
group: iluzio.nicco.io
names:
kind: Node
plural: nodes
singular: node
versions:
- name: v1
served: true
storage: true
schema:
openAPIV3Schema:
type: object
properties:
spec:
type: object
x-kubernetes-preserve-unknown-fields: true
status:
type: object
x-kubernetes-preserve-unknown-fields: true
\end{minted}
\caption{Node CRD}
\label{listing:node-spec-crd}
\end{listing}
\subsubsection{Native Kubernetes resources}
The following are the templates used for the three Kubernetes resources that are created for each node: Deployment \ref{listing:node-deployment-spec}, Service \ref{listing:node-service-spec}, and Network Policy \ref{listing:node-network-policy-spec}.
\begin{listing}[H]
\begin{minted}[fontsize=\scriptsize]{yaml}
apiVersion: apps/v1
kind: Deployment
metadata:
name: ${id}
spec:
replicas: 1
selector:
matchLabels:
node: ${id}
template:
metadata:
name: pod-${id}
labels:
node: ${id}
spec:
dnsPolicy: ClusterFirst
containers:
# Image
- name: app
image: ${image}
imagePullPolicy: Never
resources: ${resources}
# Sidecar
- name: sidecar
image: sidecar
imagePullPolicy: Never
env:
- name: SERVICE
value: ${id}
resources: {}
\end{minted}
\caption{Node Deployment}
\label{listing:node-deployment-spec}
\end{listing}
\begin{listing}[H]
\begin{minted}[fontsize=\scriptsize]{yaml}
apiVersion: v1
kind: Service
metadata:
name: ${id}
spec:
clusterIP: None
selector:
receive-node-${id}: enabled
\end{minted}
\caption{Node Service}
\label{listing:node-service-spec}
\end{listing}
\begin{listing}[H]
\begin{minted}[fontsize=\scriptsize]{yaml}
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
name: ${id}
spec:
podSelector:
matchLabels:
node: ${id}
ingress:
# Internal DNS
- from:
- namespaceSelector:
matchLabels:
kubernetes.io/metadata.name: kube-system
podSelector:
matchLabels:
k8s-app: kube-dns
# All the pods in the same namespace
- from:
- podSelector:
matchLabels:
send-node-${id}: enabled
egress:
- to:
- namespaceSelector:
matchLabels:
kubernetes.io/metadata.name: kube-system
podSelector:
matchLabels:
k8s-app: kube-dns
- to:
- podSelector:
matchLabels:
receive-node-${id}: enabled
\end{minted}
\caption{Node Network Policy}
\label{listing:node-network-policy-spec}
\end{listing}
\subsubsection{Example}
An example for a node specification can be found in the listing \ref{listing:node-spec-example}. In the example the node is named \verb|sat0| and uses the image \verb|sat-os|, which is made available to the registry of the cluster. The node is air gapped, meaning that it cannot communicate with the internet. The node is limited to 128Mi of memory, 500m of CPU and 100Mi of ephemeral storage.
\begin{listing}[H]
\begin{minted}[fontsize=\scriptsize]{yaml}
apiVersion: iluzio.nicco.io/v1
kind: Node
metadata:
name: sat0
spec:
image: sat-os
airGapped: true
resources:
limits:
memory: '128Mi'
cpu: '500m'
ephemeral-storage: "100Mi"
\end{minted}
\caption{Example Iluzio Node}
\label{listing:node-spec-example}
\end{listing}
\subsection{Labels}
Labels are an essential tool in Kubernetes, and the simulator also relies on their mechanism to work. In the simulator, they are used to identify pods for the following purposes:
\begin{itemize}
\item Identifying the pods that should be discoverable by other pods
\item Determining ingress and egress traffic of a pod to another given pod
\item Identifying the pods that should be targeted by Chaos Mesh.
\end{itemize}
Each Pod and Deployment are given the label \verb|node: <id>|. This ID is used by the Service and Chaos Mesh to identify the pods that should be targeted.
For controlling the NetworkPolicy, dynamic labels are used. These labels are created by the simulator and are used to match to predefined NetworkPolicy rules. The labels are created in the following format:
\begin{minted}[fontsize=\scriptsize]{yaml}
node: <id>
send-node-<id>: enabled
receive-node-<id>: enabled
\end{minted}
How these labels are used is explained in the following sections \ref{fig:link-network-partition-labels}.
\subsection{Lifecycle}
The lifecycle of a node \ref{fig:node-lifecycle} inside a simulation has two possible states: existing and not existing.
On creation, the simulator starts not only the node but also a few other resources needed around it.
Whenever an event occurs that mandates the deletion of the node, all the resources created by the simulator are deleted. This includes the Pod, Deployment, Service, and Network Policy.
\begin{figure}[H]
\caption{Lifecycle of a node}
\label{fig:node-lifecycle}
\centering
\includegraphics[width=1\textwidth]{node.mmd.pdf}
\end{figure}
Like the scenario, the node adopts all the resources it creates, creating an OwnerReference to itself for the Deployment, Service, and Network Policy. Therefore, when the node is deleted, all the resources are deleted as well.
\subsection {Node Discovery}
As discussed before, the nodes are discovered by a sidecar container that runs in the same pod as the node's container.
This sidecar container is a simple Node\footnote{\url{https://nodejs.org/}} application that uses the Kubernetes DNS to discover the other nodes in the cluster.
It then exposes this information via an HTTP endpoint.
The simulation node inside the same pod then uses this endpoint to discover the other simulation nodes it can then connect to.
Each node in the simulator has an associated Kubernetes Service in which other nodes can be registered, and therefore marked as "connected" and discoverable.
For each Service, Kubernetes creates a DNS record for each registered node that matches the Service's selector.
We leverage that feature to retrieve the IP addresses of the other nodes in the cluster.
By default, services act as a load balancer and therefore distribute the traffic to all registered nodes. By configuring the service as a headless service\footnote{\url{https://kubernetes.io/docs/concepts/services-networking/service/\#headless-services}}, the load balancing is disabled and the DNS record will contain all registered nodes.
For looking up the DNS records, \verb|dig| was chosen as \verb|nslookup| is deprecated\footnote{\url{https://cr.yp.to/djbdns/nslookup.html}} and \verb|dig| supports the \verb|+short| option which makes it easy to retrieve the IP addresses of the nodes. As an example, the following command retrieves the IP addresses of all nodes that are registered to the service \verb|service-a|.
\begin{minted}{bash}
dig +short +search service-a
\end{minted}
After the IP addresses are retrieved, the sidecar container will expose them via an HTTP endpoint. The endpoint is \verb|localhost:42069/discoverable| and returns a JSON array of the IP addresses of the other nodes. Alternatively, the endpoint can format the IPs as a newline-separated list by adding the query parameter \verb|format=plain| to the URL.
The sidecar container was written in Go\footnote{\url{https://go.dev/}} as it is a compiled language and therefore has a relatively small footprint. It is widely used in the Kubernetes ecosystem and is easy to manage and maintain.
\section{Link}
\subsection{Network disruption and degradation}
To realise the faults and non-ideal network links, the library Chaos Mesh\footnote{\url{https://chaos-mesh.org/}} was selected, as it is well maintained and actively developed. It is a CNCF project\footnote{\url{https://www.cncf.io/projects/}} and therefore has a wide community behind it.
It fits the simulator very well as it builds on Kubernetes standards and therefore integrates very well in the architecture of the simulator.
Chaos Mesh offers a variety of different network faults which fit the needs of the simulator perfectly. Fault categories supported include: delay (latency, jitter), reordering, loss, duplication, corruption, and bandwidth (rate, limit, buffer), among others.
The simulator will primarily use the NetworkChaos \footnote{\url{https://chaos-mesh.org/docs/simulate-network-chaos-on-kubernetes/}} to emulate different network link characteristics and faults.
Under the hood Chaos Mesh uses the Linux Traffic Control \verb|tc|\footnote{\url{https://man.archlinux.org/man/tc.8}} system and its addition \verb|netem|\footnote{\url{https://man.archlinux.org/man/core/iproute2/tc-netem.8}} to create the faults.
As of the time of writing (\today), Chaos Mesh has a bug\footnote{\url{https://github.com/chaos-mesh/chaos-mesh/issues/3631}} that prevents having bandwidth limits on the same NetworkChaos in combination with other faults.
Another advantage of Chaos Mesh is that it includes a variety of additional faults that it can induce in a system, making it a great starting point for adding even more options later on. Examples of some non-network-related faults are disk errors and time skewing (clock skew).
Apart from Chaos Mesh, Network Policies \footnote{\url{https://kubernetes.io/docs/concepts/services-networking/network-policies/}} are used, a Kubernetes native feature, for creating network segmentation for the pods and therefore the simulation nodes.
This requires the cluster to be configured with a \ac{cni} that supports Network Policies. A widely used and supported \ac{cni} is Calico\footnote{\url{https://www.tigera.io/project-calico/}}. It's open source and the de facto standard for Kubernetes clusters.
\subsection{Common network settings}
There are base network rules that are applied to all nodes in a scenario.
By default, no traffic is allowed in or out of a node, except for the Kubernetes System.
This is required for the node discovery sidecar container to work, as it needs access to the Kubernetes DNS to resolve the other nodes.
The label used by the Network Policy to allow traffic to the Kubernetes System is \verb|kubernetes.io/metadata.name: kube-system|.
\begin{figure}[H]
\caption{Common network overview for single node, not air gapped (NS: Namespace)}
\label{fig:node-network}
\centering
\includegraphics[width=0.75\textwidth]{Network.pdf}
\end{figure}
If selected, the nodes can have access to the internet. This is done by adding an exception to the Network Policy. This works by allowing traffic to all IPs, therefore \verb|0.0.0.0/0|, except all the private IP ranges defined by the IETF \cite{rfc1918}, which are \verb|10/8|, \verb|172.16/12| and \verb|192.168/16|. The field to toggle whether the node has access to the internet is the \verb|spec.airGapped| field of the node resource, which by default is set to \verb|true|.
These settings are common, which means that they do not reside in a global, single Network Policy, but are applied to each node individually. This gives the maximal flexibility to the user, as they can choose to have a single node with internet access, or all nodes, or none at all.
\subsection{Network partitioning}
To realise network segmentation, both Network Policies and Chaos Mesh were considered. The following table compares the two approaches.
\begin{table}[H]
\centering
\begin{tabular}{|l|l|l|}
\hline
& \textbf{Network Policies} & \textbf{Chaos Mesh} \\ \cline{2-3}
\textbf{Advantages} & Native to Kubernetes & Already installed, no overhead added \\ \hline
\textbf{Disadvantages} & Require \ac{cni} support. & Additional complexity and unnecessary dependency \\ \hline
\end{tabular}
\caption{Comparison of Network Policies and Chaos Mesh}
\label{tab:network-policies-vs-chaos-mesh}
\end{table}
As seen in Table \ref{tab:network-policies-vs-chaos-mesh}, both would be viable options. In this case, Network Policies were chosen as they are the native way of doing things in Kubernetes and therefore the most maintainable and future-proof option, while having all the functionality required for the simulator. Both options support unidirectional control.
For each node in the scenario, a Network Policy is created that allows traffic exclusively to the Kubernetes System.
This is done by creating a Network Policy that allows traffic to the Kubernetes System and denies all other traffic.
When a link is created between two nodes, the Network Policy is updated to allow traffic to the other node as well.
This exception follows the lifecycle of the link and is removed when the link is deleted.
The Network Policy is deleted when the node is deleted.
The network policies are steered by labels, which are applied to the pods. The labels are created by the controller whenever a link is created or removed.
By using labels, the network policies can be updated without having to delete and recreate them, as they can leverage the native \verb|matchLabels| selector.
As both unidirectional and bidirectional links are supported, two types of labels are required, one for ingress and one for egress.
The labels are created as follows:
\begin{itemize}
\item \verb|send-node-<id>: enabled|
\item \verb|receive-node-<id>: enabled|
\end{itemize}
The \verb|<id>| is the unique identifier of the node, which should be connected to the node the policy is applied to. An example of how the labels are applied is shown in Figure \ref{fig:link-network-partition-labels}.
\begin{figure}[H]
\caption{Network partitioning by labels}
\label{fig:link-network-partition-labels}
\centering
\includegraphics[width=1\textwidth]{Network Labels.pdf}
\end{figure}
\subsection{Lifecycle}
The link lifecycle is shown in Figure \ref{fig:link-lifecycle}. Whenever a link is created, the controller will patch the labels of the pods of the nodes that should be connected. The exact labels are shown in the previous section.
Additionally to patching the labels, the controller will also create the NetworkChaos resource for the link. This resource is being provided by Chaos Mesh and is used to create the network degradation.
Whenever a link is deleted, the controller will revert the labels it previously modified and the NetworkChaos resource will be deleted automatically, as it is owned by the link thanks to the OwnerReference.
\begin{figure}[H]
\caption{Link lifecycle}
\label{fig:link-lifecycle}
\centering
\includegraphics[width=1\textwidth]{link_lifecycle.mmd.pdf}
\end{figure}
\subsection{Specification}
\subsubsection{CRD}
The link CRD is defined as follows \ref{listing:link-crd}:
\begin{listing}[H]
\begin{minted}[fontsize=\scriptsize]{yaml}
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
name: links.iluzio.nicco.io
spec:
scope: Namespaced
group: iluzio.nicco.io
names:
kind: Link
plural: links
singular: link
versions:
- name: v1
served: true
storage: true
schema:
openAPIV3Schema:
type: object
properties:
spec:
type: object
x-kubernetes-preserve-unknown-fields: true
status:
type: object
x-kubernetes-preserve-unknown-fields: true
\end{minted}
\caption{Link CRD}
\label{listing:link-crd}
\end{listing}
The following fields are available in the link CRD \verb|spec| field.
For details about the ChaosMesh parameters, refer to the \href{https://chaos-mesh.org/docs/simulate-network-chaos-on-kubernetes/\#field-description}{documentation of Chaos Mesh NetworkChaos}.
If the \verb|direction| is set to \verb|"bi"|, the \verb|from| and \verb|to| fields can be swapped, as their order does not have any effect.
If the \verb|direction| is set to \verb|"uni"|, all disruptions will only be applied in the direction of the link.
% Table with link fields, required, type, description
\begin{table}[H]
\centering
\begin{tabular}{|l|l|l|l|}
\hline
\textbf{Field} & \textbf{Required} & \textbf{Type} & \textbf{Description} \\
\hline\hline
\verb|from| & Yes & string & The name (id) of the sending node. \\ \hline
\verb|to| & Yes & string & The name (id) of the receiving node. \\ \hline
\verb|direction| & Yes & string & The direction of the link. Can be \verb|"uni"| or \verb|"bi"|. \\ \hline
\verb|bandwidth| & No & NetworkChaos parameter & The bandwidth of the link. \\ \hline
\verb|delay| & No & NetworkChaos parameter & The delay of the link. \\ \hline
\verb|loss| & No & NetworkChaos parameter & The loss of the link. \\ \hline
\verb|duplicate| & No & NetworkChaos parameter & The packet duplication of the link. \\ \hline
\verb|corrupt| & No & NetworkChaos parameter & The packet corruption of the link. \\ \hline
\end{tabular}
\caption{Link fields}
\label{tab:link-fields}
\end{table}
\subsubsection{Example}
Two examples can be seen below. The first one is a unidirectional link limiting bandwidth \ref{listing:link-example-bandwidth}, the second one is a bidirectional link with delay \ref{listing:link-example-delay}.
\begin{listing}[H]
\begin{minted}[fontsize=\scriptsize]{yaml}
apiVersion: iluzio.nicco.io/v1
kind: Link
metadata:
name: some-link
spec:
from: a
to: b
direction: uni
bandwidth:
rate: 10kbps
limit: 3000
buffer: 1600
\end{minted}
\caption{Example link with limited bandwidth}
\label{listing:link-example-bandwidth}
\end{listing}
\begin{listing}[H]
\begin{minted}[fontsize=\scriptsize]{yaml}
apiVersion: iluzio.nicco.io/v1
kind: Link
metadata:
name: some-link
spec:
from: a
to: b
direction: bi
delay:
latency: 50ms
correlation: '25'
jitter: 15ms
\end{minted}
\caption{Example link with delay}
\label{listing:link-example-delay}
\end{listing}
\subsection{Updating link properties}
An update is handled as the deletion of the existing link and the creation of a new one.
This is due to the fact that the link is a CRD provided by Chaos Mesh and is (for now) immutable \footnote{\url{https://chaos-mesh.org/docs/run-a-chaos-experiment/\#update-chaos-experiments-using-commands}} \footnote{\url{https://github.com/chaos-mesh/chaos-mesh/issues/2227}}.
The link is therefore deleted and a new one is created with the new parameters.
This leaves a small window of time where there are no restrictions on the network traffic.
\section{Miscellaneous Topics}
\subsection{DNS Propagation}
By default, the Kubernetes DNS (usually CoreDNS) is configured to cache DNS records for 30 seconds. This delays the propagation of DNS records to the pods and therefore how fast a Link is created. This delay can be anywhere between 0 and the cache TTL. This can be problematic for the simulator, as it needs up-to-date DNS records to make nodes available to other nodes. It is therefore recommended to disable the cache or lower its TTL. To configure the cluster DNS refer to the Kubernetes documentation\footnote{\url{https://kubernetes.io/docs/tasks/administer-cluster/dns-custom-nameservers/}}. For most clusters with common configuration, this can be achieved by changing the following CoreDNS ConfigMap:
\begin{listing}[H]
\begin{minted}[fontsize=\scriptsize]{yaml}
apiVersion: v1
kind: ConfigMap
metadata:
name: coredns
namespace: kube-system
data:
Corefile: |
.:53 {
# ... other config
cache 1
}
\end{minted}
\caption{CoreDNS ConfigMap with reduced cache TTL}
\label{listing:coredns-configmap}
\end{listing}

View File

@ -0,0 +1,34 @@
\chapter{Introduction}
\subsubsection{Background and Motivation}
As \acl{dtn} is becoming a focus in recent research, the need for adequate tooling is rising accordingly. While not in its infancy, research around \ac{dtn} has mainly been conducted by space agencies and therefore the ecosystem for open tools is still lacking.
In order to further research and develop \ac{dtn} and the related protocols, we require a way to evaluate and simulate different protocols in different scenarios involving different network topologies.
As in any field of science, measurement is a necessary step in assessing whether a newly developed technology is an actual improvement or not compared to the existing state of affairs. This means that metrics are needed to evaluate progress. Generally, experimental evidence and measurements are used to gather data.
However, in DTN the scale of the network is often on a planetary level, which makes actual testing very resource intensive and in most cases impossible. With simulations, it is possible to gather metrics and data without the need to launch actual hardware into space. Running simulations of the network manually without automation is a time-consuming and error-prone process, as it involves plenty of moving parts to be done correctly. Keeping an overview and correcting behaviour manually is not a feasible task, which can be solved by using a simulator. Additionally, simulators give reproducibility, which is key when comparing between different protocols as the environments will be exactly the same across multiple tests and runs.
Simulations, by comparison, require very few resources. Therefore, they can be used not only to evaluate DTN protocols, but can also be integrated into their development process. This can accelerate the development process and makes comparison between different protocols and implementations much easier and more efficient.
\subsubsection{Aims of this work}
There are pre-existing simulators for such a task, but they are not well-suited to our needs, as will be discussed later.
This work aims at developing a tool that should allow for dynamic, deterministic and chaotic simulations with varying numbers of participants, each with different capabilities.
Every part of the tool should be driven by configuration files and be definable by the user in a readable and flexible way.
To achieve the level of granular control and dynamic flexibility, this work proposes to have a Controller-Node architecture, where the Controller is the main brain of the simulation while the nodes are freely configurable. The programming of the nodes should be language-agnostic so that they can be programmed on their own, giving them a high degree of freedom. The controller, on the other hand, would orchestrate the simulation and networking that has to be done.
The controller would run the simulation according to the spec and provide all the data needed to evaluate the run. Such data will include information about topology changes and network activity between nodes. Different collectors / ingest systems could be proposed in addition to a log file or test run file.
\subsubsection{Outline of this work}
After this introduction, there will be a chapter on the fundamentals of \ac{dtn} which we require to understand what kind of tool we need. Then we will suggest requirements for the tool that will be derived from use cases that need to be considered and are desirable.
Next, the current state of the art will be analysed and explored to see what tools are presently available, what their capabilities are and where they are lacking in terms of needed functionality.
Finally, we will propose a more concrete concept for our tool and its subsystems and parts that we want to achieve.
After implementation, there will be a chapter that will analyse what was accomplished and go over the requirements postulated beforehand to evaluate how the tool performs. Furthermore, it will look at what might be improved and what could be done in the future to further enhance it.

View File

@ -0,0 +1,94 @@
\chapter{State-of-the-Art and related work}
The simulators listed in Table~\ref{table:dtn-simulators-chosen} were chosen for this comparison. They were mostly selected based on discoverability and being open source.
\begin{table}[h]
\centering
\caption{Chosen simulators}
\label{table:dtn-simulators-chosen}
\begin{tabular}{ l|r|r }
Name & Language & Last Update \\
\hline
The One \cite{sim-theone} & \verb|Java| & \formatdate{27}{10}{2015} \\
OPS \cite{sim-ops} & \verb|C++| & \formatdate{13}{6}{2022} \\
ns3-dtn-bit \cite{sim-ns3} & \verb|C++| & \formatdate{2}{6}{2018} \\
dtnsim \cite{sim-dtnsim} & \verb|Python| & \formatdate{8}{12}{2021} \\
DTN \cite{sim-dtn} & \verb|C#| & \formatdate{8}{8}{2017} \\
\end{tabular}
\end{table}
\section{Overview of single simulators}
\subsection{The One}
The One is one of the most popular and well known simulators available for \ac{dtn} that started out in 2007.
It has quite a number of features built in.
For movement, there are many prebuilt options, such as map-based movement, random movement, and a few more.
The basic routing algorithms and a few more are also available, such as First Contact, Epidemic, Spray and Wait, Direct Delivery, PRoPHET and MaxProp.
There is a GUI for following the simulation visually.
Reporting is configurable and can be done in a variety of formats.
Some shortcomings are that it only supports 2D topologies and that the documentation, while available, is not extensive and only covers some very basic settings. Everything else needs to be looked up in the code base.
A big problem is that The One does not support the use of real-world protocols for simulating the convergence layer, which is required by the \ac{bp}. This is essential for simulating current state-of-the-art \ac{bp} protocols.
\subsection{OPS}
\ac{ops} is a simulator developed by the \href{https://www.uni-bremen.de/comnets}{Sustainable Communication Networks} of the \href{https://www.uni-bremen.de/}{University of Bremen}. It sits on top of \ac{omnet} and \ac{inet}, which are powerful event and network simulation frameworks. This allows \ac{ops} to be very flexible, extendable, and powerful.
\ac{ops} makes the distinction between link (convergence layer), routing and the app layer. These are separate components that allow for realistic simulations for \ac{bp}. Each layer is configurable with a lot of parameters to tune.
Most routing algorithms are available out of the box, and it is the only project in this list that has support for 3D space / coordinates.
It is the only simulator on this list that seems to be actively maintained and developed at this moment. The documentation is the most comprehensive in comparison to the other projects, but it only covers the basic features without going into details too much. Some examples/guides would be great. The setup is not easy, as \ac{ops} is made up of many software projects bundled together and takes a bit to get up and running.
\subsection{ns3-dtn-bit}
ns3-dtn-bit is based on \ac{ns3}, which is another network simulation framework. Unfortunately, ns3-dtn-bit seems more of a proof of concept than a real simulator.
It misses config driven scenarios, only implements \ac{cgr} and is not customizable. The convergence layer is not simulated here, only the routing, which makes it unsuitable for simulating \ac{bp}.
Also, the documentation is extremely limited.
\subsection{dtnsim}
dtnsim is a pure python implementation of a \ac{dtn} simulator. This makes it unsuitable for large scale simulations, as python performance will be a hindrance.
dtnsim includes routing protocols like PRoPHET and Epidemic, but not the underlying simulation of the convergence layer, unfortunately. This, again, makes it unsuitable for simulating \ac{bp}.
There is no documentation beyond reading the source code. Being written in Python lowers the barrier of entry for new users, but this is still not a real option for simulating diverse scenarios.
Scenarios are not config driven and require coding to be defined.
Also, the simulation environment is 2D.
\subsection{DTN}
Originally written as a master's thesis \footnote{\url{https://raw.githubusercontent.com/szymonwieloch/DTN/master/DTN.pdf}}, DTN is quite a complete simulator.
It is aware of the convergence layer and includes \ac{tcp}, \ac{udp} and \ac{lldp}. Also, it includes most of the common routing protocols, including Static, Epidemic, Gradient, AODV, Dijkstra and Predictable.
It was written with the first draft of the \ac{bp} protocol in mind \cite{RFC5050}, so is specifically tailored for \ac{bp} simulations.
Documentation is difficult to judge as it is not in English. Overall, this is the most mature simulator that is backed by a single person. However, it is no longer maintained or developed.
\section{Conclusion}
Table~\ref{table:comparison} gives a short summary of the key differences between the simulators that were considered.
\begin{table}[h!]
\centering
\begin{tabular}{c | c c c}
Simulator & Convergence Layer & Config driven & Maintained \\
\hline
The One & \XSolid & \Checkmark & \XSolid \\
OPS & \Checkmark & \Checkmark & \Checkmark \\
ns3-dtn-bit & \XSolid & \XSolid & \XSolid \\
dtnsim & \XSolid & \XSolid & \XSolid \\
DTN & \Checkmark & \Checkmark & \XSolid \\
\end{tabular}
\caption{Comparison of the simulators}
\label{table:comparison}
\end{table}
Most simulators have some kind of shortcomings. Almost all of them lack in documentation and examples, which implies a higher barrier of entry for new users.
The most complete, up-to-date and flexible simulator seems to be \ac{ops}, which seems the most mature and reasonable choice to simulate \ac{dtn} in the present day.
All other simulators seem unmaintained at this point or lack support for convergence layer simulations.

View File

@ -0,0 +1,147 @@
\chapter{Analysis of requirements}
\section{Properties for a simulator}
For evaluating different simulator frameworks, we need to first define what criteria we want to consider and weigh the importance accordingly. Below is a short list of criteria that we will use to evaluate the quality of a simulator.
\begin{itemize}
\item Routing
\item Modelling
\item Support for real-world protocols
\item Reporting
\item Extensibility
\item Documentation
\item Configuration
\end{itemize}
\subsection{Routing}
A good simulation framework should include basic routing algorithms for \ac{dtn}.
Examples might be Epidemic Routing, Flooding Routing, PRoPHET, MaxProp, RAPID, Spray and Wait, Bubble Rap Protocol, CafRep, RACOD. Furthermore it should be easy to add new or custom routing algorithms as needed.
\subsection{Modelling}
As \ac{dtn} covers a broad spectrum of possible applications and use cases, the resulting simulated network topologies can be predictable, dynamic, or sometimes chaotic.
Modelling this changing network structure is essential. This includes opportunistic and predefined links between nodes. They can be of periodic nature, randomized or anything in between.
One example would be modelling orbiting satellites, where the links are periodically broken and restored predictably. An example for an opportunistic topology would be modelling random ships in the ocean that need to communicate with each other, where the next link or node is not predictable.
\subsection{Support for real-world protocols}
Support for standards like the \ac{bp} V7 and the underlying convergence layers like \ac{tcp}, \ac{udp} or \ac{ltp} is desirable.
\subsection{Reporting}
Good reporting, meaning the ability to extract data and metrics from a given simulation, is key to processing and comparing data across different simulation runs and models in order to improve the simulation.
This means that generally it is required that data can be exported in a machine-readable format such as csv, json or other formats that are easy to process further.
\subsection{Extensibility}
A good simulator would allow for extensibility and therefore the ability to customize every aspect of the simulation to the needs of the user.
\subsection{Documentation}
The quality of documentation determines the barrier of entry for new users. Ideally, extensive documentation and examples are available so that writing simulations is intuitive and easy.
\subsection{Configuration}
Naturally, there is a need to define simulation scenarios. This means that a user needs to somehow specify the topology and all the parameters for links, nodes, etc. to satisfy the constraints of a given scenario. This can be done by configuration files in a declarative manner, or programmatically. In either case, it is desirable that this happens in a concise and intuitive way that is easy to understand and create.
In order to derive requirements, we will look at some use cases. By analysing the single components and behaviour within a potential simulation, requirements can be inferred and formulated.
We will look at three example scenarios.
\section{Scenarios}
\label{section:scenarios}
\subsubsection{Simulation of satellite constellation}
A simulation that involves a group of satellites will have different satellites talking to each other. The connections will be periodic, as orbits are predefined and encounters follow a structure that can be forecasted. Also, communication between satellites is homogeneous, meaning that all links have the same quality, if present.
More concretely, we could, for example, imagine a constellation of 8 satellites orbiting around the earth at the same speed. Each satellite is identical to the others in terms of capabilities. They have predefined orbits and therefore encounter each other at predefined times. In this case, we could assume that each satellite has a bandwidth of \SI{50}{\kibi\byte\per\second} and encounters another satellite every \SI{30}{\minute}.
So for any given satellite, it would mean that there is a window of \SI{30}{\second} every \SI{30}{\minute} to exchange packets with the other satellite. Each link would have a packet loss rate of \SI{2}{\percent}.
\subsubsection{Simulation of satellite ground station connections}
\label{sim-sat-ground}
If we look at a single satellite talking to ground stations while orbiting a planet, we can again see that we want to simulate periodic encounters. However, the links may have different qualities, depending on the ground station. This might have different reasons such as inclination, radar size and power at the ground station. In this case, we need to model different types of links, each with unique properties.
An example would be to look at a communication satellite like those of the Iridium constellation\footnote{\url{https://en.wikipedia.org/wiki/Iridium_satellite_constellation}} orbiting at \SI{\approx27000}{\km\per\hour} at an altitude of \SI{\approx781}{\km}. This would yield an orbital period of \SI{\approx100}{\minute}.
We can add two ground stations, one with a better antenna and another with a smaller one. Each with different bandwidths, latencies, packet loss and duration of a single connection.
\begin{table}[h!]
\centering
\begin{tabular}{|c c c c c|}
\hline
Ground Station & Bandwidth & Latency & Packet Loss & Duration \\
\hline\hline
A & \SI{3}{\kibi\byte\per\second} & \SI{800}{\ms} & \SI{2}{\percent} & \SI{10}{\minute} \\
B & \SI{500}{\byte\per\second} & \SI{800}{\ms} & \SI{3}{\percent} & \SI{7}{\minute} \\
\hline
\end{tabular}
\caption{Ground stations \ref{sim-sat-ground}}
\label{table:sim-sat-ground}
\end{table}
\subsubsection{Simulation of sea traffic}
\label{sim-sea-traffic}
A scenario might also involve communication between different ships. While some ships might follow predefined routes, others follow a random path, taking unpredictable paths that lead to random encounters between the ships at sea. Also, having different equipment at its disposal for communication between different classes of ships might lead to inconsistent link qualities.
For instance, one could consider simulating a section of the Aegean Sea\footnote{\url{https://en.wikipedia.org/wiki/Aegean_Sea}}, where there are a lot of islands and private boat traffic that is unpredictable. There are different ship types, from small fishing vessels and recreational boats to bigger cruising ships. While recreational boats might only have rudimentary communication equipment, bigger ships might be better equipped. Let's imagine we have 3 different connection types depending on the communicating parties.
\begin{table}[h!]
\centering
\begin{tabular}{|c c c c|}
\hline
Link Type & Bandwidth & Latency & Packet Loss \\
\hline\hline
A & \SI{50}{\kibi\byte\per\second} & \SI{100}{\ms} & \SI{3}{\percent} \\
B & \SI{1}{\mebi\byte\per\second} & \SI{50}{\ms} & \SI{2}{\percent} \\
C & \SI{50}{\mebi\byte\per\second} & \SI{20}{\ms} & \SI{1}{\percent} \\
\hline
\end{tabular}
\caption{Communication types for Sea Simulation \ref{sim-sea-traffic}}
\label{table:sim-sea-traffic-types}
\end{table}
We can assume a total of 10 ships for each connection type, for a total of 30, that encounter another ship at a random interval between \SI{1}{\minute} and \SI{5}{\minute}.
\section{Requirements}
From the scenarios above, we can derive some key requirements we want to fulfil for our project.
\noindent\textit{Runtime}
\reqlist{
\item The simulator should be implemented as a Kubernetes Operator \footnote{\href{https://kubernetes.io/docs/concepts/extend-kubernetes/operator/}{Kubernetes Operator}}
\item The simulator will orchestrate the creation and deletion of nodes.
\item The simulator will orchestrate the creation and deletion of network links between nodes.
}
\noindent\textit{Configuration}
\reqlist{
\item The configuration file should be a plain text file.
\item The configuration file should contain a set of events which the simulator can act upon.
\item The set of events should be specified in an imperative, procedural manner.
}
\noindent\textit{Events}
\reqlist{
\item An event can create or destroy a node.
\item An event can create, destroy or modify a link.
\item A link can have the following properties: \{bandwidth, latency, packet loss, jitter\} (see Table~\ref{table:properties-link}).
}
\noindent\textit{Scenario}
\reqlist{
\item It should be possible to create scenarios with periodic encounters of nodes.
\item It should be possible to create links with different properties across different nodes.
\item Scenarios should be deterministic and therefore repeatable.
}

View File

@ -0,0 +1,7 @@
\chapter{Summary}
The central scope of this work was to architect and implement a simulator tailored, but not strictly limited, to DTN networks. Additionally, emphasis was placed on extensibility and flexibility of the simulator to allow for contrived scenarios, which often are the reality in the field of DTN networks. The simulator should be agnostic to programming languages and protocols, giving the user full freedom to implement their own custom logic, while taking care of the orchestration and communication between the communicating parties.
Most of the goals were achieved, creating a tool that is flexible and extensible, that lives natively inside the Kubernetes ecosystem, taking advantage of the software already available in that space. Being natively integrated into Kubernetes makes it easy to create bridges between the simulated, isolated "world" and real running applications or nodes. Being built on open standards set by Kubernetes, the simulator should stand the test of time and profit from the continuous additions and improvements to and in the Kubernetes ecosystem.
While there are some shortcomings that could be improved, the simulator is in a state where it can be used to simulate DTN networks and is a good starting point for further development. The simulator is open source and will be available on GitHub\footnote{\url{https://github.com/cupcakearmy/iluzio}}, where it can be used and extended by anyone.

296
thesis/src/db.bib Normal file
View File

@ -0,0 +1,296 @@
@article{jedari2018survey,
title = {A survey on human-centric communications in non-cooperative wireless relay networks},
author = {Jedari, Behrouz and Xia, Feng and Ning, Zhaolong},
journal = {IEEE Communications Surveys \& Tutorials},
volume = {20},
number = {2},
pages = {914--944},
year = {2018},
publisher = {IEEE},
}
@misc{vahdat2000epidemic,
title = {Epidemic routing for partially connected ad hoc networks},
author = {Vahdat, Amin and Becker, David and others},
year = {2000},
publisher = {Technical Report CS-200006, Duke University},
}
@article{lindgren2003probabilistic,
title = {Probabilistic routing in intermittently connected networks},
author = {Lindgren, Anders and Doria, Avri and Schel{\'e}n, Olov},
journal = {ACM SIGMOBILE mobile computing and communications review},
volume = {7},
number = {3},
pages = {19--20},
year = {2003},
publisher = {ACM New York, NY, USA},
}
@inproceedings{burgess2006maxprop,
title = {MaxProp: Routing for Vehicle-Based Disruption-Tolerant Networks.},
author = {Burgess, John and Gallagher, Brian and Jensen, David D and Levine, Brian Neil and others},
booktitle = {Infocom},
volume = {6},
year = {2006},
organization = {Barcelona, Spain},
}
@inproceedings{balasubramanian2007dtn,
title = {DTN routing as a resource allocation problem},
author = {Balasubramanian, Aruna and Levine, Brian and Venkataramani, Arun},
booktitle = {Proceedings of the 2007 conference on Applications, technologies, architectures, and protocols for computer communications},
pages = {373--384},
year = {2007},
}
@techreport{RFC9171,
author = {S. Burleigh and K. Fall and Birrane, III, E.},
title = {Bundle Protocol Version 7},
howpublished = {Internet Requests for Comments},
type = {RFC},
number = {9171},
year = {2022},
month = {January},
issn = {2070-1721},
publisher = {RFC Editor},
institution = {RFC Editor},
}
@techreport{RFC5050,
author = {K. Scott and S. Burleigh},
title = {Bundle Protocol Specification},
howpublished = {Internet Requests for Comments},
type = {RFC},
number = {5050},
year = {2007},
month = {November},
issn = {2070-1721},
publisher = {RFC Editor},
institution = {RFC Editor},
url = {http://www.rfc-editor.org/rfc/rfc5050.txt},
}
@techreport{RFC5326,
author = {M. Ramadas and S. Burleigh and S. Farrell},
title = {Licklider Transmission Protocol - Specification},
howpublished = {Internet Requests for Comments},
type = {RFC},
number = {5326},
year = {2008},
month = {September},
issn = {2070-1721},
publisher = {RFC Editor},
institution = {RFC Editor},
}
@techreport{RFC0793,
author = {Jon Postel},
title = {Transmission Control Protocol},
howpublished = {Internet Requests for Comments},
type = {STD},
number = {7},
year = {1981},
month = {September},
issn = {2070-1721},
publisher = {RFC Editor},
institution = {RFC Editor},
url = {http://www.rfc-editor.org/rfc/rfc793.txt},
}
@techreport{RFC7323,
author = {D. Borman and B. Braden and V. Jacobson and R. Scheffenegger},
title = {TCP Extensions for High Performance},
howpublished = {Internet Requests for Comments},
type = {RFC},
number = {7323},
year = {2014},
month = {September},
issn = {2070-1721},
publisher = {RFC Editor},
institution = {RFC Editor},
}
@techreport{RFC0768,
author = {J. Postel},
title = {User Datagram Protocol},
howpublished = {Internet Requests for Comments},
type = {STD},
number = {6},
year = {1980},
month = {August},
issn = {2070-1721},
publisher = {RFC Editor},
institution = {RFC Editor},
url = {http://www.rfc-editor.org/rfc/rfc768.txt},
}
@misc{sim-theone,
author = {Ari Keränen},
title = {The One},
year = {2015},
url = {https://github.com/akeranen/the-one},
urldate = {2022-07-22},
}
@misc{sim-ops,
author = {Communication Networks Bremen},
title = {OPS},
year = {2022},
url = {https://github.com/ComNets-Bremen/OPS},
urldate = {2022-07-22},
}
@misc{sim-ns3,
author = {bit-oh-my-god},
title = {ns3-dtn-bit},
year = {2018},
url = {https://github.com/bit-oh-my-god/ns3-dtn-bit},
urldate = {2022-07-22},
}
@misc{sim-dtnsim,
author = {Hiroyuki Ohsaki},
title = {dtnsim},
year = {2021},
url = {https://github.com/h-ohsaki/dtnsim},
urldate = {2022-07-22},
}
@misc{sim-dtn,
author = {Szymon Wieloch},
title = {DTN},
year = {2017},
url = {https://github.com/szymonwieloch/DTN},
urldate = {2022-07-22},
}
@techreport{burleigh2011contact,
title = {Contact graph routing},
author = {Burleigh, Scott C},
year = {2011},
}
@article{7433915,
author = {},
journal = {IEEE Std 802.1AB-2016 (Revision of IEEE Std 802.1AB-2009)},
title = {IEEE Standard for Local and metropolitan area networks - Station and Media Access Control Connectivity Discovery},
year = {2016},
volume = {},
number = {},
pages = {1-146},
doi = {10.1109/IEEESTD.2016.7433915},
}
@online{nasa-scottburleigh-2021-1,
author = {Scott Burleigh},
title = {Introduction to Delay/Disruption Tolerant Networking 1.1},
year = {2021},
url = {https://www.youtube.com/watch?v=2RHzIxbBJgo},
urldate = {2022-10-07},
}
@online{nasa-scottburleigh-2021-2,
author = {Scott Burleigh},
title = {Introduction to Delay/Disruption Tolerant Networking 1.2},
year = {2021},
url = {https://www.youtube.com/watch?v=CXqqIauO1qQ},
urldate = {2022-10-07},
}
@online{azure-sidecar,
author = {Microsoft},
title = {Sidecar pattern},
year = {2021},
url = {https://learn.microsoft.com/en-us/azure/architecture/patterns/sidecar},
urldate = {2022-12-16},
}
@techreport{RFC1034,
author = {P. Mockapetris},
title = {Domain names - concepts and facilities},
howpublished = {Internet Requests for Comments},
type = {STD},
number = {13},
year = {1987},
month = {November},
issn = {2070-1721},
publisher = {RFC Editor},
institution = {RFC Editor},
url = {http://www.rfc-editor.org/rfc/rfc1034.txt},
note = {\url{http://www.rfc-editor.org/rfc/rfc1034.txt}},
}
@techreport{RFC1035,
author = {P. Mockapetris},
title = {Domain names - implementation and specification},
howpublished = {Internet Requests for Comments},
type = {STD},
number = {13},
year = {1987},
month = {November},
issn = {2070-1721},
publisher = {RFC Editor},
institution = {RFC Editor},
url = {http://www.rfc-editor.org/rfc/rfc1035.txt},
note = {\url{http://www.rfc-editor.org/rfc/rfc1035.txt}},
}
@techreport{RFC0791,
author = {Jon Postel},
title = {Internet Protocol},
howpublished = {Internet Requests for Comments},
type = {STD},
number = {5},
year = {1981},
month = {September},
issn = {2070-1721},
publisher = {RFC Editor},
institution = {RFC Editor},
url = {http://www.rfc-editor.org/rfc/rfc791.txt},
note = {\url{http://www.rfc-editor.org/rfc/rfc791.txt}},
}
@techreport{RFC1123,
author = {R. Braden},
title = {Requirements for Internet Hosts - Application and Support},
howpublished = {Internet Requests for Comments},
type = {STD},
number = {3},
year = {1989},
month = {October},
issn = {2070-1721},
publisher = {RFC Editor},
institution = {RFC Editor},
}
@techreport{ietf-dtn-mtcpcl-01,
number = {draft-ietf-dtn-mtcpcl-01},
type = {Internet-Draft},
institution = {Internet Engineering Task Force},
publisher = {Internet Engineering Task Force},
note = {Work in Progress},
url = {https://datatracker.ietf.org/doc/draft-ietf-dtn-mtcpcl/01/},
author = {Scott C. Burleigh},
title = {{Minimal TCP Convergence-Layer Protocol}},
pagetotal = 8,
year = 2019,
month = apr,
day = 23,
abstract = {This document describes a Minimal TCP (MTCP) "convergence-layer" protocol for the Delay-Tolerant Networking (DTN) Bundle Protocol (BP). MTCP uses Transmission Control Protocol (TCP) to transmit BP "bundles" from one BP node to another node to which it is topologically adjacent in the BP network. The services provided by the MTCP convergence-layer protocol adapter utilize a standard TCP connection for the purposes of bundle transmission.},
}
@techreport{RFC1918,
author = {Yakov Rekhter and Robert G. Moskowitz and Daniel Karrenberg and Geert Jan de Groot and Eliot Lear},
title = {Address Allocation for Private Internets},
howpublished = {Internet Requests for Comments},
type = {BCP},
number = {5},
year = {1996},
month = {February},
issn = {2070-1721},
publisher = {RFC Editor},
institution = {RFC Editor},
url = {http://www.rfc-editor.org/rfc/rfc1918.txt},
note = {\url{http://www.rfc-editor.org/rfc/rfc1918.txt}},
}

View File

@ -0,0 +1,70 @@
\usepackage{acro}
\acsetup{
make-links
}
\DeclareAcronym{dtn}{
short = DTN,
long = Delay/Disruption-Tolerant Networking \cite{RFC5050,RFC9171} ,
}
\DeclareAcronym{tcp}{
short = TCP ,
long = Transmission Control Protocol \cite{RFC0793,RFC7323} ,
}
\DeclareAcronym{udp}{
short = UDP ,
long = User Datagram Protocol \cite{RFC0768},
}
\DeclareAcronym{ltp}{
short = LTP ,
long = Licklider Transmission Protocol \cite{RFC5326} ,
}
\DeclareAcronym{lldp}{
short = LLDP ,
long = Link Layer Discovery Protocol \cite{7433915} ,
}
\DeclareAcronym{ns3}{
short = ns-3 ,
long = ns-3 Network Simulator \footnote{\url{https://www.nsnam.org/}} ,
}
\DeclareAcronym{omnet}{
short = OMNeT++ ,
long = Objective Modular Network Testbed in C++ \footnote{\url{http://omnetpp.org/}} ,
}
\DeclareAcronym{inet}{
short = INET ,
long = INET \footnote{\url{https://inet.omnetpp.org/}} ,
}
\DeclareAcronym{bp}{
short = BP ,
long = Bundle Protocol \cite{RFC5050,RFC9171} ,
}
\DeclareAcronym{ops}{
short = OPS ,
long = Opportunistic Protocol Simulator ,
}
\DeclareAcronym{cgr}{
short = CGR ,
long = Contact Graph Routing \cite{burleigh2011contact} ,
}
\DeclareAcronym{dns}{
short = DNS,
long = Domain Name System \cite{RFC1034,RFC1035} ,
}
\DeclareAcronym{ip}{
short = IP,
long = Internet Protocol \cite{RFC0791} ,
}
\DeclareAcronym{cni}{
short = CNI,
long = Container Network Interface \footnote{\url{https://www.cni.dev/}} ,
}
\DeclareAcronym{oci}{
short = OCI,
long = Open Container Initiative \footnote{\url{https://opencontainers.org/}} ,
}

View File

@ -0,0 +1,22 @@
\faculty{Faculty of Computer Science}
\institute{Institute of Systems Architecture}
\chair{Chair of Computer Networks}
\date{\today}
\title{A Simulator for Delay- and Disruption-tolerant Networking based on Kubernetes}
\thesis{master}
\graduation[M.Sc.]{Master of Science}
\author{
Niccolo Borgioli
\matriculationnumber{XXX}%
\matriculationyear{XXX}
\dateofbirth{XXX}%
\placeofbirth{XXX}%
}
\supervisor{Dr.-Ing. Marius Feldmann}
\professor{Prof. Dr. rer. nat. habil. Dr. h. c. Alexander Schill}
\maketitle

View File

@ -0,0 +1,20 @@
% Copied from: https://ghost.tweakblogs.net/blog/720/requirement-list-in-latex.html
\newcommand{\reqitem}[1]{
$R_{#1}$
}
\newcommand{\reqinit}{
\newcounter{reqcountbackup}
\newcounter{reqcount}
\renewcommand{\thereqcount}{\reqitem{\arabic{reqcount}}}
}
\newcommand{\reqlist}[1]{
\begin{list}{\thereqcount}{\usecounter{reqcount}}
\setcounter{reqcount}{\value{reqcountbackup}}
#1
\setcounter{reqcountbackup}{\value{reqcount}}
\end{list}
}

78
thesis/src/main.tex Normal file
View File

@ -0,0 +1,78 @@
\documentclass{tudscrreprt}
% Basic
\usepackage{hyperref}
\usepackage{float}
% Code Snippets
\usepackage{minted}
% Acronyms
\usepackage{./lib/acronyms}
% SI Units
\usepackage[per-mode = symbol]{siunitx}
\iftutex
\usepackage{fontspec}
\else
\usepackage[T1]{fontenc}
\fi
% Images
\usepackage{graphicx}
\graphicspath{ {../images/} {../diagrams/} }
% Bib
\usepackage[backend=bibtex8, style=ieee]{biblatex}
\addbibresource{./db.bib}
% Dates
\usepackage{datetime}
% Symbol font
\usepackage{bbding}
% Requirements
\usepackage{./lib/requirements}
\reqinit
\begin{document}
\input{lib/metadata.sty}
\chapter*{Statement of Authorship}
I hereby declare that I have written this paper independently and have not used any sources or aids apart from those indicated.
I am submitting it for the first time as an examination performance.
Furthermore, I am aware that attempted cheating will be penalized with a grade of “insufficient” (5.0) and, if repeated, may lead to exclusion from further examinations.
\\\\
Dresden — \today
\clearpage
\setcounter{tocdepth}{1}
\tableofcontents
\input{chapters/introduction.tex}
\input{chapters/fundamentals.tex}
\input{chapters/requirements.tex}
\input{chapters/related-work.tex}
\input{chapters/concept.tex}
\input{chapters/implementation.tex}
\input{chapters/evaluation.tex}
\input{chapters/documentation.tex}
\input{chapters/summary.tex}
\printacronyms
\begin{appendix}
\listoffigures
\listoftables
\renewcommand\listoflistingscaption{List of source codes}
\listoflistings
\end{appendix}
\printbibliography
\end{document}