daily checkpoint

This commit is contained in:
Chris Kruining 2026-04-01 16:09:51 +02:00
parent cc86b0a815
commit 2ffece26f2
No known key found for this signature in database
GPG key ID: EB894A3560CCCAD2
15 changed files with 451 additions and 63 deletions

View file

@ -7,6 +7,39 @@
directory = ./.;
exportInterfaces = {
persistence = {lib, ...}: let
inherit (lib) mkOption types;
in {
options = {
main = mkOption {
type = types.str;
};
database = mkOption {
type = types.attrsOf types.anything;
};
};
};
servarr = {lib, ...}: let
inherit (lib) mkOption types;
in {
options = {
services = mkOption {
type = types.attrsOf (types.submodule {
options = {
port = mkOption {
type = types.port;
};
};
});
default = "awesome!";
};
};
};
};
inventory.machines = {
aule = {
name = "aule";
@ -82,10 +115,19 @@
};
};
persistence = {
module.name = "persistence";
module.input = "self";
# TODO :: Convert to use tags instead
roles.default.machines.ulmo.settings = {};
};
servarr = {
module.name = "servarr";
module.input = "self";
# TODO :: Convert to use tags instead
roles.default.machines.ulmo.settings = {};
roles.default.settings = {
enable = true;

View file

@ -6,7 +6,7 @@
Configuration of reverse proxy.
'';
categories = [ "Service", "Media" ];
readme = builtins.readFile ./README.md
readme = builtins.readFile ./README.md;
};
roles.default = {

View file

@ -1,24 +0,0 @@
{...}: {
_class = "clan.service";
manifest = {
name = "arda/persistance";
description = ''
Configuration of persistance resrouce(s)
(for now this means a database. and specifically it means postgres)
'';
categories = [ "Service", "Peristance" ];
readme = builtins.readFile ./README.md
};
roles.default = {
description = '''';
interface = {...}: {
options = {};
};
perInstance = {...}: {
nixosModule = {...}: {};
};
};
}

View file

@ -1,13 +0,0 @@
{...}: let
module = ./default.nix;
in {
clan.modules.peristance = module;
# perSystem = {...}: {
# clan.nixosTests.peristance = {
# imports = [];
# clan.modules."@arda/peristance" = module;
# };
# };
}

View file

@ -0,0 +1,35 @@
{...}: {
_class = "clan.service";
manifest = {
name = "arda/persistence";
description = ''
Configuration of persistence resrouce(s)
(for now this means a database. and specifically it means postgres)
'';
readme = builtins.readFile ./README.md;
exports.out = ["persistence"];
};
roles.default = {
description = '''';
interface = {...}: {
options = {};
};
perInstance = {mkExports, ...}: {
exports = mkExports {
persistence = {
main = "postgresql";
database.postgresql = {
host = "";
port = 5432;
};
};
};
nixosModule = {...}: {
};
};
};
}

View file

@ -0,0 +1,13 @@
{...}: let
module = ./default.nix;
in {
clan.modules.persistence = module;
# perSystem = {...}: {
# clan.nixosTests.persistence = {
# imports = [];
# clan.modules."@arda/persistence" = module;
# };
# };
}

View file

@ -1,15 +1,21 @@
{lib, ...}: {
{
exports,
clanLib,
lib,
...
}: {
_class = "clan.service";
manifest = {
name = "arda/servarr";
description = '''';
categories = ["Service" "Media"];
readme = builtins.readFile ./README.md;
# exports.out = [];
exports = {
inputs = ["persistence"];
out = ["servarr"];
};
};
# exports = {};
roles.default = {
description = '''';
@ -21,7 +27,7 @@
services = mkOption {
type = types.attrsOf (types.submodule ({name, ...}: {
options = {
enable = mkEnableOption "Enable ${name}";
enable = mkEnableOption "Enable ${name}" // {default = true;};
debug = mkEnableOption "Use tofu plan instead of tofu apply for ${name} ";
rootFolders = mkOption {
@ -43,8 +49,21 @@
settings,
machine,
roles,
mkExports,
...
}: {
exports = mkExports {
servarr.services =
settings.services
|> lib.attrNames
|> lib.concat ["sabnzbd" "qbittorrent" "flaresolverr"]
|> lib.imap1 (i: name: {
inherit name;
value = {port = 2000 + i;};
})
|> lib.listToAttrs;
};
nixosModule = args @ {
config,
lib,
@ -54,6 +73,8 @@
servarr = import ./lib.nix (args // {inherit settings;});
services = settings.services |> lib.attrNames;
service_count = services |> lib.length;
db = exports |> clanLib.getExport {serviceName = "persistence";};
in {
imports = [
(import ./sabnzbd.nix (args

View file

@ -10,7 +10,7 @@
createGenerator = {
service,
service_options,
options,
...
}: {
files = {
@ -39,7 +39,7 @@
createService = {
service,
service_options,
options,
...
}:
{
@ -55,7 +55,7 @@
server = {
bindaddress = "0.0.0.0";
port = service_options.port;
port = options.port;
};
postgres = {
@ -74,14 +74,14 @@
createSystemdService = {
service,
service_options,
options,
...
}: let
tofu = lib.getExe pkgs.opentofu;
terraformConfiguration = self.inputs.terranix.lib.terranixConfiguration {
system = pkgs.stdenv.hostPlatform.system;
modules = [
(createInfra {inherit service service_options;})
(createInfra {inherit service options;})
];
};
in {
@ -93,7 +93,7 @@
preStart = ''
install -d -m 0770 -o ${service} -g media /var/lib/${service}-apply-infra
${
service_options.rootFolders
options.rootFolders
|> lib.map (folder: "install -d -m 0770 -o media -g media ${folder}")
|> lib.join "\n"
}
@ -120,7 +120,7 @@
# Run the infrastructure code
${tofu} \
${
if service_options.debug
if options.debug
then "plan"
else "apply -auto-approve"
} \
@ -143,7 +143,7 @@
# Returns a module to be used in a modules list of terranix
createInfra = {
service,
service_options,
options,
...
}: terra: let
inherit (terra.lib) tfRef;
@ -181,14 +181,14 @@
};
provider.${service} = {
url = "http://[::1]:${toString service_options.port}";
url = "http://[::1]:${toString options.port}";
api_key = tfRef "var.${service}_api_key";
};
resource =
{
"${service}_root_folder" = mkIf (lib.elem service ["radarr" "sonarr" "whisparr" "readarr"]) (
service_options.rootFolders
options.rootFolders
|> lib.imap (i: f: lib.nameValuePair "local${toString i}" {path = f;})
|> lib.listToAttrs
);
@ -304,13 +304,17 @@ in {
config =
services
|> lib.attrsToList
|> lib.imap1 (i: service: o: let
service_options = o // {port = 2000 + i;};
|> lib.imap1 (i: {
name,
value,
}: let
service = name;
options = value // {port = 2000 + i;};
in {
clan.core.vars.generators.${service} = createGenerator {inherit service service_options;};
services.${service} = createService {inherit service service_options;};
clan.core.vars.generators.${service} = createGenerator {inherit service options;};
services.${service} = createService {inherit service options;};
systemd.services."${service}-apply-infra" = lib.mkIf settings.enable (createSystemdService {inherit service service_options;});
systemd.services."${service}-apply-infra" = lib.mkIf settings.enable (createSystemdService {inherit service options;});
})
|> lib.mkMerge;
};

View file

@ -0,0 +1,125 @@
# Mandos as a wake-on-demand build host
## Goal
Mandos is primarily an interactive living-room machine, but it is also a strong candidate for handling remote Nix builds when it is idle. The goal is to make that dual use practical without keeping the machine powered all the time.
## Current context
On `main`, Mandos is configured as an interactive gaming machine:
- `systems/x86_64-linux/mandos/default.nix`
- `sneeuwvlok.hardware.has.gpu.nvidia = true`
- `sneeuwvlok.hardware.has.audio = true`
- `sneeuwvlok.desktop.use = "gamescope"`
- `sneeuwvlok.application.steam.enable = true`
- `homes/x86_64-linux/chris@mandos/default.nix`
- user-facing application set for an interactive machine
This makes Mandos a poor fit for "always running random infrastructure", but a reasonable fit for "available for work when needed".
## Desired behavior
- Mandos remains an interactive machine first.
- Mandos can be used as a remote build worker when no one is actively using it.
- Mandos should not need to stay fully on all day just to be eligible for builds.
- Waking and idling down should be automatic enough that the machine can participate in builds without turning into a maintenance burden.
## Recommended model
### 1. Use wake-on-LAN as the activation mechanism
Mandos should support being awakened by another machine on the same LAN.
Requirements:
- BIOS or UEFI wake-on-LAN support enabled
- NixOS interface configuration enabling wake-on-LAN
- one low-power machine that is effectively always available to send wake requests
In this repo, `ulmo` is the obvious candidate to act as the coordinator, but the pattern should stay generic: one machine is always reachable, and one or more stronger machines can be woken on demand.
### 2. Prefer suspend-first over shutdown-first
There are two main power states worth considering:
- **Suspend on idle**
- faster resume
- generally better user experience
- often easier to make reliable for wake-on-LAN
- **Shutdown on idle**
- lowest power draw
- more fragile in practice because firmware support for wake from soft-off varies
- longer time to become available again
Recommended rollout order:
1. Prove the concept with suspend on idle.
2. Only consider full power-off later if the hardware and firmware behave reliably.
### 3. Add an explicit availability policy
The interesting lesson for tagging is not "Mandos should have a build tag". The interesting lesson is that some machines have a deliberate availability policy that affects how safely they can participate in automation.
A future host-level setting could encode this policy directly, for example:
- `always-on`
- `wake-on-demand`
- `manual`
That setting would be a better source for any computed operational tag than current workload or ad hoc tags.
### 4. Idle detection should be policy-driven
If Mandos becomes a build worker, idle shutdown or suspend should depend on signals such as:
- no local interactive session activity
- no active build job
- no long-running system task that should keep the machine awake
This should not be a blind timer that powers the machine down every X minutes regardless of context.
### 5. Build orchestration needs a coordinator
Wake-on-demand only works well if something else can wake the machine and wait for it to become reachable. In practice, this means:
- a coordinator sends the wake signal
- the build client retries until the machine is reachable
- the remote builder participates only after it is actually ready
The exact implementation can vary, but the architectural point is the same: a wakeable build worker is not self-sufficient.
## Risks and caveats
- Firmware wake support may be unreliable, especially from full shutdown.
- Build latency increases because wake and readiness checks take time.
- A machine that users expect to be immediately available should not surprise them with power-state transitions at awkward moments.
- Interactive workload detection matters; otherwise the machine will feel hostile as a living-room device.
## Recommendation
Treat the Mandos idea as a good pattern, but generalize it:
- some machines are **interactive**
- some machines are **wakeable on demand**
- some machines are suitable for **interruptible background work**
Those are more reusable concepts than "Mandos is the build server".
## Implications for the tag strategy
This investigation strengthens a small part of the `operational:*` space:
- `operational:availability:always-on`
- `operational:availability:wake-on-demand`
- `operational:workload:interruptible`
These should not be assigned by hand if they can instead be computed from explicit machine settings that describe availability policy.
## References
- Clan inventory tags and dynamic tags docs: `https://clan.lol/docs/25.11/reference/options/clan_inventory`
- NixOS Wake-on-LAN wiki: `https://wiki.nixos.org/wiki/Wake_on_LAN`
- Home-lab wake-on-demand discussion and patterns:
- `https://dgross.ca/blog/linux-home-server-auto-sleep`
- `https://danielpgross.github.io/friendly_neighbor/howto-sleep-wake-on-demand.html`

View file

@ -0,0 +1,185 @@
# Clan machine tagging strategy
## Goal
Replace machine-name targeting with stable tags that survive machine renames, hardware reshuffles, and service moves.
The strategy should fit how this repo is evolving:
- machine tags should describe the machine
- service roles should describe service topology
- computed tags should be derived from machine settings or other explicit metadata, not from other tags
## Source material
This plan is based on:
- current Clan inventory in `clan.nix`
- current machine configs under `machines/*/configuration.nix`
- workload and module usage on `main` under:
- `systems/x86_64-linux/*/default.nix`
- `homes/x86_64-linux/chris@*/default.nix`
- Clan inventory tag and dynamic-tag documentation
## Guiding principles
### 1. Prefer capabilities over roles
A machine rarely has one permanent role. In this repo especially, a machine may be interactive, portable, build-capable, and temporarily host some service at the same time.
Because of that, tags should describe durable traits and capabilities rather than trying to answer "what is this machine?"
### 2. Do not encode current workload as a machine tag
The fact that a machine is currently running Grafana, Jellyfin, or PostgreSQL does not mean those services should become machine tags. Those are current placements, not stable identity.
If a service can move, its current presence is weak evidence for tagging.
### 3. Use service roles for topology
Some relationships belong in service definitions rather than host tags.
Examples:
- NFS producer and consumer
- persistence provider and client
- reverse proxy frontend and backend
These are not machine identity tags; they are service-topology relationships.
### 4. Derive tags from settings when possible
If a machine setting already captures a fact, derive the tag from that setting instead of duplicating it by hand.
Good examples in this repo:
- `desktop.use` can imply whether a machine is interactive
- `hardware.has.gpu.*` can imply GPU availability
- `hardware.has.audio` can imply audio capability
- `hardware.has.bluetooth` can imply Bluetooth capability
### 5. Avoid deriving tags from other tags
Clan supports dynamic tags, but tag-from-tag derivation can become fragile and can even recurse. If tags need computation, compute them from machine settings or an explicit metadata source instead.
## Proposed namespaces
Use full words:
- `capability:*`
- `operational:*`
The intention is:
- `capability:*` describes stable machine traits
- `operational:*` describes automation-relevant policy or availability behavior
## Capability tags
These are the strongest candidates for machine tags.
### Runtime
- `capability:runtime:interactive`
- `capability:runtime:headless`
These are directly useful for deciding where a service with a user-facing local experience does or does not belong.
### Hardware
- `capability:hardware:gpu`
- `capability:hardware:audio`
- `capability:hardware:bluetooth`
At the moment, the repo provides enough configuration structure to derive these from machine settings.
GPU vendor-specific tags are intentionally excluded for now. The current conclusion is that the presence of GPU hardware may matter, but the vendor usually does not unless there is a specific workload that depends on CUDA, ROCm, or a similar stack.
### Mobility
- `capability:mobility:portable`
- `capability:mobility:stationary`
These are useful concepts, but they are not currently obvious from one uniform machine setting in the repo. If they become desirable, they likely need either:
- an explicit machine setting, or
- a stronger convention around machine form factor
For now they are candidates, not automatic defaults.
## Operational tags
Operational tags are weaker than capability tags and should stay small in number.
They should only exist when they capture real automation constraints that are not already represented elsewhere.
### Availability
- `operational:availability:always-on`
- `operational:availability:wake-on-demand`
- `operational:availability:manual`
This dimension became clearer while thinking through the Mandos build-host idea. A machine may be technically capable of a workload, while its availability policy determines whether it is a sensible target.
These tags should not be guessed from existing workloads. They should come from an explicit machine setting that states the intended availability policy.
### Interruptibility
- `operational:workload:interruptible`
This is not about the machine by itself. It is a useful policy boundary for selecting machines that may host work that can be delayed, retried, paused, or moved.
If introduced, it should again come from explicit machine policy rather than being inferred from current services.
## What should not become machine tags
- current service assignments, such as Jellyfin, Grafana, Forgejo, or PostgreSQL
- service topology, such as NFS producer or consumer
- user application presence, such as Discord or TeamSpeak
- detailed desktop-environment choice, such as Plasma or Gamescope
- one-off descriptions like "living room" unless location becomes a deliberate scheduling dimension
## What is derivable today
The repo already contains enough structure to derive several useful capability tags.
Examples from the current configuration style:
- if a machine enables a desktop session, derive `capability:runtime:interactive`
- if a machine does not, derive `capability:runtime:headless`
- if a machine enables `hardware.has.audio`, derive `capability:hardware:audio`
- if a machine enables `hardware.has.bluetooth`, derive `capability:hardware:bluetooth`
- if a machine enables any `hardware.has.gpu.*`, derive `capability:hardware:gpu`
## What probably needs explicit policy
These should not be inferred from current services or tag combinations:
- `operational:availability:*`
- `operational:workload:interruptible`
- mobility-related tags if there is no explicit machine setting to derive them from
The clean way to support these is to introduce one or more explicit machine settings whose purpose is to describe machine policy rather than workload.
## Mandos update
The Mandos wake-on-demand build-host idea adds an important refinement:
- some machines should be eligible for background work only when they are available through a specific policy, such as wake-on-demand
This does **not** mean Mandos should get a hand-maintained "build server" tag.
It instead suggests a more generic pattern:
- a machine may be interactive
- a machine may be available on demand rather than always on
- that availability policy may influence whether certain classes of automation should target it
That strengthens the case for a very small `operational:*` namespace derived from explicit machine policy.
## Recommended next steps
1. Start with `capability:*` tags that are clearly derivable from machine settings.
2. Keep service topology in service roles instead of machine tags.
3. If availability policy becomes important, add an explicit machine setting for it and derive `operational:*` tags from that setting.
4. Avoid expanding the tag vocabulary until there is a clear service-selection use case for each added tag.

View file

@ -538,7 +538,7 @@ in
in
mkIf cfg.enable {
sneeuwvlok.services = {
persistance.postgresql.enable = true;
persistence.postgresql.enable = true;
networking.caddy = {
hosts = {

View file

@ -22,7 +22,7 @@ in {
config = mkIf cfg.enable {
sneeuwvlok.services = {
persistance.postgresql.enable = true;
persistence.postgresql.enable = true;
# virtualisation.podman.enable = true;
networking.caddy = {

View file

@ -25,7 +25,7 @@ in {
config = mkIf cfg.enable {
sneeuwvlok.services = {
persistance.postgresql.enable = true;
persistence.postgresql.enable = true;
virtualisation.podman.enable = true;
networking.caddy = {

View file

@ -6,9 +6,9 @@
}: let
inherit (lib) mkIf mkEnableOption;
cfg = config.sneeuwvlok.services.persistance.postgresql;
cfg = config.sneeuwvlok.services.persistence.postgresql;
in {
options.sneeuwvlok.services.persistance.postgresql = {
options.sneeuwvlok.services.persistence.postgresql = {
enable = mkEnableOption "Postgresql";
};