daily checkpoint

This commit is contained in:
Chris Kruining 2026-04-01 16:09:51 +02:00
parent cc86b0a815
commit 2ffece26f2
No known key found for this signature in database
GPG key ID: EB894A3560CCCAD2
15 changed files with 451 additions and 63 deletions

View file

@ -7,6 +7,39 @@
directory = ./.;
exportInterfaces = {
persistence = {lib, ...}: let
inherit (lib) mkOption types;
in {
options = {
main = mkOption {
type = types.str;
};
database = mkOption {
type = types.attrsOf types.anything;
};
};
};
servarr = {lib, ...}: let
inherit (lib) mkOption types;
in {
options = {
services = mkOption {
type = types.attrsOf (types.submodule {
options = {
port = mkOption {
type = types.port;
};
};
});
default = "awesome!";
};
};
};
};
inventory.machines = {
aule = {
name = "aule";
@ -82,10 +115,19 @@
};
};
persistence = {
module.name = "persistence";
module.input = "self";
# TODO :: Convert to use tags instead
roles.default.machines.ulmo.settings = {};
};
servarr = {
module.name = "servarr";
module.input = "self";
# TODO :: Convert to use tags instead
roles.default.machines.ulmo.settings = {};
roles.default.settings = {
enable = true;

View file

@ -6,7 +6,7 @@
Configuration of reverse proxy.
'';
categories = [ "Service", "Media" ];
readme = builtins.readFile ./README.md
readme = builtins.readFile ./README.md;
};
roles.default = {

View file

@ -1,24 +0,0 @@
{...}: {
_class = "clan.service";
manifest = {
name = "arda/persistance";
description = ''
Configuration of persistance resrouce(s)
(for now this means a database. and specifically it means postgres)
'';
categories = [ "Service", "Peristance" ];
readme = builtins.readFile ./README.md
};
roles.default = {
description = '''';
interface = {...}: {
options = {};
};
perInstance = {...}: {
nixosModule = {...}: {};
};
};
}

View file

@ -1,13 +0,0 @@
{...}: let
module = ./default.nix;
in {
clan.modules.peristance = module;
# perSystem = {...}: {
# clan.nixosTests.peristance = {
# imports = [];
# clan.modules."@arda/peristance" = module;
# };
# };
}

View file

@ -0,0 +1,35 @@
{...}: {
_class = "clan.service";
manifest = {
name = "arda/persistence";
description = ''
Configuration of persistence resrouce(s)
(for now this means a database. and specifically it means postgres)
'';
readme = builtins.readFile ./README.md;
exports.out = ["persistence"];
};
roles.default = {
description = '''';
interface = {...}: {
options = {};
};
perInstance = {mkExports, ...}: {
exports = mkExports {
persistence = {
main = "postgresql";
database.postgresql = {
host = "";
port = 5432;
};
};
};
nixosModule = {...}: {
};
};
};
}

View file

@ -0,0 +1,13 @@
{...}: let
module = ./default.nix;
in {
clan.modules.persistence = module;
# perSystem = {...}: {
# clan.nixosTests.persistence = {
# imports = [];
# clan.modules."@arda/persistence" = module;
# };
# };
}

View file

@ -1,15 +1,21 @@
{lib, ...}: {
{
exports,
clanLib,
lib,
...
}: {
_class = "clan.service";
manifest = {
name = "arda/servarr";
description = '''';
categories = ["Service" "Media"];
readme = builtins.readFile ./README.md;
# exports.out = [];
exports = {
inputs = ["persistence"];
out = ["servarr"];
};
};
# exports = {};
roles.default = {
description = '''';
@ -21,7 +27,7 @@
services = mkOption {
type = types.attrsOf (types.submodule ({name, ...}: {
options = {
enable = mkEnableOption "Enable ${name}";
enable = mkEnableOption "Enable ${name}" // {default = true;};
debug = mkEnableOption "Use tofu plan instead of tofu apply for ${name} ";
rootFolders = mkOption {
@ -43,8 +49,21 @@
settings,
machine,
roles,
mkExports,
...
}: {
exports = mkExports {
servarr.services =
settings.services
|> lib.attrNames
|> lib.concat ["sabnzbd" "qbittorrent" "flaresolverr"]
|> lib.imap1 (i: name: {
inherit name;
value = {port = 2000 + i;};
})
|> lib.listToAttrs;
};
nixosModule = args @ {
config,
lib,
@ -54,6 +73,8 @@
servarr = import ./lib.nix (args // {inherit settings;});
services = settings.services |> lib.attrNames;
service_count = services |> lib.length;
db = exports |> clanLib.getExport {serviceName = "persistence";};
in {
imports = [
(import ./sabnzbd.nix (args

View file

@ -10,7 +10,7 @@
createGenerator = {
service,
service_options,
options,
...
}: {
files = {
@ -39,7 +39,7 @@
createService = {
service,
service_options,
options,
...
}:
{
@ -55,7 +55,7 @@
server = {
bindaddress = "0.0.0.0";
port = service_options.port;
port = options.port;
};
postgres = {
@ -74,14 +74,14 @@
createSystemdService = {
service,
service_options,
options,
...
}: let
tofu = lib.getExe pkgs.opentofu;
terraformConfiguration = self.inputs.terranix.lib.terranixConfiguration {
system = pkgs.stdenv.hostPlatform.system;
modules = [
(createInfra {inherit service service_options;})
(createInfra {inherit service options;})
];
};
in {
@ -93,7 +93,7 @@
preStart = ''
install -d -m 0770 -o ${service} -g media /var/lib/${service}-apply-infra
${
service_options.rootFolders
options.rootFolders
|> lib.map (folder: "install -d -m 0770 -o media -g media ${folder}")
|> lib.join "\n"
}
@ -120,7 +120,7 @@
# Run the infrastructure code
${tofu} \
${
if service_options.debug
if options.debug
then "plan"
else "apply -auto-approve"
} \
@ -143,7 +143,7 @@
# Returns a module to be used in a modules list of terranix
createInfra = {
service,
service_options,
options,
...
}: terra: let
inherit (terra.lib) tfRef;
@ -181,14 +181,14 @@
};
provider.${service} = {
url = "http://[::1]:${toString service_options.port}";
url = "http://[::1]:${toString options.port}";
api_key = tfRef "var.${service}_api_key";
};
resource =
{
"${service}_root_folder" = mkIf (lib.elem service ["radarr" "sonarr" "whisparr" "readarr"]) (
service_options.rootFolders
options.rootFolders
|> lib.imap (i: f: lib.nameValuePair "local${toString i}" {path = f;})
|> lib.listToAttrs
);
@ -304,13 +304,17 @@ in {
config =
services
|> lib.attrsToList
|> lib.imap1 (i: service: o: let
service_options = o // {port = 2000 + i;};
|> lib.imap1 (i: {
name,
value,
}: let
service = name;
options = value // {port = 2000 + i;};
in {
clan.core.vars.generators.${service} = createGenerator {inherit service service_options;};
services.${service} = createService {inherit service service_options;};
clan.core.vars.generators.${service} = createGenerator {inherit service options;};
services.${service} = createService {inherit service options;};
systemd.services."${service}-apply-infra" = lib.mkIf settings.enable (createSystemdService {inherit service service_options;});
systemd.services."${service}-apply-infra" = lib.mkIf settings.enable (createSystemdService {inherit service options;});
})
|> lib.mkMerge;
};

View file

@ -0,0 +1,125 @@
# Mandos as a wake-on-demand build host
## Goal
Mandos is primarily an interactive living-room machine, but it is also a strong candidate for handling remote Nix builds when it is idle. The goal is to make that dual use practical without keeping the machine powered all the time.
## Current context
On `main`, Mandos is configured as an interactive gaming machine:
- `systems/x86_64-linux/mandos/default.nix`
- `sneeuwvlok.hardware.has.gpu.nvidia = true`
- `sneeuwvlok.hardware.has.audio = true`
- `sneeuwvlok.desktop.use = "gamescope"`
- `sneeuwvlok.application.steam.enable = true`
- `homes/x86_64-linux/chris@mandos/default.nix`
- user-facing application set for an interactive machine
This makes Mandos a poor fit for "always running random infrastructure", but a reasonable fit for "available for work when needed".
## Desired behavior
- Mandos remains an interactive machine first.
- Mandos can be used as a remote build worker when no one is actively using it.
- Mandos should not need to stay fully on all day just to be eligible for builds.
- Waking and idling down should be automatic enough that the machine can participate in builds without turning into a maintenance burden.
## Recommended model
### 1. Use wake-on-LAN as the activation mechanism
Mandos should support being awakened by another machine on the same LAN.
Requirements:
- BIOS or UEFI wake-on-LAN support enabled
- NixOS interface configuration enabling wake-on-LAN
- one low-power machine that is effectively always available to send wake requests
In this repo, `ulmo` is the obvious candidate to act as the coordinator, but the pattern should stay generic: one machine is always reachable, and one or more stronger machines can be woken on demand.
### 2. Prefer suspend-first over shutdown-first
There are two main power states worth considering:
- **Suspend on idle**
- faster resume
- generally better user experience
- often easier to make reliable for wake-on-LAN
- **Shutdown on idle**
- lowest power draw
- more fragile in practice because firmware support for wake from soft-off varies
- longer time to become available again
Recommended rollout order:
1. Prove the concept with suspend on idle.
2. Only consider full power-off later if the hardware and firmware behave reliably.
### 3. Add an explicit availability policy
The interesting lesson for tagging is not "Mandos should have a build tag". The interesting lesson is that some machines have a deliberate availability policy that affects how safely they can participate in automation.
A future host-level setting could encode this policy directly, for example:
- `always-on`
- `wake-on-demand`
- `manual`
That setting would be a better source for any computed operational tag than current workload or ad hoc tags.
### 4. Idle detection should be policy-driven
If Mandos becomes a build worker, idle shutdown or suspend should depend on signals such as:
- no local interactive session activity
- no active build job
- no long-running system task that should keep the machine awake
This should not be a blind timer that powers the machine down every X minutes regardless of context.
### 5. Build orchestration needs a coordinator
Wake-on-demand only works well if something else can wake the machine and wait for it to become reachable. In practice, this means:
- a coordinator sends the wake signal
- the build client retries until the machine is reachable
- the remote builder participates only after it is actually ready
The exact implementation can vary, but the architectural point is the same: a wakeable build worker is not self-sufficient.
## Risks and caveats
- Firmware wake support may be unreliable, especially from full shutdown.
- Build latency increases because wake and readiness checks take time.
- A machine that users expect to be immediately available should not surprise them with power-state transitions at awkward moments.
- Interactive workload detection matters; otherwise the machine will feel hostile as a living-room device.
## Recommendation
Treat the Mandos idea as a good pattern, but generalize it:
- some machines are **interactive**
- some machines are **wakeable on demand**
- some machines are suitable for **interruptible background work**
Those are more reusable concepts than "Mandos is the build server".
## Implications for the tag strategy
This investigation strengthens a small part of the `operational:*` space:
- `operational:availability:always-on`
- `operational:availability:wake-on-demand`
- `operational:workload:interruptible`
These should not be assigned by hand if they can instead be computed from explicit machine settings that describe availability policy.
## References
- Clan inventory tags and dynamic tags docs: `https://clan.lol/docs/25.11/reference/options/clan_inventory`
- NixOS Wake-on-LAN wiki: `https://wiki.nixos.org/wiki/Wake_on_LAN`
- Home-lab wake-on-demand discussion and patterns:
- `https://dgross.ca/blog/linux-home-server-auto-sleep`
- `https://danielpgross.github.io/friendly_neighbor/howto-sleep-wake-on-demand.html`

View file

@ -0,0 +1,185 @@
# Clan machine tagging strategy
## Goal
Replace machine-name targeting with stable tags that survive machine renames, hardware reshuffles, and service moves.
The strategy should fit how this repo is evolving:
- machine tags should describe the machine
- service roles should describe service topology
- computed tags should be derived from machine settings or other explicit metadata, not from other tags
## Source material
This plan is based on:
- current Clan inventory in `clan.nix`
- current machine configs under `machines/*/configuration.nix`
- workload and module usage on `main` under:
- `systems/x86_64-linux/*/default.nix`
- `homes/x86_64-linux/chris@*/default.nix`
- Clan inventory tag and dynamic-tag documentation
## Guiding principles
### 1. Prefer capabilities over roles
A machine rarely has one permanent role. In this repo especially, a machine may be interactive, portable, build-capable, and temporarily host some service at the same time.
Because of that, tags should describe durable traits and capabilities rather than trying to answer "what is this machine?"
### 2. Do not encode current workload as a machine tag
The fact that a machine is currently running Grafana, Jellyfin, or PostgreSQL does not mean those services should become machine tags. Those are current placements, not stable identity.
If a service can move, its current presence is weak evidence for tagging.
### 3. Use service roles for topology
Some relationships belong in service definitions rather than host tags.
Examples:
- NFS producer and consumer
- persistence provider and client
- reverse proxy frontend and backend
These are not machine identity tags; they are service-topology relationships.
### 4. Derive tags from settings when possible
If a machine setting already captures a fact, derive the tag from that setting instead of duplicating it by hand.
Good examples in this repo:
- `desktop.use` can imply whether a machine is interactive
- `hardware.has.gpu.*` can imply GPU availability
- `hardware.has.audio` can imply audio capability
- `hardware.has.bluetooth` can imply Bluetooth capability
### 5. Avoid deriving tags from other tags
Clan supports dynamic tags, but tag-from-tag derivation can become fragile and can even recurse. If tags need computation, compute them from machine settings or an explicit metadata source instead.
## Proposed namespaces
Use full words:
- `capability:*`
- `operational:*`
The intention is:
- `capability:*` describes stable machine traits
- `operational:*` describes automation-relevant policy or availability behavior
## Capability tags
These are the strongest candidates for machine tags.
### Runtime
- `capability:runtime:interactive`
- `capability:runtime:headless`
These are directly useful for deciding where a service with a user-facing local experience does or does not belong.
### Hardware
- `capability:hardware:gpu`
- `capability:hardware:audio`
- `capability:hardware:bluetooth`
At the moment, the repo provides enough configuration structure to derive these from machine settings.
GPU vendor-specific tags are intentionally excluded for now. The current conclusion is that the presence of GPU hardware may matter, but the vendor usually does not unless there is a specific workload that depends on CUDA, ROCm, or a similar stack.
### Mobility
- `capability:mobility:portable`
- `capability:mobility:stationary`
These are useful concepts, but they are not currently obvious from one uniform machine setting in the repo. If they become desirable, they likely need either:
- an explicit machine setting, or
- a stronger convention around machine form factor
For now they are candidates, not automatic defaults.
## Operational tags
Operational tags are weaker than capability tags and should stay small in number.
They should only exist when they capture real automation constraints that are not already represented elsewhere.
### Availability
- `operational:availability:always-on`
- `operational:availability:wake-on-demand`
- `operational:availability:manual`
This dimension became clearer while thinking through the Mandos build-host idea. A machine may be technically capable of a workload, while its availability policy determines whether it is a sensible target.
These tags should not be guessed from existing workloads. They should come from an explicit machine setting that states the intended availability policy.
### Interruptibility
- `operational:workload:interruptible`
This is not about the machine by itself. It is a useful policy boundary for selecting machines that may host work that can be delayed, retried, paused, or moved.
If introduced, it should again come from explicit machine policy rather than being inferred from current services.
## What should not become machine tags
- current service assignments, such as Jellyfin, Grafana, Forgejo, or PostgreSQL
- service topology, such as NFS producer or consumer
- user application presence, such as Discord or TeamSpeak
- detailed desktop-environment choice, such as Plasma or Gamescope
- one-off descriptions like "living room" unless location becomes a deliberate scheduling dimension
## What is derivable today
The repo already contains enough structure to derive several useful capability tags.
Examples from the current configuration style:
- if a machine enables a desktop session, derive `capability:runtime:interactive`
- if a machine does not, derive `capability:runtime:headless`
- if a machine enables `hardware.has.audio`, derive `capability:hardware:audio`
- if a machine enables `hardware.has.bluetooth`, derive `capability:hardware:bluetooth`
- if a machine enables any `hardware.has.gpu.*`, derive `capability:hardware:gpu`
## What probably needs explicit policy
These should not be inferred from current services or tag combinations:
- `operational:availability:*`
- `operational:workload:interruptible`
- mobility-related tags if there is no explicit machine setting to derive them from
The clean way to support these is to introduce one or more explicit machine settings whose purpose is to describe machine policy rather than workload.
## Mandos update
The Mandos wake-on-demand build-host idea adds an important refinement:
- some machines should be eligible for background work only when they are available through a specific policy, such as wake-on-demand
This does **not** mean Mandos should get a hand-maintained "build server" tag.
It instead suggests a more generic pattern:
- a machine may be interactive
- a machine may be available on demand rather than always on
- that availability policy may influence whether certain classes of automation should target it
That strengthens the case for a very small `operational:*` namespace derived from explicit machine policy.
## Recommended next steps
1. Start with `capability:*` tags that are clearly derivable from machine settings.
2. Keep service topology in service roles instead of machine tags.
3. If availability policy becomes important, add an explicit machine setting for it and derive `operational:*` tags from that setting.
4. Avoid expanding the tag vocabulary until there is a clear service-selection use case for each added tag.

View file

@ -538,7 +538,7 @@ in
in
mkIf cfg.enable {
sneeuwvlok.services = {
persistance.postgresql.enable = true;
persistence.postgresql.enable = true;
networking.caddy = {
hosts = {

View file

@ -22,7 +22,7 @@ in {
config = mkIf cfg.enable {
sneeuwvlok.services = {
persistance.postgresql.enable = true;
persistence.postgresql.enable = true;
# virtualisation.podman.enable = true;
networking.caddy = {

View file

@ -25,7 +25,7 @@ in {
config = mkIf cfg.enable {
sneeuwvlok.services = {
persistance.postgresql.enable = true;
persistence.postgresql.enable = true;
virtualisation.podman.enable = true;
networking.caddy = {

View file

@ -6,9 +6,9 @@
}: let
inherit (lib) mkIf mkEnableOption;
cfg = config.sneeuwvlok.services.persistance.postgresql;
cfg = config.sneeuwvlok.services.persistence.postgresql;
in {
options.sneeuwvlok.services.persistance.postgresql = {
options.sneeuwvlok.services.persistence.postgresql = {
enable = mkEnableOption "Postgresql";
};