diff --git a/clan.nix b/clan.nix index 83a59e7..ae3cb5e 100644 --- a/clan.nix +++ b/clan.nix @@ -7,6 +7,39 @@ directory = ./.; + exportInterfaces = { + persistence = {lib, ...}: let + inherit (lib) mkOption types; + in { + options = { + main = mkOption { + type = types.str; + }; + + database = mkOption { + type = types.attrsOf types.anything; + }; + }; + }; + + servarr = {lib, ...}: let + inherit (lib) mkOption types; + in { + options = { + services = mkOption { + type = types.attrsOf (types.submodule { + options = { + port = mkOption { + type = types.port; + }; + }; + }); + default = "awesome!"; + }; + }; + }; + }; + inventory.machines = { aule = { name = "aule"; @@ -82,10 +115,19 @@ }; }; + persistence = { + module.name = "persistence"; + module.input = "self"; + + # TODO :: Convert to use tags instead + roles.default.machines.ulmo.settings = {}; + }; + servarr = { module.name = "servarr"; module.input = "self"; + # TODO :: Convert to use tags instead roles.default.machines.ulmo.settings = {}; roles.default.settings = { enable = true; diff --git a/clanServices/caddy/default.nix b/clanServices/caddy/default.nix index 4350c62..fc3ae7a 100644 --- a/clanServices/caddy/default.nix +++ b/clanServices/caddy/default.nix @@ -6,7 +6,7 @@ Configuration of reverse proxy. ''; categories = [ "Service", "Media" ]; - readme = builtins.readFile ./README.md + readme = builtins.readFile ./README.md; }; roles.default = { diff --git a/clanServices/peristance/default.nix b/clanServices/peristance/default.nix deleted file mode 100644 index 25503ff..0000000 --- a/clanServices/peristance/default.nix +++ /dev/null @@ -1,24 +0,0 @@ -{...}: { - _class = "clan.service"; - manifest = { - name = "arda/persistance"; - description = '' - Configuration of persistance resrouce(s) - (for now this means a database. and specifically it means postgres) - ''; - categories = [ "Service", "Peristance" ]; - readme = builtins.readFile ./README.md - }; - - roles.default = { - description = ''''; - - interface = {...}: { - options = {}; - }; - - perInstance = {...}: { - nixosModule = {...}: {}; - }; - }; -} diff --git a/clanServices/peristance/flake-module.nix b/clanServices/peristance/flake-module.nix deleted file mode 100644 index 4fc4110..0000000 --- a/clanServices/peristance/flake-module.nix +++ /dev/null @@ -1,13 +0,0 @@ -{...}: let - module = ./default.nix; -in { - clan.modules.peristance = module; - - # perSystem = {...}: { - # clan.nixosTests.peristance = { - # imports = []; - - # clan.modules."@arda/peristance" = module; - # }; - # }; -} diff --git a/clanServices/peristance/README.md b/clanServices/peristence/README.md similarity index 100% rename from clanServices/peristance/README.md rename to clanServices/peristence/README.md diff --git a/clanServices/peristence/default.nix b/clanServices/peristence/default.nix new file mode 100644 index 0000000..d0cbc19 --- /dev/null +++ b/clanServices/peristence/default.nix @@ -0,0 +1,35 @@ +{...}: { + _class = "clan.service"; + manifest = { + name = "arda/persistence"; + description = '' + Configuration of persistence resrouce(s) + (for now this means a database. 
and specifically it means postgres) + ''; + readme = builtins.readFile ./README.md; + exports.out = ["persistence"]; + }; + + roles.default = { + description = ''''; + + interface = {...}: { + options = {}; + }; + + perInstance = {mkExports, ...}: { + exports = mkExports { + persistence = { + main = "postgresql"; + database.postgresql = { + host = ""; + port = 5432; + }; + }; + }; + + nixosModule = {...}: { + }; + }; + }; +} diff --git a/clanServices/peristence/flake-module.nix b/clanServices/peristence/flake-module.nix new file mode 100644 index 0000000..56801d9 --- /dev/null +++ b/clanServices/peristence/flake-module.nix @@ -0,0 +1,13 @@ +{...}: let + module = ./default.nix; +in { + clan.modules.persistence = module; + + # perSystem = {...}: { + # clan.nixosTests.persistence = { + # imports = []; + + # clan.modules."@arda/persistence" = module; + # }; + # }; +} diff --git a/clanServices/servarr/default.nix b/clanServices/servarr/default.nix index 2394460..1b36eeb 100644 --- a/clanServices/servarr/default.nix +++ b/clanServices/servarr/default.nix @@ -1,15 +1,21 @@ -{lib, ...}: { +{ + exports, + clanLib, + lib, + ... +}: { _class = "clan.service"; manifest = { name = "arda/servarr"; description = ''''; categories = ["Service" "Media"]; readme = builtins.readFile ./README.md; - # exports.out = []; + exports = { + inputs = ["persistence"]; + out = ["servarr"]; + }; }; - # exports = {}; - roles.default = { description = ''''; @@ -21,7 +27,7 @@ services = mkOption { type = types.attrsOf (types.submodule ({name, ...}: { options = { - enable = mkEnableOption "Enable ${name}"; + enable = mkEnableOption "Enable ${name}" // {default = true;}; debug = mkEnableOption "Use tofu plan instead of tofu apply for ${name} "; rootFolders = mkOption { @@ -43,8 +49,21 @@ settings, machine, roles, + mkExports, ... }: { + exports = mkExports { + servarr.services = + settings.services + |> lib.attrNames + |> lib.concat ["sabnzbd" "qbittorrent" "flaresolverr"] + |> lib.imap1 (i: name: { + inherit name; + value = {port = 2000 + i;}; + }) + |> lib.listToAttrs; + }; + nixosModule = args @ { config, lib, @@ -54,6 +73,8 @@ servarr = import ./lib.nix (args // {inherit settings;}); services = settings.services |> lib.attrNames; service_count = services |> lib.length; + + db = exports |> clanLib.getExport {serviceName = "persistence";}; in { imports = [ (import ./sabnzbd.nix (args diff --git a/clanServices/servarr/lib.nix b/clanServices/servarr/lib.nix index 45b2831..43fde4d 100644 --- a/clanServices/servarr/lib.nix +++ b/clanServices/servarr/lib.nix @@ -10,7 +10,7 @@ createGenerator = { service, - service_options, + options, ... }: { files = { @@ -39,7 +39,7 @@ createService = { service, - service_options, + options, ... }: { @@ -55,7 +55,7 @@ server = { bindaddress = "0.0.0.0"; - port = service_options.port; + port = options.port; }; postgres = { @@ -74,14 +74,14 @@ createSystemdService = { service, - service_options, + options, ... 
}: let tofu = lib.getExe pkgs.opentofu; terraformConfiguration = self.inputs.terranix.lib.terranixConfiguration { system = pkgs.stdenv.hostPlatform.system; modules = [ - (createInfra {inherit service service_options;}) + (createInfra {inherit service options;}) ]; }; in { @@ -93,7 +93,7 @@ preStart = '' install -d -m 0770 -o ${service} -g media /var/lib/${service}-apply-infra ${ - service_options.rootFolders + options.rootFolders |> lib.map (folder: "install -d -m 0770 -o media -g media ${folder}") |> lib.join "\n" } @@ -120,7 +120,7 @@ # Run the infrastructure code ${tofu} \ ${ - if service_options.debug + if options.debug then "plan" else "apply -auto-approve" } \ @@ -143,7 +143,7 @@ # Returns a module to be used in a modules list of terranix createInfra = { service, - service_options, + options, ... }: terra: let inherit (terra.lib) tfRef; @@ -181,14 +181,14 @@ }; provider.${service} = { - url = "http://[::1]:${toString service_options.port}"; + url = "http://[::1]:${toString options.port}"; api_key = tfRef "var.${service}_api_key"; }; resource = { "${service}_root_folder" = mkIf (lib.elem service ["radarr" "sonarr" "whisparr" "readarr"]) ( - service_options.rootFolders + options.rootFolders |> lib.imap (i: f: lib.nameValuePair "local${toString i}" {path = f;}) |> lib.listToAttrs ); @@ -304,13 +304,17 @@ in { config = services |> lib.attrsToList - |> lib.imap1 (i: service: o: let - service_options = o // {port = 2000 + i;}; + |> lib.imap1 (i: { + name, + value, + }: let + service = name; + options = value // {port = 2000 + i;}; in { - clan.core.vars.generators.${service} = createGenerator {inherit service service_options;}; - services.${service} = createService {inherit service service_options;}; + clan.core.vars.generators.${service} = createGenerator {inherit service options;}; + services.${service} = createService {inherit service options;}; - systemd.services."${service}-apply-infra" = lib.mkIf settings.enable (createSystemdService {inherit service service_options;}); + systemd.services."${service}-apply-infra" = lib.mkIf settings.enable (createSystemdService {inherit service options;}); }) |> lib.mkMerge; }; diff --git a/docs/plans/mandos-wake-on-demand-build-host.md b/docs/plans/mandos-wake-on-demand-build-host.md new file mode 100644 index 0000000..0775828 --- /dev/null +++ b/docs/plans/mandos-wake-on-demand-build-host.md @@ -0,0 +1,125 @@ +# Mandos as a wake-on-demand build host + +## Goal + +Mandos is primarily an interactive living-room machine, but it is also a strong candidate for handling remote Nix builds when it is idle. The goal is to make that dual use practical without keeping the machine powered all the time. + +## Current context + +On `main`, Mandos is configured as an interactive gaming machine: + +- `systems/x86_64-linux/mandos/default.nix` + - `sneeuwvlok.hardware.has.gpu.nvidia = true` + - `sneeuwvlok.hardware.has.audio = true` + - `sneeuwvlok.desktop.use = "gamescope"` + - `sneeuwvlok.application.steam.enable = true` +- `homes/x86_64-linux/chris@mandos/default.nix` + - user-facing application set for an interactive machine + +This makes Mandos a poor fit for "always running random infrastructure", but a reasonable fit for "available for work when needed". + +## Desired behavior + +- Mandos remains an interactive machine first. +- Mandos can be used as a remote build worker when no one is actively using it. +- Mandos should not need to stay fully on all day just to be eligible for builds. 
+- Waking and idling down should be automatic enough that the machine can participate in builds without turning into a maintenance burden.
+
+## Recommended model
+
+### 1. Use wake-on-LAN as the activation mechanism
+
+Mandos should support being awakened by another machine on the same LAN.
+
+Requirements:
+
+- BIOS or UEFI wake-on-LAN support enabled
+- NixOS interface configuration enabling wake-on-LAN
+- one low-power machine that is effectively always available to send wake requests
+
+In this repo, `ulmo` is the obvious candidate to act as the coordinator, but the pattern should stay generic: one machine is always reachable, and one or more stronger machines can be woken on demand.
+
+### 2. Prefer suspend-first over shutdown-first
+
+There are two main power states worth considering:
+
+- **Suspend on idle**
+  - faster resume
+  - generally better user experience
+  - often easier to make reliable for wake-on-LAN
+- **Shutdown on idle**
+  - lowest power draw
+  - more fragile in practice because firmware support for wake from soft-off varies
+  - longer time to become available again
+
+Recommended rollout order:
+
+1. Prove the concept with suspend on idle.
+2. Only consider full power-off later if the hardware and firmware behave reliably.
+
+### 3. Add an explicit availability policy
+
+The interesting lesson for tagging is not "Mandos should have a build tag". The interesting lesson is that some machines have a deliberate availability policy that affects how safely they can participate in automation.
+
+A future host-level setting could encode this policy directly, for example:
+
+- `always-on`
+- `wake-on-demand`
+- `manual`
+
+That setting would be a better source for any computed operational tag than current workload or ad hoc tags.
+
+### 4. Idle detection should be policy-driven
+
+If Mandos becomes a build worker, idle shutdown or suspend should depend on signals such as:
+
+- no local interactive session activity
+- no active build job
+- no long-running system task that should keep the machine awake
+
+This should not be a blind timer that powers the machine down every X minutes regardless of context.
+
+### 5. Build orchestration needs a coordinator
+
+Wake-on-demand only works well if something else can wake the machine and wait for it to become reachable. In practice, this means:
+
+- a coordinator sends the wake signal
+- the build client retries until the machine is reachable
+- the remote builder participates only after it is actually ready
+
+The exact implementation can vary, but the architectural point is the same: a wakeable build worker is not self-sufficient.
+
+## Risks and caveats
+
+- Firmware wake support may be unreliable, especially from full shutdown.
+- Build latency increases because wake and readiness checks take time.
+- A machine that users expect to be immediately available should not surprise them with power-state transitions at awkward moments.
+- Interactive workload detection matters; otherwise the machine will feel hostile as a living-room device.
+
+## Recommendation
+
+Treat the Mandos idea as a good pattern, but generalize it:
+
+- some machines are **interactive**
+- some machines are **wakeable on demand**
+- some machines are suitable for **interruptible background work**
+
+Those are more reusable concepts than "Mandos is the build server".
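+
+## Configuration sketch
+
+A minimal sketch of the NixOS side of step 1, assuming the machine layout described above. The interface name `enp5s0` is a placeholder and would need to match Mandos' actual NIC; the MAC address is likewise left symbolic:
+
+```nix
+{ ... }: {
+  # On mandos: allow the NIC to wake the machine from suspend via a magic packet.
+  networking.interfaces.enp5s0.wakeOnLan.enable = true;
+}
+```
+
+```nix
+{ pkgs, ... }: {
+  # On ulmo (the always-on coordinator): provide a tool to send the wake packet,
+  # e.g. `wakeonlan <mandos-mac-address>` before dispatching a remote build.
+  environment.systemPackages = [ pkgs.wakeonlan ];
+}
+```
+
+Suspend-on-idle and readiness checks are intentionally not sketched here; they belong to steps 4 and 5.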
+ +## Implications for the tag strategy + +This investigation strengthens a small part of the `operational:*` space: + +- `operational:availability:always-on` +- `operational:availability:wake-on-demand` +- `operational:workload:interruptible` + +These should not be assigned by hand if they can instead be computed from explicit machine settings that describe availability policy. + +## References + +- Clan inventory tags and dynamic tags docs: `https://clan.lol/docs/25.11/reference/options/clan_inventory` +- NixOS Wake-on-LAN wiki: `https://wiki.nixos.org/wiki/Wake_on_LAN` +- Home-lab wake-on-demand discussion and patterns: + - `https://dgross.ca/blog/linux-home-server-auto-sleep` + - `https://danielpgross.github.io/friendly_neighbor/howto-sleep-wake-on-demand.html` diff --git a/docs/plans/tagging-strategy.md b/docs/plans/tagging-strategy.md new file mode 100644 index 0000000..eb77376 --- /dev/null +++ b/docs/plans/tagging-strategy.md @@ -0,0 +1,185 @@ +# Clan machine tagging strategy + +## Goal + +Replace machine-name targeting with stable tags that survive machine renames, hardware reshuffles, and service moves. + +The strategy should fit how this repo is evolving: + +- machine tags should describe the machine +- service roles should describe service topology +- computed tags should be derived from machine settings or other explicit metadata, not from other tags + +## Source material + +This plan is based on: + +- current Clan inventory in `clan.nix` +- current machine configs under `machines/*/configuration.nix` +- workload and module usage on `main` under: + - `systems/x86_64-linux/*/default.nix` + - `homes/x86_64-linux/chris@*/default.nix` +- Clan inventory tag and dynamic-tag documentation + +## Guiding principles + +### 1. Prefer capabilities over roles + +A machine rarely has one permanent role. In this repo especially, a machine may be interactive, portable, build-capable, and temporarily host some service at the same time. + +Because of that, tags should describe durable traits and capabilities rather than trying to answer "what is this machine?" + +### 2. Do not encode current workload as a machine tag + +A machine currently running Grafana, Jellyfin, or PostgreSQL does not mean that those should become machine tags. Those are current placements, not stable identity. + +If a service can move, its current presence is weak evidence for tagging. + +### 3. Use service roles for topology + +Some relationships belong in service definitions rather than host tags. + +Examples: + +- NFS producer and consumer +- persistence provider and client +- reverse proxy frontend and backend + +These are not machine identity tags; they are service-topology relationships. + +### 4. Derive tags from settings when possible + +If a machine setting already captures a fact, derive the tag from that setting instead of duplicating it by hand. + +Good examples in this repo: + +- `desktop.use` can imply whether a machine is interactive +- `hardware.has.gpu.*` can imply GPU availability +- `hardware.has.audio` can imply audio capability +- `hardware.has.bluetooth` can imply Bluetooth capability + +### 5. Avoid deriving tags from other tags + +Clan supports dynamic tags, but tag-from-tag derivation can become fragile and can even recurse. If tags need computation, compute them from machine settings or an explicit metadata source instead. 
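+
+As a rough illustration of principles 4 and 5, capability tags could be computed from a single explicit per-machine attribute set instead of being maintained by hand or derived from other tags. The `machineFacts` name and its fields are hypothetical; in practice they would mirror existing settings such as `hardware.has.*` and `desktop.use`:
+
+```nix
+{ lib, ... }: let
+  # Hypothetical explicit facts per machine: the single source the tags are derived from.
+  machineFacts = {
+    mandos = { interactive = true;  gpu = true;  audio = true; };
+    ulmo   = { interactive = false; gpu = false; audio = false; };
+  };
+
+  capabilityTags = facts:
+    [ (if facts.interactive then "capability:runtime:interactive" else "capability:runtime:headless") ]
+    ++ lib.optional facts.gpu "capability:hardware:gpu"
+    ++ lib.optional facts.audio "capability:hardware:audio";
+in {
+  # One way to feed the computed tags into the inventory.
+  inventory.machines = lib.mapAttrs (_name: facts: { tags = capabilityTags facts; }) machineFacts;
+}
+```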
+ +## Proposed namespaces + +Use full words: + +- `capability:*` +- `operational:*` + +The intention is: + +- `capability:*` describes stable machine traits +- `operational:*` describes automation-relevant policy or availability behavior + +## Capability tags + +These are the strongest candidates for machine tags. + +### Runtime + +- `capability:runtime:interactive` +- `capability:runtime:headless` + +These are directly useful for deciding where a service with a user-facing local experience does or does not belong. + +### Hardware + +- `capability:hardware:gpu` +- `capability:hardware:audio` +- `capability:hardware:bluetooth` + +At the moment, the repo provides enough configuration structure to derive these from machine settings. + +GPU vendor-specific tags are intentionally excluded for now. The current conclusion is that the presence of GPU hardware may matter, but the vendor usually does not unless there is a specific workload that depends on CUDA, ROCm, or a similar stack. + +### Mobility + +- `capability:mobility:portable` +- `capability:mobility:stationary` + +These are useful concepts, but they are not currently obvious from one uniform machine setting in the repo. If they become desirable, they likely need either: + +- an explicit machine setting, or +- a stronger convention around machine form factor + +For now they are candidates, not automatic defaults. + +## Operational tags + +Operational tags are weaker than capability tags and should stay small in number. + +They should only exist when they capture real automation constraints that are not already represented elsewhere. + +### Availability + +- `operational:availability:always-on` +- `operational:availability:wake-on-demand` +- `operational:availability:manual` + +This dimension became clearer while thinking through the Mandos build-host idea. A machine may be technically capable of a workload, while its availability policy determines whether it is a sensible target. + +These tags should not be guessed from existing workloads. They should come from an explicit machine setting that states the intended availability policy. + +### Interruptibility + +- `operational:workload:interruptible` + +This is not about the machine by itself. It is a useful policy boundary for selecting machines that may host work that can be delayed, retried, paused, or moved. + +If introduced, it should again come from explicit machine policy rather than being inferred from current services. + +## What should not become machine tags + +- current service assignments, such as Jellyfin, Grafana, Forgejo, or PostgreSQL +- service topology, such as NFS producer or consumer +- user application presence, such as Discord or TeamSpeak +- detailed desktop-environment choice, such as Plasma or Gamescope +- one-off descriptions like "living room" unless location becomes a deliberate scheduling dimension + +## What is derivable today + +The repo already contains enough structure to derive several useful capability tags. 
+ +Examples from the current configuration style: + +- if a machine enables a desktop session, derive `capability:runtime:interactive` +- if a machine does not, derive `capability:runtime:headless` +- if a machine enables `hardware.has.audio`, derive `capability:hardware:audio` +- if a machine enables `hardware.has.bluetooth`, derive `capability:hardware:bluetooth` +- if a machine enables any `hardware.has.gpu.*`, derive `capability:hardware:gpu` + +## What probably needs explicit policy + +These should not be inferred from current services or tag combinations: + +- `operational:availability:*` +- `operational:workload:interruptible` +- mobility-related tags if there is no explicit machine setting to derive them from + +The clean way to support these is to introduce one or more explicit machine settings whose purpose is to describe machine policy rather than workload. + +## Mandos update + +The Mandos wake-on-demand build-host idea adds an important refinement: + +- some machines should be eligible for background work only when they are available through a specific policy, such as wake-on-demand + +This does **not** mean Mandos should get a hand-maintained "build server" tag. + +It instead suggests a more generic pattern: + +- a machine may be interactive +- a machine may be available on demand rather than always on +- that availability policy may influence whether certain classes of automation should target it + +That strengthens the case for a very small `operational:*` namespace derived from explicit machine policy. + +## Recommended next steps + +1. Start with `capability:*` tags that are clearly derivable from machine settings. +2. Keep service topology in service roles instead of machine tags. +3. If availability policy becomes important, add an explicit machine setting for it and derive `operational:*` tags from that setting. +4. Avoid expanding the tag vocabulary until there is a clear service-selection use case for each added tag. 
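+
+## Example: selecting role members by tag
+
+As a sketch of where the next steps end up, the `# TODO :: Convert to use tags instead` markers in `clan.nix` could eventually be resolved by selecting role members through a capability tag rather than a machine name. The exact attribute shape for tag-based selection should be verified against the Clan inventory documentation listed in the source material; the sketch below assumes tags are keyed the same way machines currently are:
+
+```nix
+{
+  persistence = {
+    module.name = "persistence";
+    module.input = "self";
+
+    # Instead of pinning the role to ulmo by name, select every machine
+    # that carries the derived headless capability tag.
+    roles.default.tags."capability:runtime:headless" = {};
+  };
+}
+```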
diff --git a/modules/nixos/services/authentication/zitadel.nix b/modules/nixos/services/authentication/zitadel.nix index 6921302..bc83385 100644 --- a/modules/nixos/services/authentication/zitadel.nix +++ b/modules/nixos/services/authentication/zitadel.nix @@ -538,7 +538,7 @@ in in mkIf cfg.enable { sneeuwvlok.services = { - persistance.postgresql.enable = true; + persistence.postgresql.enable = true; networking.caddy = { hosts = { diff --git a/modules/nixos/services/communication/matrix.nix b/modules/nixos/services/communication/matrix.nix index 9cd78a5..1d1df2a 100644 --- a/modules/nixos/services/communication/matrix.nix +++ b/modules/nixos/services/communication/matrix.nix @@ -22,7 +22,7 @@ in { config = mkIf cfg.enable { sneeuwvlok.services = { - persistance.postgresql.enable = true; + persistence.postgresql.enable = true; # virtualisation.podman.enable = true; networking.caddy = { diff --git a/modules/nixos/services/development/forgejo.nix b/modules/nixos/services/development/forgejo.nix index 8e99c20..c468137 100644 --- a/modules/nixos/services/development/forgejo.nix +++ b/modules/nixos/services/development/forgejo.nix @@ -25,7 +25,7 @@ in { config = mkIf cfg.enable { sneeuwvlok.services = { - persistance.postgresql.enable = true; + persistence.postgresql.enable = true; virtualisation.podman.enable = true; networking.caddy = { diff --git a/modules/nixos/services/persistance/postgresql.nix b/modules/nixos/services/persistance/postgresql.nix index 86f63ec..f217dd5 100644 --- a/modules/nixos/services/persistance/postgresql.nix +++ b/modules/nixos/services/persistance/postgresql.nix @@ -6,9 +6,9 @@ }: let inherit (lib) mkIf mkEnableOption; - cfg = config.sneeuwvlok.services.persistance.postgresql; + cfg = config.sneeuwvlok.services.persistence.postgresql; in { - options.sneeuwvlok.services.persistance.postgresql = { + options.sneeuwvlok.services.persistence.postgresql = { enable = mkEnableOption "Postgresql"; };