scry/tools/DbGenerator/Program.cs
Chris Kruining 0801ceee6a
.
2026-02-05 09:41:07 +01:00

377 lines
12 KiB
C#

using Scry.Core.Data;
using Scry.Core.Imaging;
using Scry.Core.Models;
using Scry.Core.Scryfall;
using SkiaSharp;
// Generate a card hash database from Scryfall images.
// Usage: dotnet run -- [output-db] [--count N] [--include-test-cards] [--no-test-cards] [--force]
//
// The output path is positional and, when given, must be the first argument.
// If the first argument is an option (starts with "--") the default file name
// is used instead, so that e.g. `dotnet run -- --force` does not create a
// database file literally named "--force".
var outputDb = args.Length > 0 && !args[0].StartsWith("--", StringComparison.Ordinal)
    ? args[0]
    : "card_hashes.db";
var maxCards = 500;          // upper bound on total cards in the database (--count)
var includeTestCards = true; // whether the priority/test card list is enforced
var forceRebuild = false;    // delete any existing database before generating (--force)

// Parse options. Unrecognised arguments are ignored.
for (var i = 0; i < args.Length; i++)
{
    switch (args[i])
    {
        case "--count":
            if (i + 1 < args.Length && int.TryParse(args[i + 1], out var parsedCount))
            {
                maxCards = parsedCount;
                i++; // the value has been consumed together with the option
            }
            else
            {
                // Keep the current limit, but tell the user the option had no effect.
                Console.Error.WriteLine($"Warning: --count requires an integer value; using {maxCards}");
            }
            break;

        case "--include-test-cards":
            includeTestCards = true;
            break;

        case "--no-test-cards":
            includeTestCards = false;
            break;

        case "--force":
            forceRebuild = true;
            break;
    }
}

// Echo the effective configuration.
Console.WriteLine($"Generating hash database with up to {maxCards} cards");
Console.WriteLine($"Output: {outputDb}");
Console.WriteLine($"Include test cards: {includeTestCards}");
Console.WriteLine($"Force rebuild: {forceRebuild}");
Console.WriteLine();
// Priority (test) cards and the printings we would like to hash for each.
// Key: card name (case-insensitive). Value: acceptable set codes, or an empty
// array when any printing will do.
string[] alphaOrBeta = ["lea", "leb"]; // classic-look printings: Alpha or Beta
string[] anySet = [];                  // no set preference

Dictionary<string, string[]> priorityCardsWithSets = new(StringComparer.OrdinalIgnoreCase)
{
    // From reference_alpha/ - prefer LEA (Alpha) or LEB (Beta) for classic look
    ["Ancestral Recall"] = alphaOrBeta,
    ["Badlands"] = alphaOrBeta,
    ["Balance"] = alphaOrBeta,
    ["Bayou"] = alphaOrBeta,
    ["Birds of Paradise"] = alphaOrBeta,
    ["Black Lotus"] = alphaOrBeta,
    ["Channel"] = alphaOrBeta,
    ["Chaos Orb"] = alphaOrBeta,
    ["Clone"] = alphaOrBeta,
    ["Control Magic"] = alphaOrBeta,
    ["Counterspell"] = alphaOrBeta,
    ["Dark Ritual"] = alphaOrBeta,
    ["Demonic Tutor"] = alphaOrBeta,
    ["Disenchant"] = alphaOrBeta,
    ["Fireball"] = alphaOrBeta,
    ["Force of Nature"] = alphaOrBeta,
    ["Fork"] = alphaOrBeta,
    ["Giant Growth"] = alphaOrBeta,
    ["Hypnotic Specter"] = alphaOrBeta,
    ["Lightning Bolt"] = alphaOrBeta,
    ["Llanowar Elves"] = alphaOrBeta,
    ["Mahamoti Djinn"] = alphaOrBeta,
    ["Mind Twist"] = alphaOrBeta,
    ["Mox Emerald"] = alphaOrBeta,
    ["Mox Jet"] = alphaOrBeta,
    ["Mox Pearl"] = alphaOrBeta,
    ["Mox Ruby"] = alphaOrBeta,
    ["Mox Sapphire"] = alphaOrBeta,
    ["Nightmare"] = alphaOrBeta,
    ["Plateau"] = alphaOrBeta,
    ["Regrowth"] = alphaOrBeta,
    ["Rock Hydra"] = alphaOrBeta,
    ["Royal Assassin"] = alphaOrBeta,
    ["Savannah"] = alphaOrBeta,
    ["Scrubland"] = alphaOrBeta,
    ["Serra Angel"] = alphaOrBeta,
    ["Shivan Dragon"] = alphaOrBeta,
    ["Sol Ring"] = alphaOrBeta,
    ["Swords to Plowshares"] = alphaOrBeta,
    ["Taiga"] = alphaOrBeta,
    ["Time Walk"] = alphaOrBeta,
    ["Timetwister"] = alphaOrBeta,
    ["Tropical Island"] = alphaOrBeta,
    ["Tundra"] = alphaOrBeta,
    ["Underground Sea"] = alphaOrBeta,
    ["Wheel of Fortune"] = alphaOrBeta,
    ["Wrath of God"] = alphaOrBeta,

    // From reference/ - any set is fine
    ["Brainstorm"] = anySet,
    ["Force of Will"] = anySet,
    ["Griselbrand"] = anySet,
    ["Lotus Petal"] = anySet,
    ["Ponder"] = anySet,
    ["Show and Tell"] = anySet,
    ["Volcanic Island"] = anySet,
    ["Wasteland"] = anySet,

    // From single_cards/ - any set is fine
    ["Adanto Vanguard"] = anySet,
    ["Angel of Sanctions"] = anySet,
    ["Attunement"] = anySet,
    ["Avaricious Dragon"] = anySet,
    ["Burgeoning"] = anySet,
    ["Jarad, Golgari Lich Lord"] = anySet,
    ["Meletis Charlatan"] = anySet,
    ["Mindstab Thrull"] = anySet,
    ["Pacifism"] = anySet,
    ["Platinum Angel"] = anySet,
    ["Queen Marchesa"] = anySet,
    ["Spellseeker"] = anySet,
    ["Tarmogoyf"] = anySet,
    ["Thought Reflection"] = anySet,
    ["Unsummon"] = anySet,

    // From varying_quality - prefer older sets
    ["Dragon Whelp"] = alphaOrBeta,
    ["Evil Eye of Orms-by-Gore"] = anySet,
    ["Instill Energy"] = alphaOrBeta,

    // Popular cards for general testing
    ["Lightning Helix"] = anySet,
    ["Path to Exile"] = anySet,
    ["Thoughtseize"] = anySet,
    ["Fatal Push"] = anySet,
    ["Snapcaster Mage"] = anySet,
    ["Jace, the Mind Sculptor"] = anySet,
    ["Liliana of the Veil"] = anySet,
    ["Noble Hierarch"] = anySet,
    ["Goblin Guide"] = anySet,
    ["Eidolon of the Great Revel"] = anySet,
};

// Case-insensitive set of just the priority card names, for fast membership checks.
HashSet<string> priorityCards = new(priorityCardsWithSets.Keys, StringComparer.OrdinalIgnoreCase);
// --force: start from scratch by removing any database left by a previous run.
if (forceRebuild)
{
    if (File.Exists(outputDb))
    {
        Console.WriteLine("Force rebuild requested, removing existing database...");
        File.Delete(outputDb);
    }
}

// One HttpClient is shared by the Scryfall client and the image downloads below.
using var httpClient = new HttpClient();
httpClient.DefaultRequestHeaders.UserAgent.ParseAdd("Scry/1.0 (MTG Card Scanner - Database Generator)");
using var scryfallClient = new ScryfallClient(httpClient);
using var db = new CardHashDatabase(outputDb);

// Snapshot what the database already contains.
var existingCardIds = await db.GetExistingCardIdsAsync();
var existingCardNames = await db.GetExistingCardNamesAsync();
var existingCount = await db.GetHashCountAsync();
var previousScryfallDate = await db.GetMetadataAsync("scryfall_updated_at");
Console.WriteLine($"Existing database has {existingCount} cards");

// Ask Scryfall where the current "unique_artwork" bulk file lives.
Console.WriteLine("Fetching bulk data info from Scryfall...");
var bulkInfo = await scryfallClient.GetBulkDataInfoAsync("unique_artwork");
if (bulkInfo?.DownloadUri == null)
{
    Console.WriteLine("Failed to get bulk data info from Scryfall");
    return 1;
}

Console.WriteLine($"Scryfall data last updated: {bulkInfo.UpdatedAt}");

// The Scryfall timestamp is stored as metadata after each run and compared here.
var scryfallDateStr = bulkInfo.UpdatedAt?.ToString("O") ?? "";

// The database is stale when it is empty, smaller than the requested size,
// or was built from a different Scryfall snapshot.
var databaseIsStale = existingCount == 0
    || existingCount < maxCards
    || previousScryfallDate != scryfallDateStr;

// Priority cards that are not in the database yet also force an update.
List<string> absentPriorityCards = [];
if (includeTestCards)
{
    absentPriorityCards.AddRange(priorityCards.Where(name => !existingCardNames.Contains(name)));
}

var priorityCardsAbsent = absentPriorityCards.Count > 0;
if (priorityCardsAbsent)
{
    Console.WriteLine($"Missing {absentPriorityCards.Count} priority cards");
}

if (!databaseIsStale && !priorityCardsAbsent)
{
    Console.WriteLine("Database is up-to-date, no changes needed");
    return 0;
}
Console.WriteLine($"Downloading card data from: {bulkInfo.DownloadUri}");
Console.WriteLine();

// State accumulated while streaming the bulk data.
var newHashes = new List<CardHash>(); // hashes computed in this run, inserted in one batch afterwards
var processed = 0;                    // images downloaded and hashed successfully
var errors = 0;                       // download/decode/hash failures
var skipped = 0;                      // cards whose id is already in the database
var priorityFound = 0;                // priority cards hashed in this run
var priorityNeeded = includeTestCards ? priorityCards.Count : 0;

// Track which priority cards we've already found with their set
// Key: card name, Value: set code
var foundPriorityWithSet = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);
// Returns true when setCode is an acceptable printing for the given priority card:
// the card must be in the priority table, and its preference list must either be
// empty (any set is fine) or contain setCode (case-insensitive).
// Cards that are not in the priority table always yield false.
bool IsPreferredSet(string cardName, string setCode) =>
    priorityCardsWithSets.TryGetValue(cardName, out var acceptableSets)
    && (acceptableSets.Length == 0
        || acceptableSets.Contains(setCode, StringComparer.OrdinalIgnoreCase));
// Stream every card in the bulk file and hash the images of the cards we still need.
// Priority cards are always taken (preferring their listed sets); other cards only
// fill whatever room is left below maxCards after reserving slots for priority cards.
await foreach (var card in scryfallClient.StreamBulkDataAsync(bulkInfo.DownloadUri))
{
    // Skip non-English cards
    if (card.Lang != "en")
    {
        continue;
    }

    var imageUri = card.GetImageUri("normal");
    if (string.IsNullOrEmpty(imageUri))
    {
        continue;
    }

    var cardId = card.Id ?? Guid.NewGuid().ToString();
    var cardName = card.Name ?? "Unknown";
    var setCode = card.Set ?? "???";

    // Classify the card before the "already stored" check so that priority cards
    // which are already in the database are still accounted for below.
    var isPriorityCard = includeTestCards && priorityCards.Contains(cardName);
    var isPreferred = isPriorityCard && IsPreferredSet(cardName, setCode);

    // Check if this card already exists in the database
    if (existingCardIds.Contains(cardId))
    {
        // A stored priority card counts as found. Without this, its slot would stay
        // reserved forever, the "all priority cards found" exit could never trigger,
        // and another artwork of the same card would be downloaded on every rerun.
        // Never downgrade an entry that is already recorded to a non-preferred set.
        if (isPriorityCard && (isPreferred || !foundPriorityWithSet.ContainsKey(cardName)))
        {
            foundPriorityWithSet[cardName] = setCode;
        }

        skipped++;
        continue;
    }

    // A priority card we already have is only worth another download when the
    // recorded printing is not a preferred one and this printing is.
    if (isPriorityCard && foundPriorityWithSet.TryGetValue(cardName, out var existingSet))
    {
        if (IsPreferredSet(cardName, existingSet) || !isPreferred)
        {
            continue;
        }
    }

    // Calculate how many slots we have left, keeping room for priority cards
    // that have not been found yet.
    var totalCards = existingCount + newHashes.Count;
    var priorityRemaining = priorityNeeded - foundPriorityWithSet.Count;
    var slotsForNonPriority = maxCards - priorityRemaining;

    // Skip if we have enough non-priority cards and this isn't priority
    if (!isPriorityCard && totalCards >= slotsForNonPriority)
    {
        continue;
    }

    // Download and process image
    try
    {
        Console.Write($"[{processed + 1}] {cardName}... ");
        var imageBytes = await httpClient.GetByteArrayAsync(imageUri);
        using var bitmap = SKBitmap.Decode(imageBytes);
        if (bitmap == null)
        {
            Console.WriteLine("decode failed");
            errors++;
            continue;
        }

        // Apply CLAHE preprocessing and compute hash
        using var preprocessed = ImagePreprocessor.ApplyClahe(bitmap);
        var hash = PerceptualHash.ComputeColorHash(preprocessed);
        var cardHash = new CardHash
        {
            CardId = cardId,
            Name = cardName,
            SetCode = setCode,
            CollectorNumber = card.CollectorNumber,
            Hash = hash,
            ImageUri = imageUri
        };

        if (isPriorityCard)
        {
            // Reaching this point with an entry already queued for the same name means
            // this is a preferred printing superseding a non-preferred one from this
            // run: actually replace it instead of inserting both.
            var superseded = newHashes.RemoveAll(
                queued => string.Equals(queued.Name, cardName, StringComparison.OrdinalIgnoreCase));

            foundPriorityWithSet[cardName] = setCode;

            // Count each priority card once, even when its printing was upgraded.
            if (superseded == 0)
            {
                priorityFound++;
            }

            Console.WriteLine($"OK (priority, {setCode})");
        }
        else
        {
            Console.WriteLine($"OK ({setCode})");
        }

        newHashes.Add(cardHash);
        processed++;

        // Check if we have enough cards
        var foundAllPriority = foundPriorityWithSet.Count >= priorityNeeded;
        if (existingCount + newHashes.Count >= maxCards && foundAllPriority)
        {
            Console.WriteLine($"\nReached {maxCards} cards limit with all priority cards");
            break;
        }

        // Rate limit to be nice to Scryfall
        await Task.Delay(50);
    }
    catch (Exception ex)
    {
        // Best effort: report the failure and move on to the next card.
        Console.WriteLine($"error: {ex.Message}");
        errors++;
    }
}
// A priority card is present when it was recorded during this run or its name
// was already in the database before the run started. Counting only the cards
// recorded in this run would report previously stored cards as missing.
var priorityPresent = includeTestCards
    ? priorityCards.Count(c => foundPriorityWithSet.ContainsKey(c) || existingCardNames.Contains(c))
    : 0;

// Run summary
Console.WriteLine();
Console.WriteLine($"Skipped (already in DB): {skipped}");
Console.WriteLine($"Newly processed: {processed} cards");
Console.WriteLine($"New priority cards found: {priorityFound}");
Console.WriteLine($"Total priority cards: {priorityPresent}/{priorityNeeded}");
Console.WriteLine($"Errors: {errors}");
Console.WriteLine();

// Persist the new hashes in one batch.
if (newHashes.Count > 0)
{
    Console.WriteLine($"Inserting {newHashes.Count} new hashes into database...");
    await db.InsertHashBatchAsync(newHashes);
}

// Record when and from which Scryfall snapshot the database was generated;
// "scryfall_updated_at" is what the next run compares against.
await db.SetMetadataAsync("generated_at", DateTime.UtcNow.ToString("O"));
await db.SetMetadataAsync("scryfall_updated_at", scryfallDateStr);
var finalCount = await db.GetHashCountAsync();
await db.SetMetadataAsync("card_count", finalCount.ToString());
Console.WriteLine($"Database now has {finalCount} cards: {outputDb}");

// Report missing priority cards
if (includeTestCards)
{
    var missing = priorityCards
        .Where(c => !foundPriorityWithSet.ContainsKey(c) && !existingCardNames.Contains(c))
        .ToList();
    if (missing.Count > 0)
    {
        Console.WriteLine();
        Console.WriteLine($"Missing priority cards ({missing.Count}):");

        // Keep the output readable: list at most 20 names, then summarise the rest.
        foreach (var name in missing.Take(20))
        {
            Console.WriteLine($" - {name}");
        }

        if (missing.Count > 20)
        {
            Console.WriteLine($" ... and {missing.Count - 20} more");
        }
    }
}

return 0;