feat: Implement image fetching and storage system
Some checks failed
Deploy to BeePC / deploy (push) Has been cancelled

- Add image-fetcher module for downloading and saving images from various sources.
- Create storage module for managing image files, including downloading, verifying integrity, and cleaning up orphaned files.
- Develop gallery HTML page for displaying images with sorting and filtering options.
- Set up RESTful API routes for image management, including fetching, adding tags, and deleting images.
- Introduce setup script for initializing the database and configuring image sources.
- Implement a batch process for verifying image integrity and cleaning up old images.
- Add setup batch script for easy installation and configuration of the image storage system.
This commit is contained in:
2026-02-12 13:13:36 -05:00
parent ea6cc3fc85
commit 9c72b00b1b
19 changed files with 3004 additions and 71 deletions

342
lib/database.js Normal file
View File

@@ -0,0 +1,342 @@
const sqlite3 = require('sqlite3').verbose();
const path = require('path');
const fs = require('fs');
// Location of the SQLite file: <parent of this module's directory>/data/homebase.db
const DB_PATH = path.join(__dirname, '..', 'data', 'homebase.db');

// Create the folder that will hold the database file when it is not there yet.
const dataDir = path.dirname(DB_PATH);
if (fs.existsSync(dataDir) === false) {
  fs.mkdirSync(dataDir, { recursive: true });
}

// Open the connection shared by every helper in this module.
// The callback only reports whether opening worked.
const db = new sqlite3.Database(DB_PATH, (err) => {
  if (!err) {
    console.log('Connected to SQLite database at', DB_PATH);
    return;
  }
  console.error('Database connection error:', err);
});

// Switch on foreign-key enforcement for this connection.
db.run('PRAGMA foreign_keys = ON');
/**
 * Initialize database schema.
 *
 * Creates the `images` and `tags` tables and their indexes when they do not
 * exist yet. All statements are queued inside `db.serialize()`.
 *
 * Every statement reports into the same completion handler, so a failure in
 * any of them (including the index statements) rejects the promise, and the
 * success message is only logged when nothing failed.
 *
 * @returns {Promise<void>} Resolves after all statements finished without
 *   error; rejects with the first error any statement reported.
 */
async function initializeDatabase() {
  const createImagesTable = [
    'CREATE TABLE IF NOT EXISTS images (',
    'id INTEGER PRIMARY KEY AUTOINCREMENT,',
    'filename TEXT NOT NULL UNIQUE,',
    'source_url TEXT NOT NULL,',
    'file_path TEXT NOT NULL,',
    'filesize INTEGER,',
    'file_hash TEXT NOT NULL,',
    'mime_type TEXT,',
    'fetched_at DATETIME DEFAULT CURRENT_TIMESTAMP,',
    'created_at DATETIME DEFAULT CURRENT_TIMESTAMP,',
    'updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,',
    'is_corrupted BOOLEAN DEFAULT 0',
    ')',
  ].join('\n');

  const createTagsTable = [
    'CREATE TABLE IF NOT EXISTS tags (',
    'id INTEGER PRIMARY KEY AUTOINCREMENT,',
    'image_id INTEGER NOT NULL,',
    'tag TEXT NOT NULL,',
    'created_at DATETIME DEFAULT CURRENT_TIMESTAMP,',
    'FOREIGN KEY (image_id) REFERENCES images(id) ON DELETE CASCADE,',
    'UNIQUE(image_id, tag)',
    ')',
  ].join('\n');

  // Tables first, then the indexes that speed up the lookups used by this module.
  const statements = [
    createImagesTable,
    createTagsTable,
    'CREATE INDEX IF NOT EXISTS idx_images_source_url ON images(source_url)',
    'CREATE INDEX IF NOT EXISTS idx_images_fetched_at ON images(fetched_at)',
    'CREATE INDEX IF NOT EXISTS idx_images_file_hash ON images(file_hash)',
    'CREATE INDEX IF NOT EXISTS idx_tags_image_id ON tags(image_id)',
    'CREATE INDEX IF NOT EXISTS idx_tags_tag ON tags(tag)',
  ];

  return new Promise((resolve, reject) => {
    let firstError = null;
    let remaining = statements.length;

    db.serialize(() => {
      statements.forEach((sql) => {
        db.run(sql, (err) => {
          if (err && firstError === null) {
            firstError = err;
          }
          remaining -= 1;
          if (remaining > 0) {
            return;
          }
          // Last callback: settle exactly once with the overall outcome.
          if (firstError !== null) {
            reject(firstError);
            return;
          }
          console.log('Database initialized successfully');
          resolve();
        });
      });
    });
  });
}
/**
 * Insert a new image record.
 *
 * @param {Object} imageData - Supplies filename, source_url, file_path,
 *   filesize, file_hash and mime_type; each one is bound as a query parameter.
 * @returns {Promise<{id: number}>} Resolves with the id of the inserted row
 *   (`lastID` reported by the statement); rejects with the database error.
 */
function insertImage(imageData) {
  const sql =
    'INSERT INTO images (filename, source_url, file_path, filesize, file_hash, mime_type)\n' +
    'VALUES (?, ?, ?, ?, ?, ?)';

  return new Promise((resolve, reject) => {
    const values = [
      imageData.filename,
      imageData.source_url,
      imageData.file_path,
      imageData.filesize,
      imageData.file_hash,
      imageData.mime_type,
    ];

    // A regular function (not an arrow) is required: the row id is read from `this`.
    db.run(sql, values, function onInserted(err) {
      if (err) {
        reject(err);
        return;
      }
      resolve({ id: this.lastID });
    });
  });
}
/**
 * Add tags to an image.
 *
 * Inserts one `(image_id, tag)` row per entry using a prepared
 * `INSERT OR IGNORE` statement, so a pair that already exists is skipped.
 *
 * @param {number} imageId - Id of the image the tags belong to.
 * @param {string[]} tags - Tags to attach. Anything that is not a non-empty
 *   array resolves immediately without touching the database.
 * @returns {Promise<void>} Resolves once every insert succeeded and the
 *   statement was finalized; rejects with the first insert error (or the
 *   finalize error).
 */
function addTags(imageId, tags) {
  return new Promise((resolve, reject) => {
    const nothingToInsert = !Array.isArray(tags) || tags.length === 0;
    if (nothingToInsert) {
      resolve();
      return;
    }

    db.serialize(() => {
      const insertTag = db.prepare('INSERT OR IGNORE INTO tags (image_id, tag) VALUES (?, ?)');
      let finishedRows = 0;
      let alreadyFailed = false;

      // Shared completion handler for every row.
      const onRowDone = (err) => {
        if (err && !alreadyFailed) {
          // First failure: release the statement and report it.
          alreadyFailed = true;
          insertTag.finalize();
          reject(err);
          return;
        }

        finishedRows += 1;
        if (finishedRows !== tags.length) {
          return;
        }

        // Every row reported back without a first-time error.
        insertTag.finalize((finalizeErr) => {
          if (finalizeErr) {
            reject(finalizeErr);
          } else {
            resolve();
          }
        });
      };

      tags.forEach((tag) => {
        insertTag.run([imageId, tag], onRowDone);
      });
    });
  });
}
/**
 * Mark image as corrupted.
 *
 * Sets `is_corrupted` to 1 and refreshes `updated_at` on the row with the
 * given id.
 *
 * @param {number} imageId - Id of the image row to flag.
 * @returns {Promise<void>} Resolves when the update finished; rejects with
 *   the database error.
 */
function markAsCorrupted(imageId) {
  const sql = 'UPDATE images SET is_corrupted = 1, updated_at = CURRENT_TIMESTAMP WHERE id = ?';

  return new Promise((resolve, reject) => {
    db.run(sql, [imageId], (err) => {
      if (err) {
        reject(err);
        return;
      }
      resolve();
    });
  });
}
/**
 * Get image by ID.
 *
 * @param {number} imageId - Id of the image row to load.
 * @returns {Promise<Object|undefined>} Resolves with whatever row the lookup
 *   yields (all columns of `images`); rejects with the database error.
 */
function getImage(imageId) {
  const sql = 'SELECT * FROM images WHERE id = ?';

  return new Promise((resolve, reject) => {
    const onRow = (err, row) => {
      if (err) {
        reject(err);
        return;
      }
      resolve(row);
    };
    db.get(sql, [imageId], onRow);
  });
}
/**
 * Get all images with optional filtering.
 *
 * Only rows with `is_corrupted = 0` are returned.
 *
 * `sortBy` and `order` end up as identifiers/keywords in the SQL text and
 * cannot be bound as parameters, so they are validated against a fixed list
 * before the query is built. All other inputs are bound as parameters.
 *
 * @param {Object} [options]
 * @param {string} [options.tag] - Only images carrying this tag.
 * @param {string} [options.sourceUrl] - Only images fetched from this URL.
 * @param {number} [options.limit=100] - Maximum number of rows.
 * @param {number} [options.offset=0] - Number of rows to skip.
 * @param {string} [options.sortBy='fetched_at'] - Column of `images` to sort
 *   by (case-insensitive).
 * @param {string} [options.order='DESC'] - `ASC` or `DESC` (case-insensitive).
 * @returns {Promise<Object[]>} Matching rows (empty array when none); rejects
 *   with the database error, or with an Error when `sortBy`/`order` is not
 *   an allowed value.
 */
function getImages(options = {}) {
  return new Promise((resolve, reject) => {
    const { tag, sourceUrl, limit = 100, offset = 0, sortBy = 'fetched_at', order = 'DESC' } = options;

    // Columns declared by the CREATE TABLE statement for `images`.
    const sortableColumns = [
      'id',
      'filename',
      'source_url',
      'file_path',
      'filesize',
      'file_hash',
      'mime_type',
      'fetched_at',
      'created_at',
      'updated_at',
      'is_corrupted',
    ];

    const sortColumn = String(sortBy).toLowerCase();
    if (!sortableColumns.includes(sortColumn)) {
      reject(new Error(`Invalid sortBy column: ${String(sortBy)}`));
      return;
    }

    const sortDirection = String(order).toUpperCase();
    if (sortDirection !== 'ASC' && sortDirection !== 'DESC') {
      reject(new Error(`Invalid sort order: ${String(order)}`));
      return;
    }

    let query = 'SELECT DISTINCT i.* FROM images i';
    const params = [];

    if (tag) {
      query += ' INNER JOIN tags t ON i.id = t.image_id';
    }
    query += ' WHERE i.is_corrupted = 0';
    if (tag) {
      query += ' AND t.tag = ?';
      params.push(tag);
    }
    if (sourceUrl) {
      query += ' AND i.source_url = ?';
      params.push(sourceUrl);
    }

    // Safe to interpolate: both values were checked against fixed lists above.
    query += ` ORDER BY i.${sortColumn} ${sortDirection} LIMIT ? OFFSET ?`;
    params.push(limit, offset);

    db.all(query, params, (err, rows) => {
      if (err) {
        reject(err);
      } else {
        resolve(rows || []);
      }
    });
  });
}
/**
 * Get tags for an image.
 *
 * @param {number} imageId - Id of the image whose tags are wanted.
 * @returns {Promise<string[]>} Tag strings ordered by tag (empty array when
 *   the image has none); rejects with the database error.
 */
function getImageTags(imageId) {
  const sql = 'SELECT tag FROM tags WHERE image_id = ? ORDER BY tag';

  return new Promise((resolve, reject) => {
    db.all(sql, [imageId], (err, rows) => {
      if (err) {
        reject(err);
        return;
      }
      const tagRows = rows || [];
      resolve(tagRows.map((row) => row.tag));
    });
  });
}
/**
 * Get all unique tags.
 *
 * @returns {Promise<string[]>} Every distinct tag in the `tags` table,
 *   ordered by tag (empty array when there are none); rejects with the
 *   database error.
 */
function getAllTags() {
  const sql = 'SELECT DISTINCT tag FROM tags ORDER BY tag';

  return new Promise((resolve, reject) => {
    db.all(sql, (err, rows) => {
      if (err) {
        reject(err);
        return;
      }
      const tagRows = rows || [];
      resolve(tagRows.map((row) => row.tag));
    });
  });
}
/**
 * Get image count.
 *
 * Counts rows of `images` with `is_corrupted = 0`. When a truthy `tag` is
 * given, only images joined to that tag are counted (each image once).
 *
 * @param {string|null} [tag=null] - Optional tag to restrict the count to.
 * @returns {Promise<number>} The count, or 0 when the query yields no row or
 *   a falsy count; rejects with the database error.
 */
function getImageCount(tag = null) {
  const restrictToTag = Boolean(tag);

  const query = restrictToTag
    ? [
        'SELECT COUNT(DISTINCT i.id) as count FROM images i',
        'INNER JOIN tags t ON i.id = t.image_id',
        'WHERE i.is_corrupted = 0 AND t.tag = ?',
      ].join('\n')
    : 'SELECT COUNT(*) as count FROM images WHERE is_corrupted = 0';
  const params = restrictToTag ? [tag] : [];

  return new Promise((resolve, reject) => {
    db.get(query, params, (err, row) => {
      if (err) {
        reject(err);
        return;
      }
      resolve(row?.count || 0);
    });
  });
}
/**
 * Delete an image row.
 *
 * Runs `DELETE FROM images WHERE id = ?`, i.e. the database row is removed
 * outright rather than flagged. This function does not touch the image file
 * on disk; removing the file is left to other code.
 *
 * @param {number} imageId - Id of the image row to remove.
 * @returns {Promise<void>} Resolves when the statement finished; rejects
 *   with the database error.
 */
function deleteImage(imageId) {
  const sql = 'DELETE FROM images WHERE id = ?';

  return new Promise((resolve, reject) => {
    const onDone = (err) => {
      if (err) {
        reject(err);
        return;
      }
      resolve();
    };
    db.run(sql, [imageId], onDone);
  });
}
/**
 * Clean up old images (older than specified days).
 *
 * Deletes every `images` row whose `fetched_at` lies before "now minus
 * `daysOld` days". Only database rows are removed here.
 *
 * @param {number} [daysOld=30] - Age threshold in days.
 * @returns {Promise<number>} Number of rows the statement changed; rejects
 *   with the database error.
 */
function cleanupOldImages(daysOld = 30) {
  const sql = "DELETE FROM images WHERE fetched_at < datetime('now', '-' || ? || ' days')";

  return new Promise((resolve, reject) => {
    // Regular function on purpose: the affected-row count is read from `this`.
    db.run(sql, [daysOld], function onDeleted(err) {
      if (err) {
        reject(err);
        return;
      }
      resolve(this.changes);
    });
  });
}
/**
 * Get images by hash (detect duplicates).
 *
 * @param {string} hash - Value to match against the `file_hash` column.
 * @returns {Promise<Object[]>} Non-corrupted rows with that hash (empty
 *   array when none); rejects with the database error.
 */
function getImagesByHash(hash) {
  const sql = 'SELECT * FROM images WHERE file_hash = ? AND is_corrupted = 0';

  return new Promise((resolve, reject) => {
    db.all(sql, [hash], (err, rows) => {
      if (err) {
        reject(err);
        return;
      }
      resolve(rows || []);
    });
  });
}
/**
 * Close database connection.
 *
 * @returns {Promise<void>} Resolves (after logging a confirmation) once the
 *   connection is closed; rejects with the error reported by `db.close`.
 */
function closeDatabase() {
  return new Promise((resolve, reject) => {
    const onClosed = (err) => {
      if (err) {
        reject(err);
        return;
      }
      console.log('Database connection closed');
      resolve();
    };
    db.close(onClosed);
  });
}
// Public API of this module: the raw sqlite3 connection (`db`) plus the
// promise-returning helpers defined above.
module.exports = {
db,
initializeDatabase,
insertImage,
addTags,
markAsCorrupted,
getImage,
getImages,
getImageTags,
getAllTags,
getImageCount,
deleteImage,
cleanupOldImages,
getImagesByHash,
closeDatabase
};

162
lib/image-fetcher.js Normal file
View File

@@ -0,0 +1,162 @@
const storage = require('./storage');
const database = require('./database');
const axios = require('axios');
// Timer handle returned by setInterval in startFetcher(); reset to null by stopFetcher().
let fetchInterval = null;
// Set to true by startFetcher() and back to false by stopFetcher().
let isRunning = false;
/**
 * Fetch a single image from URL.
 *
 * Downloads the image through the storage module, records it in the
 * database and attaches the given tags. If the file was written but the
 * database insert fails, the file is removed again so it does not linger on
 * disk without a matching row.
 *
 * @param {string} sourceUrl - URL to download.
 * @param {string[]} [tags=[]] - Tags to attach to the new image.
 * @returns {Promise<Object>} `{ success: true, imageId, filename, filesize }`
 *   on success, otherwise `{ success: false, error }`. Never rejects; every
 *   error is logged and reported through the result object.
 */
async function fetchImage(sourceUrl, tags = []) {
  // Path of a file that is on disk but not (yet) referenced by a database row.
  let unrecordedFilePath = null;

  try {
    console.log(`[Image Fetcher] Fetching from: ${sourceUrl}`);

    // Download and save image
    const downloadResult = await storage.downloadAndSaveImage(sourceUrl);
    if (!downloadResult.success) {
      console.error(`[Image Fetcher] Failed to download: ${downloadResult.error}`);
      return { success: false, error: downloadResult.error };
    }
    unrecordedFilePath = downloadResult.file_path;

    // Insert into database
    const imageRecord = await database.insertImage({
      filename: downloadResult.filename,
      source_url: sourceUrl,
      file_path: downloadResult.file_path,
      filesize: downloadResult.filesize,
      file_hash: downloadResult.file_hash,
      mime_type: downloadResult.mime_type
    });
    // From here on the row owns the file; a later failure must not delete it.
    unrecordedFilePath = null;

    // Add tags if provided
    if (tags && tags.length > 0) {
      await database.addTags(imageRecord.id, tags);
    }

    console.log(`[Image Fetcher] Saved image: ${downloadResult.filename} (ID: ${imageRecord.id})`);
    return {
      success: true,
      imageId: imageRecord.id,
      filename: downloadResult.filename,
      filesize: downloadResult.filesize
    };
  } catch (err) {
    console.error('[Image Fetcher] Error:', err.message);

    // A constraint failure can mean another row already uses this filename;
    // in that case the file may belong to that row, so it is left alone.
    const mayBelongToOtherRow = err && err.code === 'SQLITE_CONSTRAINT';
    if (unrecordedFilePath !== null && !mayBelongToOtherRow) {
      try {
        await storage.deleteImageFile(unrecordedFilePath);
      } catch (cleanupErr) {
        // Best effort only: the original error is what the caller needs to see.
        console.error('[Image Fetcher] Could not remove unrecorded file:', cleanupErr.message);
      }
    }

    return { success: false, error: err.message };
  }
}
/**
 * Verify all images in database for corruption.
 *
 * Loads up to 10000 images via `database.getImages`, checks each file
 * against its stored hash and flags every failing image as corrupted.
 *
 * @returns {Promise<Object>} `{ checked, corrupted }` on success. When an
 *   error interrupts the run, the counts reached so far are returned together
 *   with an `error` message (instead of `undefined`), so callers always get
 *   an object. Never rejects.
 */
async function verifyAllImages() {
  let checked = 0;
  let corruptCount = 0;

  try {
    console.log('[Image Fetcher] Starting verification of all images...');
    const images = await database.getImages({ limit: 10000 });

    for (const image of images) {
      const verification = await storage.verifyImageIntegrity(image.file_path, image.file_hash);
      checked += 1;
      if (!verification.valid) {
        console.warn(`[Image Fetcher] Corrupted image detected: ${image.filename} - ${verification.reason}`);
        await database.markAsCorrupted(image.id);
        corruptCount++;
      }
    }

    if (corruptCount > 0) {
      console.log(`[Image Fetcher] Found and marked ${corruptCount} corrupted images`);
    } else {
      console.log('[Image Fetcher] Verification complete - all images intact');
    }
    return { checked, corrupted: corruptCount };
  } catch (err) {
    console.error('[Image Fetcher] Verification error:', err.message);
    return { checked, corrupted: corruptCount, error: err.message };
  }
}
/**
 * Fetch from multiple sources.
 *
 * Sources are processed one after another, in order.
 *
 * @param {Array<string|{url: string, tags?: string[]}>} sources - Either a
 *   plain URL string or an object carrying `url` and optional `tags`.
 * @returns {Promise<Object[]>} The `fetchImage` result of every source, in
 *   input order.
 */
async function fetchFromMultipleSources(sources) {
  console.log(`[Image Fetcher] Fetching from ${sources.length} source(s)...`);

  const outcomes = [];
  for (const entry of sources) {
    const isPlainUrl = typeof entry === 'string';
    const url = isPlainUrl ? entry : entry.url;
    const entryTags = entry.tags || [];
    outcomes.push(await fetchImage(url, entryTags));
  }
  return outcomes;
}
/**
 * Start periodic image fetching.
 *
 * Runs one fetch cycle immediately and then one every `intervalMinutes`.
 * A cycle that fails is logged instead of being left as an unhandled
 * promise rejection, so a bad source entry cannot take the process down.
 *
 * @param {Array<string|{url: string, tags?: string[]}>} sources - Sources
 *   handed to `fetchFromMultipleSources` on every cycle.
 * @param {number} [intervalMinutes=2.5] - Minutes between cycles.
 * @returns {Object|undefined} `{ status, interval, sources, nextFetch }`
 *   when the fetcher was started; `undefined` when it was already running.
 */
function startFetcher(sources, intervalMinutes = 2.5) {
  if (isRunning) {
    console.log('[Image Fetcher] Fetcher already running');
    return;
  }
  isRunning = true;
  const intervalMs = intervalMinutes * 60 * 1000;
  console.log(`[Image Fetcher] Starting fetcher with ${intervalMinutes} minute interval`);
  console.log(`[Image Fetcher] Will fetch from ${sources.length} source(s)`);

  // One fetch cycle. Nobody awaits the result, so failures are handled here.
  const runCycle = () => {
    new Promise((resolve) => {
      resolve(fetchFromMultipleSources(sources));
    }).catch((err) => {
      console.error('[Image Fetcher] Fetch cycle failed:', err?.message ?? err);
    });
  };

  // Do initial fetch immediately
  runCycle();

  // Then set up interval
  fetchInterval = setInterval(runCycle, intervalMs);

  return {
    status: 'running',
    interval: intervalMinutes,
    sources: sources.length,
    nextFetch: new Date(Date.now() + intervalMs)
  };
}
/**
 * Stop the fetcher.
 *
 * Does nothing when no interval timer is currently stored; otherwise clears
 * the timer, resets the module state and logs that the fetcher stopped.
 */
function stopFetcher() {
  if (!fetchInterval) {
    return;
  }

  clearInterval(fetchInterval);
  fetchInterval = null;
  isRunning = false;
  console.log('[Image Fetcher] Fetcher stopped');
}
/**
 * Get fetcher status.
 *
 * @returns {{running: boolean, nextFetch: string}} `running` mirrors the
 *   module's running flag; `nextFetch` is `'scheduled'` while an interval
 *   timer is stored and `'not running'` otherwise.
 */
function getStatus() {
  const nextFetch = fetchInterval ? 'scheduled' : 'not running';
  return { running: isRunning, nextFetch };
}
// Public API of the image fetcher: one-off fetching, the periodic fetcher
// controls, integrity verification and a status query.
module.exports = {
fetchImage,
fetchFromMultipleSources,
startFetcher,
stopFetcher,
verifyAllImages,
getStatus
};

244
lib/storage.js Normal file
View File

@@ -0,0 +1,244 @@
const fs = require('fs');
const path = require('path');
const crypto = require('crypto');
const axios = require('axios');
// Directory where image files are stored: <parent of this module's directory>/data/images
const IMAGES_DIR = path.join(__dirname, '..', 'data', 'images');
// Ensure images directory exists (runs once, when this module is loaded)
if (!fs.existsSync(IMAGES_DIR)) {
fs.mkdirSync(IMAGES_DIR, { recursive: true });
}
/**
 * Calculate SHA256 hash of a file.
 *
 * The file is read as a stream and fed to the hash chunk by chunk.
 *
 * @param {string} filePath - File to hash.
 * @returns {Promise<string>} Hex-encoded SHA-256 digest; rejects when the
 *   file cannot be read.
 */
async function calculateFileHash(filePath) {
  const hasher = crypto.createHash('sha256');
  const reader = fs.createReadStream(filePath);

  for await (const chunk of reader) {
    hasher.update(chunk);
  }
  return hasher.digest('hex');
}
/**
 * Generate a unique filename.
 *
 * The name has the form `image_<timestamp>_<md5>` followed by the extension
 * derived from the URL, where the MD5 is taken over the URL concatenated
 * with the timestamp.
 *
 * @param {string} sourceUrl - URL the image comes from.
 * @param {number} [timestamp=Date.now()] - Timestamp to embed in the name.
 * @returns {string} The generated filename.
 */
function generateFilename(sourceUrl, timestamp = Date.now()) {
  const digest = crypto.createHash('md5');
  digest.update(sourceUrl + timestamp);
  const fingerprint = digest.digest('hex');

  const extension = getFileExtension(sourceUrl);
  return `image_${timestamp}_${fingerprint}${extension}`;
}
/**
 * Get file extension from URL.
 *
 * Uses the extension of the URL's path. Falls back to `.jpg` when the URL
 * cannot be parsed, the path has no extension, or the extension is longer
 * than five characters (dot included).
 *
 * @param {string} url - URL to inspect.
 * @returns {string} Extension including the leading dot.
 */
function getFileExtension(url) {
  const fallback = '.jpg';

  let extension;
  try {
    extension = path.extname(new URL(url).pathname);
  } catch (err) {
    return fallback;
  }

  const usable = extension !== '' && extension.length <= 5;
  return usable ? extension : fallback;
}
/**
 * Download an image and save it to storage.
 *
 * Fetches the URL as binary data, writes it into the images directory under
 * a generated filename and returns the metadata needed for the database row.
 * An empty response body is treated as a failed download, so no zero-byte
 * file is stored. The file is written asynchronously so a large download
 * does not block the event loop.
 *
 * @param {string} sourceUrl - URL to download.
 * @param {Object} [options]
 * @param {number} [options.timeout=30000] - Request timeout in milliseconds.
 * @param {number} [options.maxSize=52428800] - Maximum accepted response
 *   size in bytes (passed to axios as `maxContentLength`).
 * @returns {Promise<Object>} On success `{ filename, file_path,
 *   relative_path, filesize, file_hash, mime_type, source_url, success: true }`;
 *   on any failure `{ source_url, success: false, error }`.
 */
async function downloadAndSaveImage(sourceUrl, options = {}) {
  const { timeout = 30000, maxSize = 50 * 1024 * 1024 } = options;
  try {
    // Download image
    const response = await axios({
      url: sourceUrl,
      method: 'GET',
      responseType: 'arraybuffer',
      timeout,
      maxContentLength: maxSize,
      headers: {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
      }
    });

    const fileBuffer = Buffer.from(response.data);
    const filesize = fileBuffer.length;
    if (filesize === 0) {
      // A zero-byte body can never be a usable image; report it as a failure.
      throw new Error('Downloaded file is empty');
    }

    const filename = generateFilename(sourceUrl);
    const filePath = path.join(IMAGES_DIR, filename);
    const file_hash = crypto.createHash('sha256').update(fileBuffer).digest('hex');
    const mime_type = response.headers['content-type'] || 'image/jpeg';

    // Save file
    await fs.promises.writeFile(filePath, fileBuffer);

    return {
      filename,
      file_path: filePath,
      relative_path: path.relative(path.join(__dirname, '..'), filePath),
      filesize,
      file_hash,
      mime_type,
      source_url: sourceUrl,
      success: true
    };
  } catch (err) {
    console.error(`Failed to download image from ${sourceUrl}:`, err.message);
    return {
      source_url: sourceUrl,
      success: false,
      error: err.message
    };
  }
}
/**
 * Verify image integrity using stored hash.
 *
 * @param {string} filePath - File to check.
 * @param {string} expectedHash - Hash the file is expected to have.
 * @returns {Promise<Object>} `{ valid: true }` when the hashes match;
 *   otherwise `{ valid: false, reason }`, with `expectedHash` and
 *   `actualHash` added for a mismatch. Errors are reported as a reason
 *   instead of being thrown.
 */
async function verifyImageIntegrity(filePath, expectedHash) {
  const invalid = (reason, details = {}) => ({ valid: false, reason, ...details });

  try {
    const present = fs.existsSync(filePath);
    if (!present) {
      return invalid('File does not exist');
    }

    const actualHash = await calculateFileHash(filePath);
    if (actualHash === expectedHash) {
      return { valid: true };
    }
    return invalid('Hash mismatch', { expectedHash, actualHash });
  } catch (err) {
    return invalid(err.message);
  }
}
/**
 * Delete image file.
 *
 * @param {string} filePath - File to remove.
 * @returns {Promise<boolean>} `true` when the file existed and was removed,
 *   `false` when there was no such file; rejects when removal fails.
 */
async function deleteImageFile(filePath) {
  const present = fs.existsSync(filePath);
  if (!present) {
    return false;
  }

  fs.unlinkSync(filePath);
  return true;
}
/**
 * Get image file as buffer.
 *
 * @param {string} filePath - File to read.
 * @returns {Promise<Buffer>} The complete file contents; rejects when the
 *   file cannot be read.
 */
function getImageBuffer(filePath) {
  return fs.promises.readFile(filePath);
}
/**
 * Get storage statistics.
 *
 * Sums the size of every regular file directly inside the images directory
 * (subdirectories are not descended into and not counted).
 *
 * @returns {Promise<Object>} `{ totalSize, totalSizeGB, fileCount, dataDir }`
 *   where `totalSize` is in bytes and `totalSizeGB` is a string with two
 *   decimals. The same shape is returned when the directory does not exist
 *   (all zero). Rejects when the directory cannot be read.
 */
function getStorageStats() {
  return new Promise((resolve, reject) => {
    try {
      let totalSize = 0;
      let fileCount = 0;

      // A missing directory simply means nothing is stored yet.
      if (fs.existsSync(IMAGES_DIR)) {
        for (const file of fs.readdirSync(IMAGES_DIR)) {
          const stats = fs.statSync(path.join(IMAGES_DIR, file));
          if (stats.isFile()) {
            totalSize += stats.size;
            fileCount++;
          }
        }
      }

      resolve({
        totalSize,
        totalSizeGB: (totalSize / (1024 ** 3)).toFixed(2),
        fileCount,
        dataDir: IMAGES_DIR
      });
    } catch (err) {
      reject(err);
    }
  });
}
/**
 * Clean up orphaned files (files in storage but not in database).
 *
 * For every entry in the images directory the database is asked whether a
 * row with that filename exists; entries without a row are deleted.
 *
 * A failed database lookup aborts the run. It is never interpreted as
 * "not in the database", because that would delete files that are still
 * referenced whenever the database is temporarily unavailable.
 *
 * @param {Object} db - Database handle exposing `get(sql, params, callback)`.
 * @returns {Promise<Object>} `{ cleaned }` with the number of deleted files;
 *   when the run is aborted, `{ cleaned, error }` with the count reached so
 *   far. Never rejects.
 */
async function cleanupOrphanedFiles(db) {
  let cleaned = 0;

  try {
    if (!fs.existsSync(IMAGES_DIR)) {
      return { cleaned: 0 };
    }

    const files = fs.readdirSync(IMAGES_DIR);
    for (const file of files) {
      const filePath = path.join(IMAGES_DIR, file);

      // Check if file exists in database; a lookup error rejects and ends the run.
      const existsInDb = await new Promise((resolve, reject) => {
        db.get(
          'SELECT id FROM images WHERE filename = ?',
          [file],
          (err, row) => {
            if (err) {
              reject(err);
            } else {
              resolve(Boolean(row));
            }
          }
        );
      });

      if (existsInDb) {
        continue;
      }

      try {
        fs.unlinkSync(filePath);
        cleaned++;
      } catch (err) {
        // One undeletable entry should not stop the rest of the cleanup.
        console.error(`Failed to delete orphaned file ${file}:`, err.message);
      }
    }

    return { cleaned };
  } catch (err) {
    console.error('Cleanup error:', err);
    return { cleaned, error: err.message };
  }
}
// Public API of the storage module: download/verify/delete helpers, hashing
// and naming utilities, storage statistics, orphan cleanup, and the images
// directory path.
module.exports = {
downloadAndSaveImage,
verifyImageIntegrity,
deleteImageFile,
getImageBuffer,
getStorageStats,
calculateFileHash,
generateFilename,
getFileExtension,
cleanupOrphanedFiles,
IMAGES_DIR
};