/** @module routes/v1 */
// Register router
const fs = require('fs');
const { spawn } = require('child_process');
const Jimp = require('jimp');
const TokenBucket = require('tokenbucket');
const { GoogleGenAI, Type } = require('@google/genai');
const express = require('express');
const router = express.Router();
const ScanController = require('../../controllers/ScanController.js');
const UserController = require('../../controllers/UserController.js');
const BookController = require('../../controllers/BookController.js');
const { UserFactory } = require('../../classes/users/User.js');
const { authorize, generateTempImagePath } = require('../../utils.js');
const { BookFactory } = require('../../classes/books/Book.js');
// Gemini request budget: 4000 requests per minute (RPM).
const GEMINI_REQUESTS_PER_MINUTE = 4000;
const GEMINI_BUCKET_INTERVAL_MS = 60000;

// Bucket that scanSpines draws one token from before every Gemini request.
const geminiTokenBucket = new TokenBucket({
  size: GEMINI_REQUESTS_PER_MINUTE,
  interval: GEMINI_BUCKET_INTERVAL_MS,
  tokensToAddPerInterval: GEMINI_REQUESTS_PER_MINUTE,
});

// Shared Google GenAI client; the API key is read from the environment.
const ai = new GoogleGenAI({
  apiKey: process.env.GOOGLE_GENAI_API_KEY,
});
/**
 * Stream the first file part of a multipart upload to disk.
 *
 * @param {object} req Express request with a busboy parser attached as `req.busboy`
 * @param {string} destinationPath Path the uploaded file is written to
 * @returns {Promise<boolean>} true once the file has been fully written, false when
 *   the request body was parsed without encountering a file part
 */
function saveUploadedImage(req, destinationPath) {
  return new Promise((resolve, reject) => {
    let fileStream = null;
    req.busboy.on('file', (fieldname, file) => {
      if (fileStream) {
        // Only one image is stored per scan: drain extra parts so parsing can finish.
        file.resume();
        return;
      }
      fileStream = fs.createWriteStream(destinationPath);
      fileStream.on('error', reject);
      // 'close' fires after the last byte is flushed, so the image is complete on disk.
      fileStream.on('close', () => resolve(true));
      file.on('error', reject);
      file.pipe(fileStream);
    });
    req.busboy.on('error', reject);
    // Parsing ended without any file part: settle instead of waiting forever.
    const onParsingDone = () => {
      if (!fileStream) {
        resolve(false);
      }
    };
    req.busboy.on('finish', onParsingDone);
    req.busboy.on('close', onParsingDone);
    req.pipe(req.busboy);
  });
}

/**
 * Route for running AI inference using the DAHLS model & scan all books.
 *
 * Creates a scan record, stores the uploaded image, segments the spines and then
 * starts the (slow) book identification in the background. Responds with the scan
 * id, the number of spines found and an estimated completion time in milliseconds.
 */
router.post('/scan/upload', async (req, res) => {
  if (!await authorize(['inference.spine'], req, res)) {
    return;
  }
  if (!req.busboy) {
    res.status(400).send({ message: 'No file provided' });
    return;
  }

  // Tolerate a missing req.body instead of throwing on the property access.
  const { lat: latitude, lng: longitude } = req.body || {};
  const sController = new ScanController();
  const scan = latitude && longitude
    ? await sController.insertWithLocation(req.user.id, latitude, longitude)
    : await sController.insert(req.user.id);
  if (!scan) {
    res.status(500).send({ success: false, message: 'Scan not created' });
    return;
  }

  const imagePath = `${process.env.STORAGE_PATH}/scans/${scan.id}.jpg`;
  // Removes the scan record and whatever part of the image reached the disk.
  const discardScan = async () => {
    await sController.delete(scan.id);
    await fs.promises.rm(imagePath, { force: true });
  };

  let fileReceived;
  try {
    fileReceived = await saveUploadedImage(req, imagePath);
  } catch (error) {
    console.error('Error saving uploaded scan image:', error);
    res.status(500).send({ success: false, message: 'Error saving image' });
    await discardScan();
    return;
  }
  if (!fileReceived) {
    res.status(400).send({ message: 'No file provided' });
    await discardScan();
    return;
  }
  await sController.changeImagePath(scan.id, imagePath);

  let segmentData;
  try {
    segmentData = await segmentImage(imagePath);
  } catch (error) {
    // A thrown error is treated like an empty result so the client still gets a response.
    console.error('Error segmenting image:', error);
    segmentData = null;
  }
  if (!segmentData) {
    res.status(500).send({ success: false, message: 'Error segmenting image' });
    await discardScan();
    return;
  }
  if (segmentData.length === 0) {
    // NOTE(review): the scan record and image are kept here, unlike the error branch — confirm this is intended.
    res.status(404).send({ success: false, message: 'No spines found' });
    return;
  }

  // Fire-and-forget: identification keeps running after the response is sent, so a
  // failure must be caught here to avoid an unhandled promise rejection.
  startScan(scan.id, req.user.id, imagePath, segmentData).catch(error => {
    console.error(`Scan ${scan.id} failed:`, error);
  });

  const amountOfSpines = segmentData.length;
  await sController.changeSpinesSegmentedBy(scan.id, amountOfSpines);
  // 6 seconds for inference + 0.2 seconds per spine, expressed in milliseconds
  const estimatedCompletionTime = (6 + 0.2 * amountOfSpines) * 1000;
  res.status(200).send({
    success: true,
    scan_id: scan.id,
    books: amountOfSpines,
    estimated_completion: estimatedCompletionTime
  });
});
/**
 * Route for getting scan results.
 *
 * Only the user that owns the scan may read it. The response carries the scan's
 * status, location and counters together with every identified book, its segment
 * polygon (as `{x, y}` points) and its authors.
 */
router.get('/scan/:scan_id', async (req, res) => {
  const isAuthorized = await authorize(['inference.spine'], req, res);
  if (!isAuthorized) {
    return;
  }

  // All error responses of this route share the same envelope.
  const fail = (statusCode, message) => {
    res.status(statusCode).send({ success: false, message });
  };

  const { scan_id: scanId } = req.params;
  if (!scanId) {
    fail(400, 'Missing scan ID');
    return;
  }

  const sController = new ScanController();
  const scan = await sController.byId(scanId);
  if (!scan) {
    fail(400, 'Invalid scan ID');
    return;
  }
  if (scan.user_id !== req.user.id) {
    fail(403, 'Insufficient permissions');
    return;
  }

  // Stored segments are JSON arrays of [x, y] pairs; expose them as objects.
  const toPoint = point => ({
    x: point[0],
    y: point[1],
  });
  const toAuthor = author => ({
    id: author.id,
    name: author.name,
    personal_name: author.personal_name,
  });
  const toBook = book => ({
    id: book.id,
    title: book.title,
    subtitle: book.subtitle,
    segment_area: JSON.parse(book.segment).map(point => toPoint(point)),
    authors: book.authors.map(author => toAuthor(author)),
  });

  const results = await sController.getResultsAndData(scanId);
  res.status(200).send({
    success: true,
    scan_id: scan.id,
    status: scan.status,
    created_at: scan.created_at,
    location: {
      lat: scan.latitude,
      lng: scan.longitude,
    },
    amount_of_books_identified: scan.amount_of_books_identified,
    amount_of_spines_segmented: scan.amount_of_spines_segmented,
    books: results.map(book => toBook(book)),
  });
});
/**
 * Route for deleting a scan.
 *
 * Expects `scan_id` in the request body; only the user that owns the scan may
 * delete it.
 */
router.post('/scan/delete', async (req, res) => {
  const isAuthorized = await authorize(['inference.spine'], req, res);
  if (!isAuthorized) {
    return;
  }

  // Error responses of this route only carry a message.
  const fail = (statusCode, message) => {
    res.status(statusCode).send({ message });
  };

  const scanId = req.body.scan_id;
  if (!scanId) {
    fail(400, 'Missing scan_id');
    return;
  }

  const sController = new ScanController();
  const scan = await sController.byId(scanId);
  if (!scan) {
    fail(400, 'Invalid scan_id');
    return;
  }
  if (scan.user_id !== req.user.id) {
    fail(403, 'Insufficient permissions');
    return;
  }

  await sController.delete(scanId);
  res.send({ message: 'Scan deleted' });
});
// Export the router carrying the scan routes registered above.
module.exports = router;
// Helper functions used by the route handlers above
/**
 * Run the scan pipeline for an uploaded image and, once it has finished, add one
 * step of progress to the user's "Scanner" goal track.
 *
 * @param {number} scanId ID of the scan being processed
 * @param {number} userId ID of the user that uploaded the image
 * @param {string} imagePath Path of the stored scan image
 * @param {Array<object>} segmentData Segmentation predictions, passed on to scanSpines
 * @return {Promise<void>}
 */
async function startScan(scanId, userId, imagePath, segmentData) {
  await scanSpines(scanId, segmentData, imagePath);

  const users = new UserController();
  const userFactory = new UserFactory();
  const userRecord = await users.byId(userId);
  const user = await userFactory.load(userRecord).create();

  await user.changeGoalProgressByTrackName('Scanner', 'bookshelves scanned', 1);
}
/**
 * Run the DAHLS spine-detection model on an image through the Roboflow API.
 *
 * The image is sent base64-encoded in the request body. Every failure (unreadable
 * file, network error, non-2xx status, malformed response) is logged and reported
 * as `null`, so callers only need a falsy check.
 *
 * @param {string} imagePath Path of the image file to analyse
 * @returns {Promise<Array<object>|null>} The `predictions` array of the response,
 *   or null when segmentation failed
 */
async function segmentImage(imagePath) {
  try {
    // Non-blocking read: this runs inside a request handler.
    const image = await fs.promises.readFile(imagePath, { encoding: 'base64' });

    const endpoint = new URL('https://serverless.roboflow.com/dahl-s-book-spine-detection/4');
    // searchParams takes care of encoding the key.
    endpoint.searchParams.set('api_key', process.env.ROBOFLOW_API_KEY);

    const response = await fetch(endpoint, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/x-www-form-urlencoded',
      },
      body: image,
    });
    if (!response.ok) {
      console.error(`Error segmenting image: Roboflow responded with status ${response.status}`);
      return null;
    }

    const data = await response.json();
    return Array.isArray(data.predictions) ? data.predictions : null;
  } catch (error) {
    console.error('Error segmenting image:', error);
    return null;
  }
}
/**
 * Cut a single spine out of the scan image.
 *
 * Everything outside the segment polygon is masked out, the image is cropped to the
 * polygon's bounding box and returned as base64-encoded JPEG data (without the
 * data-URL prefix).
 *
 * @param {object} originalImage Jimp image of the full scan; it is cloned, not modified
 * @param {{points: Array<{x: number, y: number}>}} segment Segment polygon
 * @returns {Promise<string>} Base64 JPEG data of the cropped spine
 */
async function cropSpineImage(originalImage, segment) {
  const xs = segment.points.map(p => p.x);
  const ys = segment.points.map(p => p.y);
  const minX = Math.min(...xs);
  const maxX = Math.max(...xs);
  const minY = Math.min(...ys);
  const maxY = Math.max(...ys);
  const width = maxX - minX + 1;
  const height = maxY - minY + 1;

  const spineImage = originalImage.clone();
  const imageWidth = spineImage.bitmap.width;
  const imageHeight = spineImage.bitmap.height;
  const mask = new Jimp(imageWidth, imageHeight, 0x000000FF);
  const polygon = segment.points.map(p => [p.x, p.y]);

  // A pixel outside the polygon's bounding box cannot be inside the polygon, so only
  // that window (padded by one pixel, clamped to the image) needs the polygon test.
  const clamp = (value, upperBound) => Math.min(Math.max(value, 0), upperBound);
  const left = clamp(Math.floor(minX) - 1, imageWidth);
  const top = clamp(Math.floor(minY) - 1, imageHeight);
  const right = clamp(Math.ceil(maxX) + 2, imageWidth);
  const bottom = clamp(Math.ceil(maxY) + 2, imageHeight);
  if (right > left && bottom > top) {
    mask.scan(left, top, right - left, bottom - top, function (x, y, idx) {
      if (pointInPolygon(x, y, polygon)) {
        this.bitmap.data[idx + 0] = 255; // R
        this.bitmap.data[idx + 1] = 255; // G
        this.bitmap.data[idx + 2] = 255; // B
        this.bitmap.data[idx + 3] = 255; // A
      }
    });
  }

  spineImage.mask(mask, 0, 0);
  spineImage.crop(minX, minY, width, height);
  const dataUrl = await spineImage.getBase64Async(Jimp.MIME_JPEG);
  return dataUrl.replace('data:image/jpeg;base64,', '');
}

/**
 * Look up the books matching a recognised title (and author), best match first.
 *
 * Results are memoised in `searchCache` per lower-cased title/author pair. Without
 * an author the natural-language title search is used and weak matches
 * (similarity <= 0.5) are dropped.
 *
 * @param {object} bController BookController used for the lookups
 * @param {Map<string, Array<object>>} searchCache Cache shared by all spines of a scan
 * @param {string} title Recognised title
 * @param {string|null} author Recognised author, if any
 * @returns {Promise<Array<object>>} Candidate books sorted by descending title similarity
 */
async function findBookCandidates(bController, searchCache, title, author) {
  // The same key is used for reading and writing, also when there is no author.
  const cacheKey = `${title.toLowerCase()}-${(author || 'null').toLowerCase()}`;
  if (searchCache.has(cacheKey)) {
    return searchCache.get(cacheKey);
  }

  let candidates = author
    ? await bController.searchByTitleAndAuthorName(title, author)
    : await bController.searchByTitleNatural(title, 100);
  candidates.forEach(book => { book.similarity = stringSimilarity(book.title, title); });
  candidates.sort((a, b) => b.similarity - a.similarity);
  if (!author) {
    candidates = candidates.filter(book => book.similarity > 0.5);
  }

  searchCache.set(cacheKey, candidates);
  return candidates;
}

/**
 * Ask Gemini which book a cropped spine shows and store the best database match as
 * a scan result. Never rejects: failures are logged and the spine is skipped.
 *
 * @param {string} spineJpegBase64 Base64 JPEG data of the cropped spine
 * @param {{points: Array<{x: number, y: number}>}} segment Segment the crop was made from
 * @param {number} scanId ID of the scan the result belongs to
 * @param {object} bController BookController used for the lookups
 * @param {object} sController ScanController used to store the result
 * @param {Map<string, Array<object>>} searchCache Cache shared by all spines of a scan
 * @returns {Promise<void>}
 */
async function identifySpine(spineJpegBase64, segment, scanId, bController, sController, searchCache) {
  const contents = [
    {
      role: 'user',
      parts: [
        {
          inlineData: {
            mimeType: "image/jpeg",
            data: spineJpegBase64,
          },
        },
        {
          type: 'text',
          text: "What is the book shown in the image?",
        },
      ],
    }
  ];
  const config = {
    responseMimeType: 'application/json',
    responseSchema: {
      type: Type.OBJECT,
      required: ["title", "author"],
      properties: {
        title: {
          type: Type.STRING,
        },
        author: {
          type: Type.STRING,
        },
      },
    },
    systemInstruction: [
      {
        text: `If no author name is present, infer the author from your knowledge. If nothing is present, output "null".`,
      }
    ],
  };

  try {
    const data = await ai.models.generateContent({
      model: 'gemini-2.0-flash-lite',
      config,
      contents,
    });
    const result = JSON.parse(data.candidates[0].content.parts[0].text);
    // The model is instructed to answer with the string "null" when nothing is visible.
    const title = result.title !== 'null' ? result.title : null;
    const author = result.author !== 'null' ? result.author : null;
    if (!title) {
      return;
    }

    const candidates = await findBookCandidates(bController, searchCache, title, author);
    if (candidates.length === 0) {
      return;
    }

    const bestMatch = candidates[0];
    await sController.insertResult(scanId, bestMatch.id, segment.points.map(p => [Math.round(p.x), Math.round(p.y)]));
    await sController.changeBooksIdentified(scanId, 1);
  } catch (error) {
    console.error('Error identifying spine:', error);
  }
}

/**
 * Identify the book on every segmented spine of a scan.
 *
 * Marks the scan as 'processing', crops each spine out of the image, sends the
 * crops to Gemini (rate limited through the token bucket), stores the best match
 * for every recognised spine and finally marks the scan as 'completed'. A spine
 * that cannot be cropped or identified is logged and skipped, so the scan always
 * reaches 'completed' once every spine has been handled.
 *
 * @param {number} scanId ID of the scan being processed
 * @param {Array<{points: Array<{x: number, y: number}>}>} segmentData Segment polygons, one per spine
 * @param {string} imagePath Path of the stored scan image
 * @returns {Promise<void>}
 */
async function scanSpines(scanId, segmentData, imagePath) {
  const originalImage = await Jimp.read(imagePath);
  const bController = new BookController();
  const sController = new ScanController();
  await sController.updateStatus(scanId, 'processing');

  const searchCache = new Map();
  const pendingSpines = [];
  for (const segment of segmentData) {
    try {
      const spineJpegBase64 = await cropSpineImage(originalImage, segment);
      await geminiTokenBucket.removeTokens(1);
      // Not awaited here: identification of one spine overlaps with cropping the next.
      pendingSpines.push(
        identifySpine(spineJpegBase64, segment, scanId, bController, sController, searchCache)
      );
    } catch (error) {
      console.error('Error preparing spine image:', error);
    }
  }

  // identifySpine never rejects, so this settles once every spine has been handled.
  await Promise.all(pendingSpines);
  await sController.updateStatus(scanId, 'completed');
}
/**
 * Test whether a point lies inside a polygon by counting how many polygon edges
 * cross the horizontal line through the point to the right of it.
 *
 * @param {number} x X coordinate of the point
 * @param {number} y Y coordinate of the point
 * @param {Array<Array<number>>} polygon Polygon vertices as [x, y] pairs
 * @returns {boolean} true when the number of crossings is odd
 */
function pointInPolygon(x, y, polygon) {
  const vertexCount = polygon.length;
  let crossings = 0;

  for (let current = 0; current < vertexCount; current += 1) {
    // Each vertex forms an edge with its predecessor; the first pairs with the last.
    const previous = current === 0 ? vertexCount - 1 : current - 1;
    const currentX = polygon[current][0];
    const currentY = polygon[current][1];
    const previousX = polygon[previous][0];
    const previousY = polygon[previous][1];

    const straddlesRow = (currentY > y) !== (previousY > y);
    if (!straddlesRow) {
      continue;
    }

    // The small constant in the divisor is part of the established behaviour.
    const crossingX = (previousX - currentX) * (y - currentY) / (previousY - currentY + 0.00001) + currentX;
    if (x < crossingX) {
      crossings += 1;
    }
  }

  return crossings % 2 === 1;
}
/**
 * Score how similar two strings are by comparing their overlapping substrings
 * (bigrams by default).
 *
 * @param {string} str1 First string to match
 * @param {string} str2 Second string to match
 * @param {number} [substringLength=2] Optional. Length of the substrings that are compared. Default 2.
 * @param {boolean} [caseSensitive=false] Optional. Whether case is taken into account. Default false.
 * @returns {number} Number between 0 and 1, with 0 being a low match score. Strings
 *   shorter than `substringLength` always score 0.
 */
const stringSimilarity = (str1, str2, substringLength = 2, caseSensitive = false) => {
  const first = caseSensitive ? str1 : str1.toLowerCase();
  const second = caseSensitive ? str2 : str2.toLowerCase();

  if (first.length < substringLength || second.length < substringLength) {
    return 0;
  }

  // Count how often every substring occurs in the first string.
  const available = new Map();
  const firstEnd = first.length - (substringLength - 1);
  for (let start = 0; start < firstEnd; start += 1) {
    const piece = first.slice(start, start + substringLength);
    const seen = available.has(piece) ? available.get(piece) : 0;
    available.set(piece, seen + 1);
  }

  // Every substring of the second string may consume one occurrence from the first.
  let shared = 0;
  const secondEnd = second.length - (substringLength - 1);
  for (let start = 0; start < secondEnd; start += 1) {
    const piece = second.slice(start, start + substringLength);
    const remaining = available.has(piece) ? available.get(piece) : 0;
    if (remaining > 0) {
      available.set(piece, remaining - 1);
      shared += 1;
    }
  }

  return (shared * 2) / (first.length + second.length - ((substringLength - 1) * 2));
};