Source: routes/v1/search-ai.js

/** @module routes/v1 */

// Register router

const { authorize, generateTempImagePath, createTerms } = require('../../utils.js');
const fs = require('fs');
const Jimp = require('jimp');
const similarity = require('compute-cosine-similarity');

const express = require('express');
const router = express.Router();

const { Book } = require('../../classes/books/Book.js');

const BookController = require('../../controllers/BookController.js');
const AuthorController = require('../../controllers/AuthorController.js');
const EditionController = require('../../controllers/EditionController.js');
const SpineImageController = require('../../controllers/SpineImageController.js');
const ScanController = require('../../controllers/ScanController.js');
const UserController = require('../../controllers/UserController.js');

/**
 * POST /search/ocr — search for books based on OCR text and an optional spine image.
 *
 * Request body fields read by this handler:
 *  - `scan`: id of an existing scan (required)
 *  - `elements`: comma-separated OCR text elements, 3 to 1000 entries (required)
 *  - `image`: base64-encoded spine image (optional)
 *  - `min_confidence`: minimum confidence a book needs to be returned (optional, default 0.1)
 *
 * Responds with the object produced by searchForBooks, or with status 400 and a
 * `message` when validation fails. Unexpected errors are forwarded to Express
 * through `next`.
 */
router.post('/search/ocr', async (req, res, next) => {
    try {
        if (!await authorize(['search.ocr'], req, res)) {
            return;
        }

        const rawScanId = req.body.scan;
        if (!rawScanId) {
            res.status(400).send({ message: 'Missing scan' });
            return;
        }

        // Parse once with an explicit radix and reuse the numeric id for every
        // controller call, so lookup and update always target the same scan.
        const scanId = Number.parseInt(rawScanId, 10);
        const sController = new ScanController();
        const scan = Number.isNaN(scanId) ? null : await sController.byId(scanId);

        if (!scan) {
            res.status(400).send({ message: 'Invalid scan' });
            return;
        }

        const ocrElementsStr = req.body.elements;
        if (!ocrElementsStr) {
            res.status(400).send({ message: 'Missing elements' });
            return;
        }

        // A JSON body can carry any type here; only strings can be split.
        if (typeof ocrElementsStr !== 'string') {
            res.status(400).send({ message: 'Invalid elements' });
            return;
        }

        const ocrElements = ocrElementsStr.split(',');

        // NOTE(review): searchForBooks scores every ordered subset of the elements
        // (2^n - 1 of them), so this upper bound does not protect against very
        // expensive requests — consider a much lower limit.
        if (ocrElements.length > 1000) {
            res.status(400).send({ message: 'Elements too long' });
            return;
        }

        if (ocrElements.length < 3) {
            res.status(400).send({ message: 'Elements too short' });
            return;
        }

        const imageB64 = req.body.image;
        if (imageB64 && typeof imageB64 !== 'string') {
            res.status(400).send({ message: 'Invalid image' });
            return;
        }

        // Only fall back to the default when the field is absent or unparsable;
        // an explicit 0 is a valid threshold and NaN would disable filtering.
        let minConfidence = 0.1;
        if (req.body.min_confidence !== undefined && req.body.min_confidence !== null) {
            const parsedConfidence = Number.parseFloat(req.body.min_confidence);
            if (!Number.isNaN(parsedConfidence)) {
                minConfidence = parsedConfidence;
            }
        }

        const imageFilepath = imageB64 ? generateTempImagePath() : undefined;
        let booksResult;
        try {
            if (imageFilepath) {
                await fs.promises.writeFile(imageFilepath, Buffer.from(imageB64, 'base64'));
            }

            booksResult = await searchForBooks(ocrElements, imageFilepath, minConfidence);
        } finally {
            // Always remove the temporary image, even when the search fails.
            if (imageFilepath) {
                try {
                    await fs.promises.unlink(imageFilepath);
                } catch (unlinkError) {
                    // The file may never have been created if the write failed.
                    if (unlinkError.code !== 'ENOENT') {
                        throw unlinkError;
                    }
                }
            }
        }

        if (booksResult.possible_books.length > 0) {
            await sController.changeBooksIdentified(scanId, 1);
            await req.user.changeGoalProgressByTrackName('Scanner', 'books scanned', 1);
        }

        res.send(booksResult);
    } catch (err) {
        // Rejections from an async handler are not forwarded automatically by
        // Express 4, so hand them to the error middleware explicitly.
        next(err);
    }
});

// Export the router (with the /search/ocr route registered above) as this module's value.
module.exports = router;

// Functions

// Relative weight of each signal when searchForBooks combines them into a
// single confidence value (weighted sum divided by the sum of applied weights).
const confWeights = {
    text_similarity: 7,
    author_text_similarity: 4,
    spine_image: 9,
};

/**
 * Search for books based on text from OCR and, optionally, a spine image.
 *
 * Candidates are pulled with BookController#searchByOCRElements. Each candidate
 * receives a confidence: the weighted sum (see confWeights) of
 *  - the best title similarity over every phrase returned by permutations(),
 *  - the best author-name similarity over every ordered pair of OCR elements,
 *  - the best spine-image cosine similarity (only when image_filepath is given),
 * divided by the sum of the weights that were applied.
 *
 * Candidates whose book info cannot be loaded are skipped. At most the five
 * highest-confidence books at or above min_conf are returned.
 *
 * @param {string[]} ocrElements
 * @param {string} image_filepath (default null)
 * @param {number} min_conf (default 0)
 * @returns {Promise<{possible_books: object[], time: {db_pull: number, similarity: number, author_and_spine: number}}>}
 *          matching books plus the elapsed milliseconds of each phase
 */
async function searchForBooks(ocrElements, image_filepath = null, min_conf = 0) {
    const bController = new BookController();

    const dbPullStartTime = Date.now();
    const possibleBooks = await bController.searchByOCRElements(ocrElements);
    const dbPullEndTime = Date.now();

    const similarityStartTime = Date.now();
    // Build each candidate phrase once instead of once per book, and skip the
    // work entirely when there is nothing to score.
    // NOTE(review): permutations() returns 2^n - 1 phrases for n elements, so
    // this step grows exponentially with the number of OCR elements.
    const titlePhrases = possibleBooks.length > 0
        ? permutations(ocrElements).map((perm) => perm.join(' ').toLowerCase())
        : [];

    // Scores are kept locally rather than written onto the database records.
    const bestTitleSimilarity = new Map();
    for (const bookDbRecord of possibleBooks) {
        const title = bookDbRecord.title.toLowerCase();
        let best = 0;
        for (const phrase of titlePhrases) {
            const titleSimilarity = stringSimilarity(title, phrase);
            if (titleSimilarity > best) {
                best = titleSimilarity;
            }
        }
        bestTitleSimilarity.set(bookDbRecord, best);
    }
    const similarityEndTime = Date.now();

    const authorAndSpineStartTime = Date.now();
    const outputBooks = [];
    const authorPhrases = ocrElements.flatMap(
        (first, i) => ocrElements.slice(i + 1).map((second) => `${first} ${second}`.toLowerCase())
    );
    for (const bookDbRecord of possibleBooks) {
        const bookInfo = await getBookInfo(bookDbRecord.id);
        if (!bookInfo) {
            // getBookInfo returns null when the book cannot be loaded.
            continue;
        }

        let bookConf = 0;
        let totalConf = 0;

        // Confidence based on title similarity
        bookConf += bestTitleSimilarity.get(bookDbRecord) * confWeights.text_similarity;
        totalConf += confWeights.text_similarity;

        // Confidence based on author similarity. Starts at 0 so a book without
        // authors scores 0 here instead of turning the confidence into NaN.
        let bestAuthorSimilarity = 0;
        for (const author of bookInfo.authors) {
            const authorName = author.name.toLowerCase();
            for (const phrase of authorPhrases) {
                const authorSimilarity = stringSimilarity(authorName, phrase);
                if (authorSimilarity > bestAuthorSimilarity) {
                    bestAuthorSimilarity = authorSimilarity;
                }
            }
        }

        bookConf += bestAuthorSimilarity * confWeights.author_text_similarity;
        totalConf += confWeights.author_text_similarity;

        if (image_filepath) {
            // Confidence based on spine image similarity
            const spineImages = await getSpineFilepaths(bookDbRecord.id);
            let bestSpineSimilarity = 0;
            for (const image of spineImages) {
                const cosineSimilarity = await getSpineSimilarity(image_filepath, image);
                if (cosineSimilarity > bestSpineSimilarity) {
                    bestSpineSimilarity = cosineSimilarity;
                }
            }

            bookConf += bestSpineSimilarity * confWeights.spine_image;
            // NOTE(review): the spine weight is counted even when the book has no
            // stored spine images, which lowers its confidence — confirm intended.
            totalConf += confWeights.spine_image;
        }

        bookInfo.confidence = bookConf / totalConf;

        if (bookInfo.confidence < min_conf) {
            continue;
        }

        outputBooks.push(bookInfo);
    }
    const authorAndSpineEndTime = Date.now();

    // Keep 5 with biggest confidence
    outputBooks.sort((a, b) => b.confidence - a.confidence);
    outputBooks.splice(5);

    return {
        possible_books: outputBooks,
        time: {
            db_pull: dbPullEndTime - dbPullStartTime,
            similarity: similarityEndTime - similarityStartTime,
            author_and_spine: authorAndSpineEndTime - authorAndSpineStartTime,
        }
    };
}

/**
 * Load a book and its authors and shape them into a plain result object.
 *
 * The publication date is rendered as `year-month-day` (month and day are not
 * zero-padded); it is an empty string when the book has no publication date.
 *
 * @param {number} bookId
 * @returns {Promise<object|null>} `{ title, subtitle, authors, publication_date }`,
 *          or null when no book with that id is found
 */
async function getBookInfo(bookId) {
    const bookRecord = await new BookController().byId(bookId);
    if (!bookRecord) {
        return null;
    }

    const authorList = await new AuthorController().byBook(bookRecord.id);

    const published = bookRecord.publicationDate;
    const publicationDateStr = published
        ? [published.getFullYear(), published.getMonth() + 1, published.getDate()].join('-')
        : '';

    return {
        title: bookRecord.title,
        subtitle: bookRecord.subtitle,
        authors: authorList,
        publication_date: publicationDateStr,
    };
}

/**
 * Get spine image filepaths for every edition of a book.
 *
 * Each path is built as `<STORAGE_PATH>/spine-images/<image.filepath>`, where
 * STORAGE_PATH is read from the environment.
 *
 * @param {number} bookId
 * @returns {Promise<string[]>} the filepaths; empty when the book is not found
 *          or none of its editions has a spine image
 */
async function getSpineFilepaths(bookId) {
    const book = await new BookController().byId(bookId);
    if (!book) {
        // An unknown book has no images; avoid dereferencing a missing record.
        return [];
    }

    const editions = await new EditionController().byBook(book.id);

    const spineImages = [];
    for (const edition of editions) {
        const images = await new SpineImageController().byEdition(edition.id);
        for (const image of images) {
            spineImages.push(`${process.env.STORAGE_PATH}/spine-images/${image.filepath}`);
        }
    }

    return spineImages;
}

/**
 * Compute the cosine similarity between two images on disk.
 *
 * Both images are read with Jimp, converted to pixel arrays at a common size
 * (the mean of their widths and the mean of their heights), and compared.
 *
 * @param {string} image1_path
 * @param {string} image2_path
 * @returns {Promise<number>}
 */
async function getSpineSimilarity(image1_path, image2_path) {
    const first = await Jimp.read(image1_path);
    const second = await Jimp.read(image2_path);

    const mean = (a, b) => (a + b) / 2;
    const targetWidth = mean(first.bitmap.width, second.bitmap.width);
    const targetHeight = mean(first.bitmap.height, second.bitmap.height);

    const firstPixels = await getImagePixelArray(first, targetWidth, targetHeight);
    const secondPixels = await getImagePixelArray(second, targetWidth, targetHeight);

    return similarity(firstPixels, secondPixels);
}

/**
 * Flatten an image into an array of colour channel values.
 *
 * The image is resized to width x height, then walked row by row; every pixel
 * contributes its red, green and blue channels (in that order), each divided
 * by 255. The alpha channel is not included.
 *
 * @param {Jimp} image
 * @param {number} width
 * @param {number} height
 * @returns {Promise<number[]>}
 */
async function getImagePixelArray(image, width = 100, height = 100) {
    const resized = image.resize(width, height);
    const { width: columnCount, height: rowCount } = resized.bitmap;

    const channels = [];
    for (let row = 0; row < rowCount; row++) {
        for (let column = 0; column < columnCount; column++) {
            const { r, g, b } = Jimp.intToRGBA(resized.getPixelColor(column, row));
            channels.push(r / 255, g / 255, b / 255);
        }
    }

    return channels;
}

// Every non-empty selection of elements from the array, keeping the elements
// in their original relative order (2^n - 1 results for n elements). Despite
// the name, reordered variants are not produced.
const permutations = arr => {
    const selections = [];
    const chosen = [];

    // Extend the current selection with each element at or after `start`,
    // recording a copy before going deeper.
    const extendFrom = (start) => {
        for (let idx = start; idx < arr.length; idx++) {
            chosen.push(arr[idx]);
            selections.push([...chosen]);
            extendFrom(idx + 1);
            chosen.pop();
        }
    };

    extendFrom(0);
    return selections;
};

/**
 * Calculate similarity between two strings.
 *
 * The score is twice the number of shared substrings of length
 * `substringLength` divided by the total number of such substrings in both
 * strings. Each substring occurrence in str1 can be matched at most once.
 *
 * @param {string} str1 First string to match
 * @param {string} str2 Second string to match
 * @param {number} [substringLength=2] Optional. Length of substring to be used in calculating similarity. Default 2.
 * @param {boolean} [caseSensitive=false] Optional. Whether you want to consider case in string matching. Default false;
 * @returns Number between 0 and 1, with 0 being a low match score. Returns 0
 *          when either string is shorter than `substringLength`.
 */
const stringSimilarity = (str1, str2, substringLength = 2, caseSensitive = false) => {
	const first = caseSensitive ? str1 : str1.toLowerCase();
	const second = caseSensitive ? str2 : str2.toLowerCase();

	if (first.length < substringLength || second.length < substringLength) {
		return 0;
	}

	const firstGramCount = first.length - (substringLength - 1);
	const secondGramCount = second.length - (substringLength - 1);

	// Count how many times each substring occurs in the first string.
	const available = new Map();
	for (let i = 0; i < firstGramCount; i++) {
		// slice replaces the deprecated String.prototype.substr
		const gram = first.slice(i, i + substringLength);
		available.set(gram, (available.get(gram) || 0) + 1);
	}

	// Consume one occurrence for every matching substring of the second string.
	let match = 0;
	for (let j = 0; j < secondGramCount; j++) {
		const gram = second.slice(j, j + substringLength);
		const remaining = available.get(gram) || 0;
		if (remaining > 0) {
			available.set(gram, remaining - 1);
			match++;
		}
	}

	return (match * 2) / (firstGramCount + secondGramCount);
};