levenshtein | Sort by levenshtein distance | | Search

This code implements a fuzzy search function, levSearch, that ranks the objects in a dataset by how closely their values match a given search term, using Levenshtein distance, and returns them closest match first.

Run example

npm run import -- "search levenshtein distance"

search levenshtein distance

var importer = require('../Core');
var levDist = importer.import().levDist;

/**
 * Collect the values found at one or more dot-notation paths of an object.
 *
 * @param {string|string[]} keys - A single path (e.g. `'name'` or
 *     `'author.name'`) or a list of such paths.
 * @param {object} obj - The object to read from.
 * @returns {Array} The values found, in key order. An array value
 *     contributes each of its elements; any other value (including
 *     `undefined` for a path that does not exist) contributes one entry.
 */
function getStr(keys, obj) {
    const paths = typeof keys === 'string' ? [keys] : keys
    return paths.reduce((arr, id) => {
        // A missing intermediate object resolves to undefined instead of
        // throwing, which is the result a missing leaf property already gives.
        const value = id.split('.').reduce(
            (o, i) => (o === null || o === undefined ? undefined : o[i]),
            obj)
        return arr.concat(Array.isArray(value) ? value : [value])
    }, [])
}

/**
 * Rank the entries of `cache` by Levenshtein distance to `search`.
 *
 * Each entry is scored with the smallest `levDist` between `search` and any
 * of the values `getStr` reads from it for `config.keys`; entries are
 * returned closest first.
 *
 * @param {Array<object>} cache - Entries to rank. Falsy entries are dropped.
 * @param {{keys: (string|string[]), id: string}} config - `keys` holds the
 *     dot-notation path(s) to compare; `id` is the dot-notation path of the
 *     value returned for each entry.
 * @param {string} search - The term every candidate value is compared to.
 * @returns {Array} The `config.id` value of every remaining entry, ordered by
 *     ascending minimum distance. `cache` itself is not modified.
 */
function levSearch(cache, config, search) {
    // TODO: tokenize
    const scored = cache
        // Drop falsy entries before scoring so that a null entry cannot make
        // the key lookup throw.
        .filter(entry => entry)
        // Score each entry once up front rather than on every comparison
        // the sort performs.
        .map(entry => ({
            entry: entry,
            distance: Math.min.apply(null, getStr(config.keys, entry)
                .map(s => levDist(s, search)))
        }))
    scored.sort((a, b) => a.distance - b.distance)
    return scored
        .map(item => config.id.split('.').reduce((o, i) => o[i], item.entry))
}

// Expose levSearch as this module's sole (CommonJS) export.
module.exports = levSearch;

What the code could have been:

import importer from '../Core';

// levDist is obtained through the project importer, exactly as the CommonJS
// version of this module does. A module specifier must be a plain string
// literal, so it cannot be imported from a template literal.
const { levDist } = importer.import();

/**
 * Collect the values stored at one or more dot-notation paths of an object.
 *
 * @param {string|string[]} keys - A single path (e.g. `'name'` or
 *     `'author.name'`) or a list of such paths.
 * @param {object} obj - The object to read the values from.
 * @return {Array} The values found, in key order. An array value contributes
 *     each of its elements; any other value contributes a single entry, which
 *     is `undefined` when the path does not exist.
 */
function getStr(keys, obj) {
    const paths = typeof keys === 'string' ? [keys] : keys;

    // Walk one dot-separated path. A missing intermediate object resolves to
    // undefined instead of throwing, matching what a missing leaf yields.
    const resolve = (path) => path.split('.').reduce(
        (o, i) => (o === null || o === undefined ? undefined : o[i]),
        obj
    );

    return paths.reduce((arr, id) => {
        const value = resolve(id);
        return arr.concat(Array.isArray(value) ? value : [value]);
    }, []);
}

/**
 * Rank a cache of objects by Levenshtein distance to a search query.
 *
 * Each entry is scored with the smallest `levDist` between `search` and any
 * of the values `getStr` reads from it for `config.keys`.
 *
 * @param {object[]} cache - The entries to rank. Falsy entries are dropped.
 * @param {object} config - `config.keys` holds the dot-notation path(s) to
 *     compare; `config.id` is the dot-notation path of the value returned
 *     for each entry.
 * @param {string} search - The query every candidate value is compared to.
 * @return {Array} The `config.id` value of every remaining entry, ordered by
 *     ascending minimum distance. Neither `cache` nor its entries are
 *     modified.
 */
function levSearch(cache, config, search) {
    // TODO: tokenize the search query.

    // Remove null or undefined entries so the key lookup cannot throw, then
    // score every entry once. The entry is wrapped rather than spread into
    // the scored record, so its own properties (for example a field that is
    // itself called `distance`) are never overwritten.
    const scored = cache.filter(Boolean).map((entry) => {
        const distances = getStr(config.keys, entry).map((s) => levDist(s, search));
        return {
            entry,
            distance: Math.min(...distances),
        };
    });

    // Closest match first.
    scored.sort((a, b) => a.distance - b.distance);

    // Read the requested id path from the untouched original entries.
    return scored.map((item) => config.id.split('.').reduce((o, i) => o[i], item.entry));
}

// Expose levSearch as this module's default export.
export default levSearch;

This code defines a function levSearch that performs a fuzzy search within a dataset based on Levenshtein distance.

Here's a breakdown:

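  1. Imports: loads the project's `importer` from `../Core` and uses it to obtain `levDist`, the distance function that is called with a candidate value and the search term.

  2. getStr Function: takes one key or a list of keys in dot notation (for example `author.name`), reads each one from an object, and returns all of the values found as a single flat array.

  3. levSearch Function: sorts a copy of the dataset so that the items whose values (as selected by `config.keys`) have the smallest Levenshtein distance to the search term come first, then returns the `config.id` value of each item.

  4. Module Export: exports `levSearch` as the module's only export.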

In essence, this code provides a fuzzy search functionality that finds objects in a dataset based on the similarity of their values to a given search term, using Levenshtein distance as the similarity metric.