string-overlap-matching-degree

计算字符串重叠/匹配度计算

Version vom 24.07.2024. Aktuellste Version

Dieses Skript sollte nicht direkt installiert werden. Es handelt sich hier um eine Bibliothek für andere Skripte, welche über folgenden Befehl in den Metadaten eines Skriptes eingebunden wird // @require https://update.greatest.deepsurf.us/scripts/501646/1416053/string-overlap-matching-degree.js

// ==UserScript==
// @name string-overlap-matching-degree
// @namespace http://tampermonkey.net/
// @version 1.0.0
// @description 计算字符串重叠/匹配度计算
// @license Apache License 2.0
// @author zhuangjie
// ==/UserScript==
 
// commented by zhuangjie
 
(function () {
    "use strict";
/**
 * 重叠匹配度
 * @author: zhuangjie
 * @date: 2024-07-23
 */
function overlapMatchingDegreeForObjectArray(keyword = "", objArr = [], fun = (obj) => [], {sort = "desc",onlyHasScope =false,scopeForObjArrContainer} = {}) {
    const scopeForData = objArr.map(item => overlapMatchingDegree(keyword, fun(item), sort));
    // scope与 objArr 同步排序
    sortAndSync(scopeForData, objArr)
    if(Array.isArray(scopeForObjArrContainer)) {
        // 说明需要分数,倒给
        scopeForObjArrContainer.push(...scopeForData)
    }
    return onlyHasScope ? filterMismatchItem(scopeForData,objArr) : objArr;
}

// 根据scopeForData得到新数组objArr
function filterMismatchItem(scopeForData,objArr) {
    const result = []
    for(let [scope,index] of scopeForData.entries()) {
        if(scope != 0) {
            result.push(objArr[index])
        }
    }
    return result
}

/**
 * 计算匹配度外层封装工具
 * @param {string} keyword - 匹配字符串1
 * @param {Object | Arrayy} topicWeighs - 匹配字符串2与它的权重
 * @returns {number} 匹配度分数
 */
function overlapMatchingDegree(keyword, topicWeighs = {}, sort = "desc") {
    // 兼容topicWeighs为topic数组,元素越靠前权重越高
    if (Array.isArray(topicWeighs)) {
        const _temp = {}
        if (sort === "asc") {
            for (let i = 1; i <= topicWeighs.length; i++) {
                _temp[topicWeighs[i - 1]] = i;
            }
        } else {
            // desc
            for (let i = topicWeighs.length; i > 0; i--) {
                _temp[topicWeighs[topicWeighs.length - i]] = i;
            }
        }
        topicWeighs = _temp;
    }
    let topicList = Object.keys(topicWeighs)
    // topic map 得分
    const topicScores = topicList.map(topic => {
        let topicScore = 0; // 得分
        const currentTopicScore = topicWeighs[topic]; // 当前topic“个数”
        const overlapLengthBlocksMap = findOverlapBlocks(keyword, topic)
        const overlapLengthKeys = Object.keys(overlapLengthBlocksMap);
        for (let overlapLengthKey of overlapLengthKeys) {
            const currentLengthBlocks = overlapLengthBlocksMap[overlapLengthKey];
            topicScore = Math.pow(currentTopicScore, overlapLengthKey) * currentLengthBlocks.length;
        }
        return topicScore;
    })
    // 算总分返回
    return topicScores.reduce((a, b) => a + b, 0)
}

/**
 * 查找重叠匹配块(入口函数)
 * @param {*} str1 
 * @param {*} str2 
 * @returns 返回重叠块 如:{"2": ["好用","推荐"],"3": ["好用推荐"]}
 * 算法核心思想:
 * -----------------------------------------------------
 * sumatrapdf*          | sumatrapdf*      | sumatrapdf*
 *           pdf-       |          pdf-    |         pdf-
 * ------------------------------------------------------
 */
function findOverlapBlocks(str1 = "", str2 = "") {
    let maxStr2OffsetLength = str1.length - 1;
    let minStr2OffsetLength = -(str2.length - 1);
    const alignmentHub = { /* "3": ["好用","推荐"] */ }
    for (let currentStr2OffsetLength = maxStr2OffsetLength; currentStr2OffsetLength >= minStr2OffsetLength; currentStr2OffsetLength--) {
        let str1EndIndex = str1.length - 1;
        let str2EndIndex = currentStr2OffsetLength + (str2.length - 1);

        let overlappingStart = currentStr2OffsetLength >= 0 ? currentStr2OffsetLength : 0; // 重叠闭区间开始 [
        let overlappingEnd = str2EndIndex < str1EndIndex ? str2EndIndex : str1EndIndex; // 重叠闭区间结束 ]

        // 对齐
        const alignmentContent = alignment(str1.substring(overlappingStart, overlappingEnd + 1), str2.substring(overlappingStart - currentStr2OffsetLength, overlappingEnd - currentStr2OffsetLength + 1));
        // 长重叠 覆盖 短重叠
        longOverlappingCoverage(alignmentHub, alignmentContent);
    }
    return alignmentHub;
}
function longOverlappingCoverage(alignmentHub = {}, alignmentContent = {}) {
    const modifiedCols = Object.keys(alignmentContent)
    const maxmodifiedCols = Math.max(...modifiedCols)
    const minmodifiedCols = Math.min(...modifiedCols)
    // 合并到对应的列上
    modifiedCols.forEach(col => {
        alignmentHub[col] = alignmentHub[col] ? Array.from(new Set(alignmentHub[col].concat(alignmentContent[col]))) : alignmentContent[col];
    })
    const alignmentHubCol = Object.keys(alignmentHub)
    const gtmodifiedCols = alignmentHubCol.filter(col => parseFloat(col) > maxmodifiedCols);
    const ltmodifiedCols = alignmentHubCol.filter(col => parseFloat(col) < minmodifiedCols);
    // 重新计算各列,过滤掉在modifiedCols的`大于modifiedCols的列`的子串,过滤掉`小于modifiedCols的列`在modifiedCols的子串
    // - 过滤掉在modifiedCols的`大于modifiedCols的列`的子串
    colElementFilter(alignmentHub, modifiedCols, gtmodifiedCols);
    // - 过滤掉`小于modifiedCols的列`在modifiedCols的子串
    colElementFilter(alignmentHub, ltmodifiedCols, modifiedCols);

}
// 列元素过滤
function colElementFilter(alignmentHub = {}, cols = [], gtCols = []) {
    if (cols.length == 0 || gtCols.length == 0) return;
    gtCols.forEach(gtCol => {
        const gtOverlappingBlocks = alignmentHub[gtCol];
        for (let [index, col] of cols.entries()) {
            const overlappingBlocks = alignmentHub[col];
            if (gtOverlappingBlocks == undefined || overlappingBlocks == undefined) continue;;
            alignmentHub[col] = overlappingBlocks.filter(overlappingBlock => {
                for (let gtOverlappingBlock of gtOverlappingBlocks) {
                    if (gtOverlappingBlock.includes(overlappingBlock)) {
                        return false
                    }
                }
                return true;
            })
        }
    })
    // 清理掉value为空数组项
    // {1: [],2:["好用"]} -to-> {2:["好用"]}
    Object.keys(alignmentHub).forEach(key => { if (alignmentHub[key].length == 0) delete alignmentHub[key] });
}
// 对齐
function alignment(str1 = "", str2 = "") {
    // 返回 {"1",["我","用"],"2":["推荐","可以"]}
    if (str1.length != str2.length) {
        throw new Error("算法错误,对齐字符串长度不一致");
    }
    const overlappingBlocks = {}
    let currentOverlappingBlock = "";
    for (let i = str1.length - 1; i >= 0; i--) {
        if (str1[i] == str2[i]) {
            currentOverlappingBlock = str1[i] + currentOverlappingBlock;
            if (i - 1 >= 0) continue;
        }
        if (currentOverlappingBlock.length == 0) {
            continue;
        } else {
            // 块收集
            let currentOverlappingBlockContainer = overlappingBlocks[currentOverlappingBlock.length]
            if (currentOverlappingBlockContainer == undefined) {
                currentOverlappingBlockContainer = overlappingBlocks[currentOverlappingBlock.length] = [currentOverlappingBlock]
            } else if (!currentOverlappingBlockContainer.includes(currentOverlappingBlock)) {
                currentOverlappingBlockContainer.push(currentOverlappingBlock)
            }
        }
        currentOverlappingBlock = "";
    }
    return overlappingBlocks;
}

// 【同步算法-堆实现】
function sortAndSync(arr1, arr2, order = 'desc') {
    if (arr1.length !== arr2.length) {
        throw new Error("Both arrays must have the same length");
    }
    function swap(arr, i, j) {
        [arr[i], arr[j]] = [arr[j], arr[i]];
    }
    function heapify(arr1, arr2, n, i, order) {
        let largest = i;
        let left = 2 * i + 1;
        let right = 2 * i + 2;

        if (order === 'asc') {
            if (left < n && arr1[left] > arr1[largest]) {
                largest = left;
            }
            if (right < n && arr1[right] > arr1[largest]) {
                largest = right;
            }
        } else {

            if (left < n && arr1[left] < arr1[largest]) {
                largest = left;
            }
            if (right < n && arr1[right] < arr1[largest]) {
                largest = right;
            }
        }

        if (largest !== i) {
            swap(arr1, i, largest);
            swap(arr2, i, largest);
            heapify(arr1, arr2, n, largest, order);
        }
    }
    function buildHeap(arr1, arr2, n, order) {
        for (let i = Math.floor(n / 2) - 1; i >= 0; i--) {
            heapify(arr1, arr2, n, i, order);
        }
    }
    function heapSort(arr1, arr2, order) {
        let n = arr1.length;
        buildHeap(arr1, arr2, n, order);

        for (let i = n - 1; i > 0; i--) {
            swap(arr1, 0, i);
            swap(arr2, 0, i);
            heapify(arr1, arr2, i, 0, order);
        }
    }
    heapSort(arr1, arr2, order);
}

// 【算法测试1】
//  console.log("-- 算法测试开始 --")
//  console.log(findOverlapBlocks("[推荐]sumatrapdf非常好用","pdf 推荐"))
//  console.log("-- 算法测试结束 --")

// 【算法测试2】
// console.log("匹配度:", overlapMatchingDegree("好用的pdf工具", { "sumatrapdf": 10, "小而好用的pdf阅读器": 8, "https://www.sumatrapdfreader.org/downloadafter": 3 }));

})();