Paper links to free PDFs

Checks for free PDFs from Google Scholar, Sci-Hub, LibGen, Anna's Archive, Sci-net, Semantic Scholar, Unpaywall, and Openrxiv (medRxiv/bioRxiv) when hovering over a DOI link.

2025-10-22 يوللانغان نەشرى. ئەڭ يېڭى نەشرىنى كۆرۈش.

// ==UserScript==
// @name         Paper links to free PDFs
// @namespace    greatest.deepsurf.us
// @version      1.9
// @description  Checks for free PDFs from Google Scholar, Sci-Hub, LibGen, Anna's Archive, Sci-net, Semantic Scholar, Unpaywall, and Openrxiv (medRxiv/bioRxiv) when hovering over a DOI link.
// @author       Bui Quoc Dung
// @match        *://*/*
// @grant        GM.xmlHttpRequest
// @connect      *
// @license      AGPL-3.0-or-later
// ==/UserScript==

(function () {
'use strict';

// Base URLs of the services queried for each DOI.
// The DOI is appended to SCIHUB_URL / SCINET_URL / ANNA_SCIDB_URL as-is, and
// appended URL-encoded to the *_SEARCH_URL / *_CHECK_URL / Google Scholar URLs.
const SCIHUB_URL = 'https://tesble.com/';
const LIBGEN_URL = 'https://libgen.bz/';
const LIBGEN_SEARCH_URL = LIBGEN_URL + 'index.php?req=';
const ANNA_URL = 'https://annas-archive.org';
const ANNA_SCIDB_URL = ANNA_URL + '/scidb/';
const ANNA_CHECK_URL = ANNA_URL + '/search?index=journals&q=';
const SCINET_URL = 'https://sci-net.xyz/';
const GOOGLE_SCHOLAR_URL = 'https://scholar.google.com/scholar?hl=en&as_sdt=0%2C5&q=';
// Sent as the `email` query parameter of every Unpaywall API request.
// NOTE(review): '[email protected]' is not a syntactically valid address and
// looks like an e-mail-obfuscation placeholder — presumably a real contact
// address belongs here; confirm and replace.
const UNPAYWALL_EMAIL = '[email protected]';

// Case-insensitive DOI pattern: "10." + at least four digits, optional dotted
// numeric groups, "/", then one or more non-whitespace characters other than
// " & ' < >, delimited by word boundaries; the DOI is capture group 1.
// NOTE(review): not referenced anywhere else in the visible code.
const DOI_REGEX = /\b(10\.\d{4,}(?:\.\d+)*\/(?:(?!["&'<>])\S)+)\b/i;

// Stylesheet for the hover popup. Every rule is scoped under
// `.doi-enhancer-popup`; the `status-no` / `status-checking` classes are the
// ones assigned to the result cells while/after a source is checked.
const styles = `
    .doi-enhancer-popup {
        position: absolute; z-index: 9999; background-color: white;
        border: 1px solid #ccc; border-radius: 6px; padding: 6px;
        box-shadow: 0 4px 8px rgba(0,0,0,0.2); font-family: sans-serif;
        font-size: 13px; max-width: 600px;
    }
    .doi-enhancer-popup .doi-header {
        margin-bottom: 6px; color: #333; word-break: break-word;
        padding-left: 7px;
    }
    .doi-enhancer-popup table { border-collapse: collapse; width: 100%; }
    .doi-enhancer-popup td {
        padding: 4px 6px; text-align: center;
        border-right: 1px solid #eee; white-space: nowrap;
    }
    .doi-enhancer-popup td:last-child { border-right: none; }
    .doi-enhancer-popup a { color: #007bff; text-decoration: none; }
    .doi-enhancer-popup a:hover { text-decoration: underline; }
    .doi-enhancer-popup .status-no a { color: #888; }
    .doi-enhancer-popup .status-checking { color: #999; }
`;
// Inject the rules once, at script start, as a <style> element in <head>.
// NOTE(review): assumes document.head already exists when the script runs.
const styleEl = document.createElement('style');
styleEl.textContent = styles;
document.head.appendChild(styleEl);

// The popup element currently attached to the page, or null when none is shown.
let currentPopup = null;
// Timer id of the delayed popup removal scheduled when the pointer leaves a
// DOI link; cleared when the pointer enters the popup or another popup opens.
let hideTimeout = null;

/**
 * Promise wrapper around `GM.xmlHttpRequest`.
 *
 * @param {object} details - Request options passed through to
 *     `GM.xmlHttpRequest` (method, url, ...). A `timeout` given here overrides
 *     the 15 s default; the completion callbacks are always supplied by this
 *     wrapper.
 * @returns {Promise<object>} Resolves with the response object handed to
 *     `onload` (any HTTP status). Rejects with an `Error` when the request
 *     fails, times out or is aborted; the original event is kept in `cause`.
 */
function httpRequest(details) {
    return new Promise((resolve, reject) => {
        const rejectWith = (reason) => (event) => {
            reject(new Error(`Request ${reason}: ${details?.url}`, { cause: event }));
        };
        GM.xmlHttpRequest({
            timeout: 15000,
            ...details,
            onload: resolve,
            onerror: rejectWith('failed'),
            ontimeout: rejectWith('timed out'),
            // Without an abort handler an aborted request would leave the
            // promise pending forever.
            onabort: rejectWith('aborted'),
        });
    });
}

/**
 * Replaces the content of a result cell with a single link.
 *
 * @param {HTMLElement} cell - Table cell to update; its previous content is removed.
 * @param {string} text - Link label. Every `[PDF]` / `[Maybe]` tag in it is
 *     rendered bold; everything else is inserted as plain text.
 * @param {string} href - Link target. Only absolute http(s) URLs are made
 *     clickable; anything else leaves the link without an href.
 * @param {boolean} [isNo=false] - When true the cell gets the `status-no`
 *     class, otherwise `status-yes`.
 */
function updateLink(cell, text, href, isNo = false) {
    cell.textContent = '';

    const link = document.createElement('a');
    // Targets can come straight from remote responses (search-result markup,
    // API JSON), so schemes such as `javascript:` must never become clickable
    // inside the host page.
    if (/^https?:\/\//i.test(String(href).trim())) {
        link.href = href;
    }
    link.target = '_blank';
    link.rel = 'noopener noreferrer';

    // Build the label from nodes instead of parsing an HTML string, so the
    // text can never be interpreted as markup.
    for (const part of text.split(/(\[PDF\]|\[Maybe\])/)) {
        if (part === '') continue;
        if (part === '[PDF]' || part === '[Maybe]') {
            const bold = document.createElement('b');
            bold.textContent = part;
            link.appendChild(bold);
        } else {
            link.appendChild(document.createTextNode(part));
        }
    }

    cell.className = isNo ? 'status-no' : 'status-yes';
    cell.appendChild(link);
}

/**
 * Searches Google Scholar for the DOI and reports whether the result page
 * carries a full-text link (an anchor inside `.gs_or_ggsm`).
 *
 * @param {string} doi - DOI to look up.
 * @param {HTMLElement} cell - Result cell updated through `updateLink`.
 * @returns {Promise<void>}
 */
async function checkGoogleScholar(doi, cell) {
    const searchUrl = `${GOOGLE_SCHOLAR_URL}${encodeURIComponent(doi)}`;
    // A miss (or any request/parse failure) links to the search page itself.
    const reportMissing = () => updateLink(cell, '[No] Google', searchUrl, true);

    try {
        const response = await httpRequest({ method: 'GET', url: searchUrl });
        const page = new DOMParser().parseFromString(response.responseText, 'text/html');
        const pdfAnchor = page.querySelector('.gs_or_ggsm a');
        if (!pdfAnchor) {
            reportMissing();
            return;
        }
        updateLink(cell, '[PDF] Google', pdfAnchor.href);
    } catch {
        reportMissing();
    }
}

/**
 * Fetches the Sci-Hub page for the DOI and treats the presence of the words
 * "iframe" or "embed" in the response as a sign that a PDF is embedded.
 *
 * @param {string} doi - DOI to look up (appended to SCIHUB_URL unencoded).
 * @param {HTMLElement} cell - Result cell updated through `updateLink`.
 * @returns {Promise<void>}
 */
async function checkSciHub(doi, cell) {
    const pageUrl = `${SCIHUB_URL}${doi}`;
    try {
        const { responseText } = await httpRequest({ method: 'GET', url: pageUrl });
        if (/iframe|embed/.test(responseText)) {
            updateLink(cell, '[PDF] Sci-Hub', pageUrl, false);
        } else {
            updateLink(cell, '[No] Sci-Hub', pageUrl, true);
        }
    } catch {
        // Request failures are reported the same way as a miss.
        updateLink(cell, '[No] Sci-Hub', pageUrl, true);
    }
}

/**
 * Searches LibGen for the DOI. When the result table links to an edition
 * page, that page is fetched too and the entry counts as available if it
 * contains any <table>. The cell always links to the search page.
 *
 * @param {string} doi - DOI to look up.
 * @param {HTMLElement} cell - Result cell updated through `updateLink`.
 * @returns {Promise<void>}
 */
async function checkLibgen(doi, cell) {
    const searchUrl = `${LIBGEN_SEARCH_URL}${encodeURIComponent(doi)}`;
    const parseHtml = (html) => new DOMParser().parseFromString(html, 'text/html');
    const reportMissing = () => updateLink(cell, '[No] LibGen', searchUrl, true);

    try {
        const searchResponse = await httpRequest({ method: 'GET', url: searchUrl });
        const editionAnchor = parseHtml(searchResponse.responseText)
            .querySelector('.table.table-striped a[href^="edition.php?id="]');
        if (!editionAnchor) {
            reportMissing();
            return;
        }

        // The edition href is relative, so it is resolved against the site root.
        const editionUrl = `${LIBGEN_URL}${editionAnchor.getAttribute('href')}`;
        const editionResponse = await httpRequest({ method: 'GET', url: editionUrl });
        const editionHasTable = Boolean(parseHtml(editionResponse.responseText).querySelector('table'));
        if (editionHasTable) {
            updateLink(cell, '[PDF] LibGen', searchUrl, false);
        } else {
            reportMissing();
        }
    } catch {
        reportMissing();
    }
}

/**
 * Looks the DOI up on Anna's Archive: first the journal search, then (when
 * the search lists it) the SciDB page, which is probed for a PDF viewer.
 *
 * @param {string} doi - DOI to look up.
 * @param {HTMLElement} cell - Result cell updated through `updateLink`.
 * @param {number} [retry=0] - Number of rate-limit retries already made.
 * @returns {Promise<void>} Settles as soon as a retry has been scheduled; the
 *     retry itself runs later and updates the cell on its own.
 */
async function checkAnna(doi, cell, retry = 0) {
    const searchUrl = `${ANNA_CHECK_URL}${encodeURIComponent(doi)}`;
    const scidbUrl = `${ANNA_SCIDB_URL}${doi}`;
    const parseHtml = (html) => new DOMParser().parseFromString(html, 'text/html');
    const reportMissing = () => updateLink(cell, '[No] Anna', searchUrl, true);

    try {
        const searchResponse = await httpRequest({ method: 'GET', url: searchUrl });
        const searchHtml = searchResponse.responseText;

        // On a rate-limit page, try again in 5 s — at most 10 times; after
        // that the page is parsed like any other response.
        const rateLimited = searchHtml.includes("Rate limited");
        if (rateLimited && retry < 10) {
            setTimeout(() => checkAnna(doi, cell, retry + 1), 5000);
            return;
        }

        const searchDoc = parseHtml(searchHtml);
        let listed = Boolean(searchDoc.querySelector('.mt-4.uppercase.text-xs.text-gray-500'));
        if (!listed) {
            const greyBlocks = Array.from(searchDoc.querySelectorAll('div.text-gray-500'));
            listed = greyBlocks.some((block) => block.textContent.includes(doi));
        }
        if (!listed) {
            reportMissing();
            return;
        }

        const scidbResponse = await httpRequest({ method: 'GET', url: scidbUrl });
        const viewer = parseHtml(scidbResponse.responseText)
            .querySelector('.pdfViewer, #viewerContainer, iframe[src*="viewer.html?file="]');
        const label = viewer ? '[PDF] Anna' : '[Maybe] Anna';
        updateLink(cell, label, scidbUrl);
    } catch {
        reportMissing();
    }
}

/**
 * Fetches the Sci-net page for the DOI and treats the presence of the words
 * "iframe", "pdf" or "embed" in the response as a sign that a PDF is there.
 *
 * @param {string} doi - DOI to look up (appended to SCINET_URL unencoded).
 * @param {HTMLElement} cell - Result cell updated through `updateLink`.
 * @returns {Promise<void>}
 */
async function checkSciNet(doi, cell) {
    const pageUrl = `${SCINET_URL}${doi}`;
    try {
        const { responseText } = await httpRequest({ method: 'GET', url: pageUrl });
        if (/iframe|pdf|embed/.test(responseText)) {
            updateLink(cell, '[PDF] Sci-net', pageUrl, false);
        } else {
            updateLink(cell, '[No] Sci-net', pageUrl, true);
        }
    } catch {
        // Request failures are reported the same way as a miss.
        updateLink(cell, '[No] Sci-net', pageUrl, true);
    }
}

/**
 * Asks the Semantic Scholar Graph API for the paper's `openAccessPdf` and
 * links to it when one is returned; otherwise links to a site search.
 *
 * @param {string} doi - DOI to look up.
 * @param {HTMLElement} cell - Result cell updated through `updateLink`.
 * @returns {Promise<void>}
 */
async function checkSemanticScholar(doi, cell) {
    const encodedDoi = encodeURIComponent(doi);
    const apiUrl = `https://api.semanticscholar.org/graph/v1/paper/${encodedDoi}?fields=title,openAccessPdf`;
    const searchUrl = `https://www.semanticscholar.org/search?q=${encodedDoi}`;
    const reportMissing = () => updateLink(cell, '[No] Semantic', searchUrl, true);

    try {
        const response = await httpRequest({ method: 'GET', url: apiUrl });
        const paper = JSON.parse(response.responseText);
        const pdfUrl = paper?.openAccessPdf?.url;
        if (!pdfUrl) {
            reportMissing();
            return;
        }
        updateLink(cell, '[PDF] Semantic', pdfUrl);
    } catch {
        // Covers request failures as well as non-JSON responses.
        reportMissing();
    }
}

/**
 * Queries the Unpaywall v2 API and links to `best_oa_location.url` when the
 * record is flagged open access; otherwise links to the Unpaywall home page.
 *
 * @param {string} doi - DOI to look up.
 * @param {HTMLElement} cell - Result cell updated through `updateLink`.
 * @returns {Promise<void>}
 */
async function checkUnpaywall(doi, cell) {
    const apiUrl = `https://api.unpaywall.org/v2/${encodeURIComponent(doi)}?email=${encodeURIComponent(UNPAYWALL_EMAIL)}`;
    const reportMissing = () => updateLink(cell, '[No] Unpaywall', `https://unpaywall.org/`, true);

    try {
        const response = await httpRequest({ method: 'GET', url: apiUrl });
        const record = JSON.parse(response.responseText);
        const openAccessUrl = record?.is_oa ? record.best_oa_location?.url : undefined;
        if (openAccessUrl) {
            updateLink(cell, '[PDF] Unpaywall', openAccessUrl);
        } else {
            reportMissing();
        }
    } catch {
        // Covers request failures as well as non-JSON responses.
        reportMissing();
    }
}


/**
 * Resolves the DOI through doi.org and, when the final URL is hosted on
 * bioRxiv or medRxiv, links to the corresponding `.full.pdf`.
 * Otherwise the cell shows a `[No]` link to doi.org followed by plain links
 * to the DOI's would-be bioRxiv and medRxiv pages.
 *
 * @param {string} doi - DOI to look up.
 * @param {HTMLElement} cell - Result cell to update.
 * @returns {Promise<void>}
 */
async function checkOpenRxiv(doi, cell) {
    const doiUrl = `https://doi.org/${doi}`;
    const servers = [
        { host: 'biorxiv.org', label: 'BioRxiv' },
        { host: 'medrxiv.org', label: 'MedRxiv' },
    ];

    try {
        const res = await httpRequest({ method: 'GET', url: doiUrl, redirect: 'follow' });
        if (res.finalUrl) {
            const landing = new URL(res.finalUrl);
            // Match on the hostname only, so a server name that merely appears
            // in a path or query string of another site is not a hit.
            const server = servers.find(({ host }) =>
                landing.hostname === host || landing.hostname.endsWith(`.${host}`));
            if (server) {
                // Built from origin + path so a query string or fragment on
                // the landing URL cannot end up in front of ".full.pdf".
                const pdfUrl = `${landing.origin}${landing.pathname.replace(/\/$/, '')}.full.pdf`;
                updateLink(cell, `[PDF] ${server.label}`, pdfUrl);
                return;
            }
        }
    } catch {
        // Best effort: a failed request or unparsable final URL simply falls
        // through to the "[No]" rendering below.
    }

    updateLink(cell, '[No]', doiUrl, true);
    // Append the manual links as DOM nodes (not via an HTML string), so the
    // DOI is never parsed as markup, and give them the same rel as every
    // other link in the popup.
    for (const { host, label } of servers) {
        const link = document.createElement('a');
        link.href = `https://www.${host}/content/${doi}`;
        link.target = '_blank';
        link.rel = 'noopener noreferrer';
        link.textContent = label;
        cell.appendChild(document.createTextNode(' '));
        cell.appendChild(link);
    }
}



/** Detaches the popup that is currently shown, if any, and forgets it. */
function removeCurrentPopup() {
    if (!currentPopup) {
        return;
    }
    currentPopup.remove();
    currentPopup = null;
}

/**
 * Determines the DOI a link points to and caches the outcome on the element
 * (`data-doi` on success, `data-doi-failed` on failure).
 *
 * The DOI is taken from the link's URL when possible; otherwise the target
 * page is downloaded and the first DOI-like string in its HTML is used.
 * Links whose URL contains none of the scholarly keywords are rejected
 * without any request.
 *
 * @param {HTMLAnchorElement} linkElement - Link to inspect (needs `href` and `dataset`).
 * @returns {Promise<string|null>} The DOI, or null when none could be found.
 */
async function getDoiFromLink(linkElement) {
    if (linkElement.dataset.doi) return linkElement.dataset.doi;
    if (linkElement.dataset.doiFailed) return null;

    const url = linkElement.href;
    const keywords = ['doi', 'article', 'journal', 'abs', 'content', 'abstract', 'pubmed', 'document', 'fulltext', 'research', 'mdpi', 'springer'];
    const lowerUrl = url.toLowerCase();
    if (!keywords.some(k => lowerUrl.includes(k))) {
        linkElement.dataset.doiFailed = 'true';
        return null;
    }

    const cleanDOI = candidate => {
        // Keep the prefix plus the first suffix segment; this drops URL
        // tails such as "/full" or "?query" that follow the DOI in many links.
        const match = candidate.match(/^10\.\d{4,}(?:\.\d+)*\/[^\s\/?#]+/);
        // Punctuation that merely ends a sentence ("... 10.1234/abc.") is not
        // part of the DOI.
        const trimmed = (match ? match[0] : candidate.trim()).replace(/[.,;:]+$/, '');
        try {
            // DOIs are frequently percent-encoded inside URLs (e.g. "%28" for
            // "("). Decode them so the DOI is not double-encoded when it is
            // put into request URLs later. The encoded form is kept when
            // decoding would introduce quotes, angle brackets, whitespace or
            // control characters.
            const decoded = decodeURIComponent(trimmed);
            return /[\s"'<>\u0000-\u001f]/.test(decoded) ? trimmed : decoded;
        } catch {
            // Malformed escape sequence: use the text as found.
            return trimmed;
        }
    };

    // "/doi/<doi>" with an optional "abs/", "full/" or "pdf/" segment first,
    // then any DOI-looking substring anywhere in the URL.
    let doi = url.match(/\/doi\/(?:(?:abs|full|pdf)\/)?(10\.\d{4,}(?:\.\d+)*\/[^?#]+)/i)?.[1] ||
              url.match(/10\.\d{4,}(?:\.\d+)*\/[^\s"'<>{}[\]|\\^`]+/)?.[0];
    if (!doi) {
        try {
            const res = await httpRequest({ method: 'GET', url });
            const html = res.responseText;
            doi = html.match(/10\.\d{4,}(?:\.\d+)*\/[^\s"'<>{}[\]()]+/)?.[0];
        } catch {
            // Best effort: an unreachable page just means no DOI was found.
        }
    }

    if (!doi) {
        linkElement.dataset.doiFailed = 'true';
        return null;
    }
    linkElement.dataset.doi = cleanDOI(doi);
    return linkElement.dataset.doi;
}

/**
 * Shows the result popup for a DOI next to the pointer, replacing any popup
 * that is already open, and starts one availability check per source.
 *
 * @param {HTMLElement} linkElement - The hovered link (not used by this function).
 * @param {string} doi - DOI shown in the header and passed to every checker.
 * @param {number} mouseX - Page X coordinate of the pointer.
 * @param {number} mouseY - Page Y coordinate of the pointer.
 */
function showPopup(linkElement, doi, mouseX, mouseY) {
    clearTimeout(hideTimeout);
    removeCurrentPopup();

    const popup = document.createElement('div');
    popup.className = 'doi-enhancer-popup';
    currentPopup = popup;

    const header = document.createElement('div');
    header.className = 'doi-header';
    header.textContent = `DOI: ${doi}`;
    popup.appendChild(header);

    // One inner array per table row; `null` is the empty filler cell that
    // completes the last row.
    const layout = [
        [checkGoogleScholar, checkSemanticScholar, checkUnpaywall],
        [checkSciHub, checkLibgen, checkAnna],
        [checkSciNet, checkOpenRxiv, null],
    ];

    const table = document.createElement('table');
    const pendingChecks = [];
    for (const rowCheckers of layout) {
        const row = table.insertRow();
        for (const checker of rowCheckers) {
            const cell = row.insertCell();
            if (checker) {
                cell.textContent = '...';
                cell.className = 'status-checking';
                pendingChecks.push([checker, cell]);
            } else {
                cell.textContent = '';
                cell.className = 'status-no';
            }
        }
    }
    popup.appendChild(table);

    // Checks start only after every placeholder cell exists; each one fills
    // in its own cell when it completes.
    for (const [checker, cell] of pendingChecks) {
        checker(doi, cell);
    }

    // Hovering the popup cancels a pending hide; leaving it closes it at once.
    popup.addEventListener('mouseenter', () => clearTimeout(hideTimeout));
    popup.addEventListener('mouseleave', () => removeCurrentPopup());

    document.body.appendChild(popup);
    const offset = 10;
    popup.style.top = `${mouseY + offset}px`;
    popup.style.left = `${mouseX + offset}px`;
}

// Delegated hover handler: the first time the pointer reaches a link, look up
// its DOI and, if there is one, wire the link up to show/hide the popup.
// Every link is examined at most once; later hovers are served by the
// mouseenter/mouseleave listeners attached here.
document.addEventListener('mouseover', async (event) => {
    const link = event.target.closest?.('a');
    if (!link || !link.href) return;
    if (link.dataset.doiCheckInProgress || link.dataset.doiHoverHandled) return;
    if (link.closest('.doi-enhancer-popup')) return;

    clearTimeout(hideTimeout);
    removeCurrentPopup();

    link.dataset.doiCheckInProgress = 'true';

    try {
        const doi = await getDoiFromLink(link);
        if (!doi) return;

        link.addEventListener('mouseenter', (e) => showPopup(link, doi, e.pageX, e.pageY));
        link.addEventListener('mouseleave', () => hideTimeout = setTimeout(removeCurrentPopup, 150));

        // The lookup may have needed a network round trip. If the pointer has
        // left the link in the meantime, its mouseleave fired before the
        // listener above existed, so a popup opened now would never be hidden.
        if (link.matches(':hover')) {
            showPopup(link, doi, event.pageX, event.pageY);
        }
    } finally {
        // The "handled" marker is what keeps this handler from attaching
        // duplicate listeners or closing the popup on later mouseover events
        // inside the same link.
        link.dataset.doiHoverHandled = 'true';
        // Clear the flag through `dataset`: the property maps to the attribute
        // `data-doi-check-in-progress`, so removing an attribute spelled
        // 'data-doiCheckInProgress' would not clear it.
        delete link.dataset.doiCheckInProgress;
    }
});

})();