Scholar's Toolkit

Checks for free PDFs from Google Scholar, Sci-Hub, LibGen, Anna's Archive, Sci-net, Semantic Scholar, Unpaywall, Openrxiv (medRxiv/bioRxiv), and ArXiv. When hovering a DOI, it also displays journal name, ISSN, publisher, metrics (SJR, H-Index, JIF), and integrity status (PubPeer, Retraction Database, Beall's Predatory List).

// ==UserScript==
// @name         Scholar's Toolkit
// @namespace    greatest.deepsurf.us
// @version      3.0
// @description Checks for free PDFs from Google Scholar, Sci-Hub, LibGen, Anna's Archive, Sci-net, Semantic Scholar, Unpaywall, Openrxiv (medRxiv/bioRxiv), and ArXiv. When hovering a DOI, it also displays journal name, ISSN, publisher, metrics (SJR, H-Index, JIF), and integrity status (PubPeer, Retraction Database, Beall's Predatory List).
// @author       Bui Quoc Dung (with community contributions)
// @match        *://*/*
// @grant        GM_xmlhttpRequest
// @connect      *
// @license      AGPL-3.0-or-later
// ==/UserScript==

(function () {
'use strict';

// Stylesheet for the hover popup. Every rule is scoped under .doi-enhancer-popup.
const styles = `
.doi-enhancer-popup { position: absolute; z-index: 9999; background-color: white;
border: 1px solid #ccc; border-radius: 6px; padding: 6px;
box-shadow: 0 4px 8px rgba(0,0,0,0.2); font-family: sans-serif;
font-size: 13px; max-width: 600px; }
.doi-enhancer-popup .doi-header { margin-bottom: 6px; color: #333; word-break: break-word; padding-left: 7px; }
.doi-enhancer-popup table { border-collapse: collapse; width: 100%; margin-top: 6px; }
.doi-enhancer-popup td { padding: 4px 6px; text-align: center; border-right: 1px solid #eee; white-space: nowrap; }
.doi-enhancer-popup td:last-child { border-right: none; }
.doi-enhancer-popup a { color: #007bff; text-decoration: none; }
.doi-enhancer-popup a:hover { text-decoration: underline; }
.doi-enhancer-popup .status-no a { color: #888; }
.doi-enhancer-popup .status-checking { color: #999; }
`;
const styleEl = document.createElement('style');
styleEl.textContent = styles;
// document.head is null in documents without a <head> (e.g. XML/SVG pages, which
// the "*://*/*" match pattern includes). Fall back to the root element so the
// script does not abort at startup with a TypeError.
(document.head || document.documentElement).appendChild(styleEl);

// The popup element currently attached to the page, or null when none is shown.
let currentPopup = null;
// Timer id of the delayed hide scheduled when the pointer leaves a DOI link, or null.
let hideTimeout = null;

/**
 * Promise wrapper around GM_xmlhttpRequest.
 *
 * @param {object} details - Request options passed through to GM_xmlhttpRequest
 *   (method, url, ...). `timeout` defaults to 15000 ms and may be overridden by
 *   the caller; any onload/onerror/ontimeout/onabort supplied by the caller is
 *   replaced by the promise plumbing.
 * @returns {Promise<object>} Resolves with the response object on load (for any
 *   HTTP status). Rejects with whatever the manager passes to the handler on
 *   network error, timeout or abort.
 */
function httpRequest(details) {
    const { timeout, ...rest } = details ?? {};
    return new Promise((resolve, reject) => {
        GM_xmlhttpRequest({
            ...rest,
            timeout: timeout ?? 15000,
            onload: resolve,
            onerror: reject,
            ontimeout: reject,
            // Without this an aborted request would leave the promise pending forever.
            onabort: reject,
        });
    });
}

/**
 * Issues a GET request through httpRequest.
 *
 * @param {string} url - Address to fetch.
 * @returns {Promise<object>} The response object produced by httpRequest.
 */
async function httpGet(url) {
    const request = { method: 'GET', url };
    return httpRequest(request);
}

/**
 * Replaces the content of a status cell with a single link.
 *
 * Status tags ([PDF], [Maybe], [COM], [RET], [PRE]) inside `text` are rendered in
 * bold. The label is built from DOM nodes rather than an HTML string because
 * callers interpolate remote data into it (e.g. the PubPeer comment count), which
 * must never be parsed as markup.
 *
 * @param {HTMLElement} cell - Table cell to update.
 * @param {string} text - Link label.
 * @param {string} href - Link target. Script-capable schemes are not applied.
 * @param {boolean} [isNo=false] - true styles the cell as "status-no", otherwise "status-yes".
 */
function updateLink(cell, text, href, isNo = false) {
    const TAG_SPLITTER = /(\[(?:PDF|Maybe|COM|RET|PRE)\])/;
    const UNSAFE_SCHEME = /^\s*(?:javascript|data|vbscript):/i;

    cell.textContent = '';
    const link = document.createElement('a');
    // Several callers pass URLs taken from remote responses; refuse schemes that
    // would execute script when the link is clicked.
    if (!UNSAFE_SCHEME.test(href)) {
        link.href = href;
    }
    link.target = '_blank';
    link.rel = 'noopener noreferrer';

    // split() with a capture group puts the matched tags at the odd indices.
    String(text).split(TAG_SPLITTER).forEach((part, index) => {
        if (part === '') return;
        if (index % 2 === 1) {
            const bold = document.createElement('b');
            bold.textContent = part;
            link.appendChild(bold);
        } else {
            link.appendChild(document.createTextNode(part));
        }
    });

    cell.className = isNo ? 'status-no' : 'status-yes';
    cell.appendChild(link);
}

/**
 * Looks up journal metadata for a DOI in the Crossref works API.
 *
 * @param {string} doi - DOI to look up.
 * @returns {Promise<{journal?: string, publisher?: string, issn?: string}>}
 *   First container title, publisher and first ISSN (each "" when absent), with
 *   "&amp;" entities replaced by "&". An empty object when the request or the
 *   parsing fails.
 */
async function fetchCrossref(doi) {
    const unescapeAmp = (value) => (value || "").replace(/&amp;/g, '&');
    try {
        const response = await httpGet(`https://api.crossref.org/works/${doi}`);
        const { message } = JSON.parse(response.responseText);
        const titles = message["container-title"];
        const issns = message.ISSN;
        return {
            journal: unescapeAmp(titles?.[0]),
            publisher: unescapeAmp(message.publisher),
            issn: issns?.[0] || "",
        };
    } catch (e) {
        // Best effort: callers treat a missing field as "N/A".
        return {};
    }
}

/**
 * Scrapes SCImago for a journal's SJR value, quartile and H-index.
 *
 * Two requests are made: the ISSN search page, then the first journal page it
 * links to. The callback is invoked exactly once; every failure (network error,
 * timeout, abort, unexpected markup) is reported as null so the caller's
 * placeholder never stays pending.
 *
 * @param {string} issn - Journal ISSN; a falsy value short-circuits to cb(null).
 * @param {(result: {text: string, link: string} | null) => void} cb - Receives the
 *   display text and the journal page URL, or null.
 */
function querySJRByISSN(issn, cb) {
    const SJR_SEARCH_URL = 'https://www.scimagojr.com/journalsearch.php?q=';
    const SJR_BASE_URL = 'https://www.scimagojr.com/';
    const REQUEST_TIMEOUT_MS = 15000;
    if (!issn) return cb(null);

    const fail = () => cb(null);
    const parseHtml = (html) => new DOMParser().parseFromString(html, "text/html");
    const get = (url, onload) => GM_xmlhttpRequest({
        method: 'GET',
        url,
        timeout: REQUEST_TIMEOUT_MS,
        onload,
        onerror: fail,
        ontimeout: fail,
        onabort: fail,
    });

    // Builds the display text from the journal page, or returns null when the
    // expected metric elements are missing.
    const readMetrics = (html) => {
        const ps = parseHtml(html).querySelectorAll('p.hindexnumber');
        if (ps.length < 2) return null;
        const sjr = ps[0].childNodes[0]?.textContent.trim();
        const quart = ps[0].querySelector('span')?.textContent.trim();
        const h = ps[1].textContent.trim();
        let text = sjr ? `SJR: ${sjr}` : "SJR: N/A";
        if (quart) text += ` (${quart})`;
        if (h) text += ` | H-index:  ${h}`;
        return text;
    };

    get(SJR_SEARCH_URL + encodeURIComponent(issn), (res) => {
        let journalUrl = null;
        try {
            const href = parseHtml(res.responseText).querySelector('.search_results a')?.getAttribute('href');
            if (href) journalUrl = SJR_BASE_URL + href;
        } catch (e) {
            journalUrl = null;
        }
        if (!journalUrl) return fail();

        get(journalUrl, (r2) => {
            let text = null;
            try {
                text = readMetrics(r2.responseText);
            } catch (e) {
                text = null;
            }
            // cb is called outside the try so an exception thrown by the callback
            // itself cannot trigger a second invocation.
            cb(text ? { text, link: journalUrl } : null);
        });
    });
}

/**
 * Scrapes wos-journal.info for a journal's Journal Impact Factor.
 *
 * The callback is invoked exactly once; network errors, timeouts, aborts,
 * unexpected markup and non-numeric values are all reported as null.
 *
 * @param {string} issn - Journal ISSN; a falsy value short-circuits to cb(null).
 * @param {(result: {value: string, link: string} | null) => void} cb - Receives the
 *   JIF text and the lookup page URL, or null.
 */
function queryJIFByISSN(issn, cb) {
    const WOS_JOURNAL_URL = 'https://wos-journal.info/?jsearch=';
    const JIF_LABEL = 'Journal Impact Factor (JIF):';
    if (!issn) return cb(null);

    // The same encoded URL is used for the request and for the link handed back,
    // so the link always points at the page that was actually scraped.
    const lookupUrl = WOS_JOURNAL_URL + encodeURIComponent(issn);
    const fail = () => cb(null);

    // Returns the JIF text when the page lists a numeric value, otherwise null.
    const readJif = (html) => {
        const doc = new DOMParser().parseFromString(html, "text/html");
        const titles = doc.querySelectorAll('.title.col-4.col-md-3');
        const contents = doc.querySelectorAll('.content.col-8.col-md-9');
        // Titles and contents are matched by position, so the counts must agree.
        if (!titles.length || titles.length !== contents.length) return null;
        for (let i = 0; i < titles.length; i++) {
            if (titles[i].textContent.trim() === JIF_LABEL) {
                const value = contents[i].textContent.trim();
                return value !== '' && !Number.isNaN(Number(value)) ? value : null;
            }
        }
        return null;
    };

    GM_xmlhttpRequest({
        method: 'GET',
        url: lookupUrl,
        timeout: 15000,
        onload: (res) => {
            let value = null;
            try {
                value = readJif(res.responseText);
            } catch (e) {
                value = null;
            }
            cb(value ? { value, link: lookupUrl } : null);
        },
        onerror: fail,
        ontimeout: fail,
        onabort: fail,
    });
}


/**
 * Flags a journal or publisher that appears on Beall's list (beallslist.net).
 *
 * The two lists are scraped once and cached in sessionStorage. The cache is
 * strictly optional: a storage that throws (blocked storage, quota exceeded) or
 * holds corrupt data falls back to fetching, and an empty scrape is never cached
 * so that one bad response cannot turn every later check into a false "[No]".
 *
 * @param {string} journalName - Journal title; compared for exact equality (case-insensitive).
 * @param {string} publisherName - Publisher name; compared by substring in either direction.
 * @param {HTMLElement} cell - Status cell updated with "[PRE] Beall" or "[No] Beall".
 * @returns {Promise<void>}
 */
async function checkBeallsList(journalName, publisherName, cell) {
    const mainUrl = 'https://beallslist.net/';
    if (!journalName && !publisherName) {
        return updateLink(cell, '[No] Beall', mainUrl, true);
    }

    // Returns the cached list as a Set, or null when nothing usable is cached.
    const readCache = (cacheKey) => {
        try {
            const cached = sessionStorage.getItem(cacheKey);
            if (!cached) return null;
            const parsed = JSON.parse(cached);
            return Array.isArray(parsed) && parsed.length > 0 ? new Set(parsed) : null;
        } catch (e) {
            return null;
        }
    };

    // Stores a non-empty list; storage failures are ignored on purpose.
    const writeCache = (cacheKey, itemSet) => {
        if (itemSet.size === 0) return;
        try {
            sessionStorage.setItem(cacheKey, JSON.stringify(Array.from(itemSet)));
        } catch (e) {
            // The list is still returned to the caller, only the caching is skipped.
        }
    };

    const getList = async (url, cacheKey, selector = ".entry-content ul li") => {
        const cachedSet = readCache(cacheKey);
        if (cachedSet) return cachedSet;

        const response = await httpRequest({ method: 'GET', url });
        const doc = new DOMParser().parseFromString(response.responseText, "text/html");
        const itemSet = new Set();
        doc.querySelectorAll(selector).forEach(item => {
            const text = item.textContent.trim().toLowerCase();
            if (text) itemSet.add(text);
        });
        writeCache(cacheKey, itemSet);
        return itemSet;
    };

    try {
        const [journalSet, publisherSet] = await Promise.all([
            getList('https://beallslist.net/standalone-journals/', 'beallsListJournals'),
            getList('https://beallslist.net/', 'beallsListPublishers')
        ]);

        const lowerJournal = journalName ? journalName.trim().toLowerCase() : '';
        if (lowerJournal && journalSet.has(lowerJournal)) {
            updateLink(cell, '[PRE] Beall', mainUrl, false);
            return;
        }

        const lowerPublisher = publisherName ? publisherName.trim().toLowerCase() : '';
        if (lowerPublisher) {
            // NOTE(review): substring matching in both directions can flag an
            // unrelated publisher whose name contains a short list entry - confirm
            // this heuristic is intended.
            for (const predatoryPublisher of publisherSet) {
                if (predatoryPublisher.includes(lowerPublisher) || lowerPublisher.includes(predatoryPublisher)) {
                    updateLink(cell, '[PRE] Beall', mainUrl, false);
                    return;
                }
            }
        }

        updateLink(cell, '[No] Beall', mainUrl, true);

    } catch (error) {
        console.error("Error checking Beall's List:", error);
        updateLink(cell, '[No] Beall', mainUrl, true);
    }
}


/**
 * Shows whether PubPeer lists comments for a DOI.
 *
 * The PubPeer search page is loaded first to read its CSRF token, which the
 * search API request then carries. Every failure (network error, timeout, abort,
 * missing token, unexpected payload) and a zero/missing comment count are shown
 * as "[No] PubPeer".
 *
 * @param {string} doi - DOI to look up.
 * @param {HTMLElement} cell - Status cell to update.
 */
function checkPubPeer(doi, cell) {
    const REQUEST_TIMEOUT_MS = 15000;
    const encodedDoi = encodeURIComponent(doi);
    const pubpeerUrl = `https://pubpeer.com/search?q=${encodedDoi}`;

    const updateFail = () => updateLink(cell, '[No] PubPeer', pubpeerUrl, true);

    // Reads the comment count of the first publication, or 0 when unavailable.
    const readCommentCount = (apiResponse) => {
        const raw = apiResponse.response;
        // Some managers hand back the raw text when JSON parsing is not applied.
        const data = typeof raw === 'string' ? JSON.parse(raw) : raw;
        const first = data?.publications?.[0];
        return Number(first?.comments_total) || 0;
    };

    GM_xmlhttpRequest({
        method: 'GET', url: pubpeerUrl, timeout: REQUEST_TIMEOUT_MS,
        onload: function(response) {
            let token = null;
            try {
                const doc = new DOMParser().parseFromString(response.responseText, "text/html");
                token = doc.querySelector('meta[name="csrf-token"]')?.getAttribute('content') ?? null;
            } catch (e) {
                token = null;
            }
            if (!token) { return updateFail(); }

            const apiUrl = `https://pubpeer.com/api/search/?q=${encodedDoi}&token=${encodeURIComponent(token)}`;

            GM_xmlhttpRequest({
                method: 'GET', url: apiUrl, responseType: 'json', timeout: REQUEST_TIMEOUT_MS,
                onload: function(apiResponse) {
                    let commentCount = 0;
                    try {
                        commentCount = readCommentCount(apiResponse);
                    } catch (e) {
                        commentCount = 0;
                    }
                    if (commentCount > 0) {
                        updateLink(cell, `[COM] PubPeer (${commentCount})`, pubpeerUrl, false);
                    } else {
                        updateFail();
                    }
                },
                onerror: updateFail,
                ontimeout: updateFail,
                onabort: updateFail
            });
        },
        onerror: updateFail,
        ontimeout: updateFail,
        onabort: updateFail
    });
}


/**
 * Marks a DOI as retracted when its Crossref record carries an "updated-by"
 * entry of type "retraction".
 *
 * @param {string} doi - DOI to look up.
 * @param {HTMLElement} cell - Status cell; receives "[RET] RWD" or "[No] RWD",
 *   both linking to the Retraction Watch Database search page. Any failure is
 *   shown as "[No] RWD".
 * @returns {Promise<void>}
 */
async function checkRetraction(doi, cell) {
    const rwdUrl = 'https://retractiondatabase.org/RetractionSearch.aspx';
    try {
        const response = await httpGet(`https://api.crossref.org/works/${doi}`);
        const work = JSON.parse(response.responseText).message;
        const updates = work['updated-by'] || [];
        const retracted = updates.some((update) => update.type === 'retraction');
        const label = retracted ? '[RET] RWD' : '[No] RWD';
        updateLink(cell, label, rwdUrl, !retracted);
    } catch (e) {
        updateLink(cell, '[No] RWD', rwdUrl, true);
    }
}


/**
 * Searches Google Scholar for the DOI and links the first full-text entry
 * (".gs_or_ggsm a") of the result page when there is one.
 *
 * @param {string} doi - DOI to search for.
 * @param {HTMLElement} cell - Status cell; receives "[PDF] Google" pointing at the
 *   found link, otherwise "[No] Google" pointing at the search page.
 * @returns {Promise<void>}
 */
async function checkGoogleScholar(doi, cell) {
    const GOOGLE_SCHOLAR_URL = 'https://scholar.google.com/scholar?hl=en&as_sdt=0%2C5&q=';
    const url = `${GOOGLE_SCHOLAR_URL}${encodeURIComponent(doi)}`;
    const reportMissing = () => updateLink(cell, '[No] Google', url, true);
    try {
        const res = await httpRequest({ method: 'GET', url });
        const resultPage = new DOMParser().parseFromString(res.responseText, 'text/html');
        const gsLink = resultPage.querySelector('.gs_or_ggsm a');
        if (!gsLink) {
            reportMissing();
            return;
        }
        updateLink(cell, '[PDF] Google', gsLink.href);
    } catch {
        reportMissing();
    }
}

/**
 * Loads the Sci-Hub mirror page for the DOI and treats the presence of the text
 * "iframe" or "embed" in the response as a sign that a PDF viewer is served.
 *
 * @param {string} doi - DOI to look up.
 * @param {HTMLElement} cell - Status cell; receives "[PDF] Sci-Hub" or "[No] Sci-Hub",
 *   both linking to the mirror page.
 * @returns {Promise<void>}
 */
async function checkSciHub(doi, cell) {
    const SCIHUB_URL = 'https://tesble.com/';
    const url = SCIHUB_URL + doi;
    const VIEWER_HINT = /iframe|embed/;
    try {
        const res = await httpRequest({ method: 'GET', url });
        if (VIEWER_HINT.test(res.responseText)) {
            updateLink(cell, '[PDF] Sci-Hub', url, false);
        } else {
            updateLink(cell, '[No] Sci-Hub', url, true);
        }
    } catch {
        updateLink(cell, '[No] Sci-Hub', url, true);
    }
}

/**
 * Searches LibGen for the DOI, follows the first "edition.php?id=" result and
 * reports a hit when that detail page contains a table.
 *
 * @param {string} doi - DOI to search for.
 * @param {HTMLElement} cell - Status cell; receives "[PDF] LibGen" or "[No] LibGen",
 *   both linking to the search page.
 * @returns {Promise<void>}
 */
async function checkLibgen(doi, cell) {
    const LIBGEN_URL = 'https://libgen.bz/';
    const url = `${LIBGEN_URL}index.php?req=${encodeURIComponent(doi)}`;
    const fetchDocument = async (target) => {
        const response = await httpRequest({ method: 'GET', url: target });
        return new DOMParser().parseFromString(response.responseText, 'text/html');
    };
    try {
        const searchDoc = await fetchDocument(url);
        const linkEl = searchDoc.querySelector('.table.table-striped a[href^="edition.php?id="]');
        if (!linkEl) {
            updateLink(cell, '[No] LibGen', url, true);
            return;
        }
        const detailDoc = await fetchDocument(LIBGEN_URL + linkEl.getAttribute('href'));
        const hasPDF = Boolean(detailDoc.querySelector('table'));
        const label = hasPDF ? '[PDF] LibGen' : '[No] LibGen';
        updateLink(cell, label, url, !hasPDF);
    } catch {
        updateLink(cell, '[No] LibGen', url, true);
    }
}

/**
 * Looks the DOI up in Anna's Archive.
 *
 * The journal search page is queried first. A response containing "Rate limited"
 * schedules another attempt after 5 seconds (while `retry` is below 10) and
 * leaves the cell untouched. When the search looks like a hit, the SciDB page is
 * loaded and checked for an embedded PDF viewer.
 *
 * @param {string} doi - DOI to look up.
 * @param {HTMLElement} cell - Status cell; receives "[PDF] Anna" or "[Maybe] Anna"
 *   (linking to the SciDB page) or "[No] Anna" (linking to the search page).
 * @param {number} [retry=0] - Number of rate-limit retries already performed.
 * @returns {Promise<void>}
 */
async function checkAnna(doi, cell, retry = 0) {
    const ANNA_URL = 'https://annas-archive.org';
    const checkUrl = `${ANNA_URL}/search?index=journals&q=${encodeURIComponent(doi)}`;
    const directUrl = `${ANNA_URL}/scidb/${doi}`;
    const toDocument = (html) => new DOMParser().parseFromString(html, 'text/html');
    const mentionsDoi = (div) => div.textContent.includes(doi);
    try {
        const searchResponse = await httpRequest({ method: 'GET', url: checkUrl });
        const text = searchResponse.responseText;
        if (text.includes("Rate limited") && retry < 10) {
            setTimeout(() => checkAnna(doi, cell, retry + 1), 5000);
            return;
        }
        const searchDoc = toDocument(text);
        const found = searchDoc.querySelector('.mt-4.uppercase.text-xs.text-gray-500') ||
                      Array.from(searchDoc.querySelectorAll('div.text-gray-500')).some(mentionsDoi);
        if (!found) {
            updateLink(cell, '[No] Anna', checkUrl, true);
            return;
        }
        const viewerResponse = await httpRequest({ method: 'GET', url: directUrl });
        const viewerDoc = toDocument(viewerResponse.responseText);
        const hasPDF = viewerDoc.querySelector('.pdfViewer, #viewerContainer, iframe[src*="viewer.html?file="]');
        if (hasPDF) {
            updateLink(cell, '[PDF] Anna', directUrl);
        } else {
            updateLink(cell, '[Maybe] Anna', directUrl);
        }
    } catch {
        updateLink(cell, '[No] Anna', checkUrl, true);
    }
}

/**
 * Loads the Sci-net page for the DOI and treats the presence of the text
 * "iframe", "pdf" or "embed" in the response as a hit.
 *
 * @param {string} doi - DOI to look up.
 * @param {HTMLElement} cell - Status cell; receives "[PDF] Sci-net" or "[No] Sci-net",
 *   both linking to the Sci-net page.
 * @returns {Promise<void>}
 */
async function checkSciNet(doi, cell) {
    const SCINET_URL = 'https://sci-net.xyz/';
    const url = SCINET_URL + doi;
    const PDF_HINT = /iframe|pdf|embed/;
    try {
        const res = await httpRequest({ method: 'GET', url });
        if (PDF_HINT.test(res.responseText)) {
            updateLink(cell, '[PDF] Sci-net', url, false);
        } else {
            updateLink(cell, '[No] Sci-net', url, true);
        }
    } catch {
        updateLink(cell, '[No] Sci-net', url, true);
    }
}

/**
 * Asks the Semantic Scholar Graph API for an open-access PDF of the DOI.
 *
 * @param {string} doi - DOI to look up.
 * @param {HTMLElement} cell - Status cell; receives "[PDF] Semantic" pointing at the
 *   reported PDF, otherwise "[No] Semantic" pointing at a Semantic Scholar search.
 * @returns {Promise<void>}
 */
async function checkSemanticScholar(doi, cell) {
    const encodedDoi = encodeURIComponent(doi);
    const apiUrl = `https://api.semanticscholar.org/graph/v1/paper/${encodedDoi}?fields=title,openAccessPdf`;
    const searchUrl = `https://www.semanticscholar.org/search?q=${encodedDoi}`;
    try {
        const res = await httpRequest({ method: 'GET', url: apiUrl });
        const paper = JSON.parse(res.responseText);
        const pdfUrl = paper?.openAccessPdf?.url;
        if (!pdfUrl) {
            updateLink(cell, '[No] Semantic', searchUrl, true);
            return;
        }
        updateLink(cell, '[PDF] Semantic', pdfUrl);
    } catch {
        updateLink(cell, '[No] Semantic', searchUrl, true);
    }
}

/**
 * Asks the Unpaywall API whether an open-access copy of the DOI exists.
 *
 * @param {string} doi - DOI to look up.
 * @param {HTMLElement} cell - Status cell; receives "[PDF] Unpaywall" pointing at the
 *   best open-access location, otherwise "[No] Unpaywall" pointing at unpaywall.org.
 * @returns {Promise<void>}
 */
async function checkUnpaywall(doi, cell) {
    // NOTE(review): this value looks like an e-mail obfuscation placeholder rather
    // than a real address; the API request carries it as the `email` parameter -
    // confirm and replace with a valid contact address.
    const UNPAYWALL_EMAIL = '[email protected]';
    const HOME_URL = `https://unpaywall.org/`;
    const query = `email=${encodeURIComponent(UNPAYWALL_EMAIL)}`;
    const url = `https://api.unpaywall.org/v2/${encodeURIComponent(doi)}?${query}`;
    try {
        const res = await httpRequest({ method: 'GET', url });
        const record = JSON.parse(res.responseText);
        const isOpen = record?.is_oa && record.best_oa_location?.url;
        if (isOpen) {
            updateLink(cell, '[PDF] Unpaywall', record.best_oa_location.url);
        } else {
            updateLink(cell, '[No] Unpaywall', HOME_URL, true);
        }
    } catch {
        updateLink(cell, '[No] Unpaywall', HOME_URL, true);
    }
}

/**
 * Resolves the DOI through doi.org and, when it lands on bioRxiv or medRxiv,
 * links the preprint's ".full.pdf".
 *
 * The landing host is compared by hostname (not by substring of the whole URL),
 * and query string and fragment are dropped before ".full.pdf" is appended so the
 * suffix always follows the path. When the DOI does not resolve to either server
 * the cell shows "[No]" followed by plain links to both servers' content pages.
 * Those links are created as DOM nodes so the DOI is never parsed as HTML.
 *
 * @param {string} doi - DOI to resolve.
 * @param {HTMLElement} cell - Status cell to update.
 * @returns {Promise<void>}
 */
async function checkOpenRxiv(doi, cell) {
    const doiUrl = `https://doi.org/${doi}`;
    const servers = [
        { host: 'biorxiv.org', name: 'BioRxiv' },
        { host: 'medrxiv.org', name: 'MedRxiv' },
    ];
    try {
        const res = await httpRequest({ method: 'GET', url: doiUrl, redirect: 'follow' });
        if (res.finalUrl) {
            const landing = new URL(res.finalUrl);
            const server = servers.find(({ host }) =>
                landing.hostname === host || landing.hostname.endsWith(`.${host}`));
            if (server) {
                landing.search = '';
                landing.hash = '';
                const pdfUrl = landing.href.replace(/\/$/, '') + '.full.pdf';
                updateLink(cell, `[PDF] ${server.name}`, pdfUrl);
                return;
            }
        }
    } catch (e) {
        // Best effort: an unreachable or unparsable landing page falls through to
        // the manual links below.
    }
    updateLink(cell, '[No]', doiUrl, true);
    for (const { host, name } of servers) {
        const link = document.createElement('a');
        link.href = `https://www.${host}/content/${doi}`;
        link.target = '_blank';
        link.rel = 'noopener noreferrer';
        link.textContent = name;
        cell.appendChild(document.createTextNode(' '));
        cell.appendChild(link);
    }
}

/**
 * Links the arXiv PDF for arXiv-issued DOIs (prefix "10.48550/arXiv.").
 *
 * The prefix is matched case-insensitively: DOIs extracted from a link's URL are
 * lower-cased before they reach this function, so a case-sensitive comparison
 * would never recognise them.
 *
 * @param {string} doi - DOI to check.
 * @param {HTMLElement} cell - Status cell; receives "[PDF] ArXiv" pointing at the PDF
 *   when a HEAD request for it returns 200, otherwise "[No] ArXiv" pointing at an
 *   arXiv search for the DOI.
 * @returns {Promise<void>}
 */
async function checkArxiv(doi, cell) {
    const ARXIV_DOI_PREFIX = /^10\.48550\/arXiv\./i;
    const baseUrl = 'https://arxiv.org';
    const searchUrl = `${baseUrl}/search/?query=${encodeURIComponent(doi)}&searchtype=all`;
    if (!ARXIV_DOI_PREFIX.test(doi))
        return updateLink(cell, '[No] ArXiv', searchUrl, true);
    const arxivId = doi.replace(ARXIV_DOI_PREFIX, '');
    const pdfUrl = `${baseUrl}/pdf/${arxivId}.pdf`;
    try {
        const { status } = await httpRequest({ method: 'HEAD', url: pdfUrl });
        const available = status === 200;
        updateLink(cell, available ? '[PDF] ArXiv' : '[No] ArXiv',
                        available ? pdfUrl : searchUrl, !available);
    } catch {
        updateLink(cell, '[No] ArXiv', searchUrl, true);
    }
}

/**
 * Detaches the popup that is currently shown, if any, and clears the reference
 * to it.
 */
function removeCurrentPopup() {
    if (!currentPopup) return;
    currentPopup.remove();
    currentPopup = null;
}

/**
 * Determines the DOI a link refers to.
 *
 * The lower-cased link URL is searched first. When it contains no DOI and the
 * link is an http(s) URL, the target page is fetched and its text searched. The
 * outcome is memoised on the element (`data-doi` / `data-doi-failed`) so every
 * link is examined only once. Links whose URL contains none of the scholarly
 * keywords are rejected without any request.
 *
 * @param {HTMLAnchorElement} linkElement - Link to inspect.
 * @returns {Promise<string|null>} The cleaned DOI, or null when none was found.
 */
async function getDoiFromLink(linkElement) {
    const DOI_REGEX = /\b(10\.\d{4,}(?:\.\d+)*\/[^\s?#"&]+)/i;
    // Variant for fetched HTML: additionally stops at markup delimiters, so a DOI
    // followed directly by a tag ("10.1234/x</p>") does not swallow the tag.
    const HTML_DOI_REGEX = /\b(10\.\d{4,}(?:\.\d+)*\/[^\s?#"&<>']+)/i;
    const { dataset } = linkElement;
    if (dataset.doi) return dataset.doi;
    if (dataset.doiFailed) return null;
    const url = linkElement.href.toLowerCase();
    const keywords = [
        'doi','article','journal','abs','content','abstract',
        'pubmed','document','fulltext','research','mdpi','springer'
    ];
    if (!keywords.some(k => url.includes(k))) {
        dataset.doiFailed = 'true';
        return null;
    }
    // Strips, in this order: a trailing /meta|/full|/abs|/pdf path segment, an
    // encoded U+2069 character, trailing "." or ",", and a trailing path segment
    // of 20 or more [a-zA-Z0-9-] characters.
    const cleanDOI = doi => (doi.match(DOI_REGEX)?.[1]?.trim() ?? doi.trim()).replace(/\/(meta|full|abs|pdf)\/?$/i, "").replace(/%e2%81%a9/gi, '').replace(/[.,]+$/, '').replace(/\/[a-zA-Z0-9-]{20,}\/?$/i, '');
    let doi = url.match(DOI_REGEX)?.[1];
    // Only http(s) targets can be fetched; mailto:, javascript: and similar links
    // would just produce a failing request.
    if (!doi && /^https?:/.test(url)) {
        try {
            const res = await httpRequest({ method: 'GET', url: linkElement.href });
            doi = String(res.responseText ?? '').match(HTML_DOI_REGEX)?.[1];
        } catch (e) {
            // Best effort: an unreachable page is treated like a page without a DOI.
        }
    }
    if (doi) {
        const final = cleanDOI(doi);
        dataset.doi = final;
        return final;
    } else {
        dataset.doiFailed = 'true';
        return null;
    }
}

/**
 * Builds and shows the information popup for a DOI next to the pointer.
 *
 * Any popup that is already open is replaced. The popup consists of three header
 * lines (DOI, journal/ISSN, SJR/JIF metrics) and a table of status cells. All
 * lookups start immediately and fill in their own cell or line when they finish.
 * Text and links that originate from remote pages are inserted as DOM nodes and
 * properties, never as HTML strings.
 *
 * @param {HTMLAnchorElement} linkElement - The hovered link (not used by this function).
 * @param {string} doi - DOI to display and look up.
 * @param {number} mouseX - Page X coordinate of the pointer.
 * @param {number} mouseY - Page Y coordinate of the pointer.
 */
function showPopup(linkElement, doi, mouseX, mouseY) {
    clearTimeout(hideTimeout);
    removeCurrentPopup();
    const popup = document.createElement('div');
    popup.className = 'doi-enhancer-popup';
    currentPopup = popup;

    // Appends one header line with the given text and returns it.
    const addHeaderLine = (text) => {
        const line = document.createElement('div');
        line.className = 'doi-header';
        line.textContent = text;
        popup.appendChild(line);
        return line;
    };

    // Shows `text` inside `span`, wrapped in a link when `href` is given.
    const setMetric = (span, text, href) => {
        span.textContent = '';
        if (!href) {
            span.textContent = text;
            return;
        }
        const link = document.createElement('a');
        link.href = href;
        link.target = '_blank';
        link.rel = 'noopener noreferrer';
        link.textContent = text;
        span.appendChild(link);
    };

    addHeaderLine(`DOI: ${doi}`);
    const journalLine = addHeaderLine('...');
    const metricsLine = addHeaderLine('');
    const sjrSpan = document.createElement('span');
    sjrSpan.textContent = 'SJR: ...';
    const jifSpan = document.createElement('span');
    jifSpan.textContent = 'JIF: ...';
    metricsLine.append(sjrSpan, ' | ', jifSpan);

    const table = document.createElement('table');
    const rows = [
        ['RWD', 'PubPeer', 'Beall' ],
        ['GS', 'Semantic', 'Unpay' ],
        ['SH', 'LG', 'Anna' ],
        ['Sci', 'OpenRxiv' , 'Arxiv']
    ];

    // Status cells by key. Declared before the metadata lookup below, which reads it.
    const cellRefs = {};
    rows.forEach(rowKeys => {
        const row = table.insertRow();
        rowKeys.forEach(key => {
            const cell = row.insertCell();
            cell.textContent = '...';
            cell.className = 'status-checking';
            cellRefs[key] = cell;
        });
    });
    popup.appendChild(table);

    // Journal metadata first; the Beall check and the metric lookups need its result.
    (async () => {
        const { journal, issn, publisher } = await fetchCrossref(doi);
        journalLine.textContent = `${journal || 'N/A'} | ISSN: ${issn || 'N/A'}`;
        if (cellRefs.Beall) {
            checkBeallsList(journal, publisher, cellRefs.Beall);
        }

        if (!issn) {
            setMetric(sjrSpan, 'SJR: N/A');
            setMetric(jifSpan, 'JIF: N/A');
            return;
        }
        querySJRByISSN(issn, (sjrData) => {
            if (sjrData && sjrData.text) {
                setMetric(sjrSpan, sjrData.text, sjrData.link);
            } else {
                setMetric(sjrSpan, 'SJR: N/A');
            }
        });
        queryJIFByISSN(issn, (jifData) => {
            if (jifData && jifData.value) {
                setMetric(jifSpan, `JIF: ${jifData.value}`, jifData.link);
            } else {
                setMetric(jifSpan, 'JIF: N/A');
            }
        });
    })();

    const checks = {
        PubPeer: checkPubPeer,
        RWD: checkRetraction,
        GS: checkGoogleScholar,
        Semantic: checkSemanticScholar,
        Unpay: checkUnpaywall,
        SH: checkSciHub,
        LG: checkLibgen,
        Anna: checkAnna,
        Sci: checkSciNet,
        OpenRxiv: checkOpenRxiv,
        Arxiv: checkArxiv
    };

    for (const [key, check] of Object.entries(checks)) {
        if (cellRefs[key]) {
            check(doi, cellRefs[key]);
        }
    }

    // Moving onto the popup cancels the delayed hide started when leaving the link.
    popup.addEventListener('mouseenter', () => clearTimeout(hideTimeout));
    popup.addEventListener('mouseleave', () => removeCurrentPopup());

    document.body.appendChild(popup);
    popup.style.top = `${mouseY + 10}px`;
    popup.style.left = `${mouseX + 10}px`;
}

// Delegated hover handler: the first time the pointer enters a link, try to
// resolve a DOI for it. On success the link gets its own enter/leave listeners
// (so later hovers skip this handler) and the popup is shown.
document.addEventListener('mouseover', async (event) => {
    const target = event.target;
    if (typeof target?.closest !== 'function') return;
    const link = target.closest('a');
    // SVG <a> elements expose href as an object rather than a string; skip them.
    if (!link || !link.href || typeof link.href !== 'string') return;
    const { dataset } = link;
    if (dataset.doiCheckInProgress || dataset.doiListenerAttached || dataset.doiFailed) return;
    if (link.closest('.doi-enhancer-popup')) return;
    clearTimeout(hideTimeout);
    removeCurrentPopup();
    dataset.doiCheckInProgress = 'true';
    try {
        const doi = await getDoiFromLink(link);
        if (doi) {
            dataset.doiListenerAttached = 'true';
            link.addEventListener('mouseenter', (e) => showPopup(link, doi, e.pageX, e.pageY));
            link.addEventListener('mouseleave', () => {
                hideTimeout = setTimeout(removeCurrentPopup, 150);
            });
            // Resolving the DOI may have required a page fetch; only open the
            // popup when the pointer is still over the link.
            if (link.matches(':hover')) {
                showPopup(link, doi, event.pageX, event.pageY);
            }
        }
    } finally {
        // dataset.doiCheckInProgress maps to the attribute
        // "data-doi-check-in-progress", so the flag is cleared through dataset;
        // removeAttribute('data-doiCheckInProgress') would not touch it.
        delete dataset.doiCheckInProgress;
    }
});

})();