您需要先安装一个扩展,例如 篡改猴、Greasemonkey 或 暴力猴,之后才能安装此脚本。
您需要先安装一个扩展,例如 篡改猴 或 暴力猴,之后才能安装此脚本。
您需要先安装一个扩展,例如 篡改猴 或 暴力猴,之后才能安装此脚本。
您需要先安装一个扩展,例如 篡改猴 或 Userscripts ,之后才能安装此脚本。
您需要先安装一款用户脚本管理器扩展,例如 Tampermonkey,才能安装此脚本。
您需要先安装用户脚本管理器扩展后才能安装此脚本。
Scribd Enhancer with OCR, export, print, parallel scraping, and clean UI. Auto language detection, smart filtering, output splitting — full feature set restored and refined. By Eliminater74.
// ==UserScript==
// @name         Scribd Enhancer All-in-One (v2.7.3)
// @namespace    https://greatest.deepsurf.us/users/Eliminater74
// @version      2.7.3
// @description  Scribd Enhancer with OCR, export, print, parallel scraping, and clean UI. Auto language detection, smart filtering, output splitting — full feature set restored and refined. By Eliminater74.
// @author       Eliminater74
// @license      MIT
// @match        *://*.scribd.com/*
// @grant        none
// @icon         https://s-f.scribdassets.com/favicon.ico
// ==/UserScript==

(function () {
  'use strict';

  const LOG_PREFIX = '[Scribd Enhancer]';
  const SETTINGS_KEY = 'scribdEnhancerSettings';

  const defaultSettings = {
    unblur: true,
    autoScrape: false,
    darkMode: false,
    showPreview: true,
    enableOCR: true,
    ocrLang: 'auto',
    splitEvery: 0,
  };

  /**
   * Reads persisted settings from localStorage and merges them over the defaults.
   * Unreadable or non-object stored data is ignored so the script still starts.
   *
   * @returns {object} The effective settings object.
   */
  function loadSettings() {
    try {
      const stored = JSON.parse(localStorage.getItem(SETTINGS_KEY) || '{}');
      if (stored !== null && typeof stored === 'object' && !Array.isArray(stored)) {
        return { ...defaultSettings, ...stored };
      }
    } catch (err) {
      console.warn(`${LOG_PREFIX} Ignoring unreadable saved settings.`, err);
    }
    return { ...defaultSettings };
  }

  const settings = loadSettings();

  /** Persists the current settings object to localStorage. */
  const saveSettings = () => {
    try {
      localStorage.setItem(SETTINGS_KEY, JSON.stringify(settings));
    } catch (err) {
      // Storage can be unavailable or full; the in-memory settings still apply.
      console.warn(`${LOG_PREFIX} Could not save settings.`, err);
    }
  };

  // Load tesseract.js from jsDelivr so `window.Tesseract` is available for OCR.
  // NOTE(review): the package/version segment of this URL was unreadable in the
  // reviewed copy (it had been replaced by an "[email protected]" placeholder),
  // so it is pinned to the v4 major range here - confirm the intended version.
  const tesseractScript = document.createElement('script');
  tesseractScript.src = 'https://cdn.jsdelivr.net/npm/tesseract.js@4/dist/tesseract.min.js';
  document.head.appendChild(tesseractScript);

  const style = document.createElement('style');
  style.textContent = `
    #se-ui {
      position: fixed; bottom: 20px; right: 20px;
      background: #222; color: #fff; border-radius: 10px;
      padding: 10px; z-index: 9999; width: 320px;
      font-family: sans-serif; font-size: 13px;
      box-shadow: 0 0 10px #000;
    }
    #se-ui label, #se-ui select, #se-ui button {
      display: block; width: 100%; margin: 4px 0;
    }
    #se-ui input[type="checkbox"] { margin-right: 6px; }
    #se-ui button {
      background: #444; color: white; border: none;
      border-radius: 6px; padding: 6px;
    }
    #se-preview {
      position: fixed; top: 10px; right: 20px; bottom: 140px;
      width: 360px; background: #f4f4f4; color: #000;
      overflow: auto; padding: 10px; font-family: monospace;
      font-size: 12px; white-space: pre-wrap;
      border: 1px solid #999; z-index: 9998; border-radius: 10px;
    }
    .dark-mode #se-preview { background: #222; color: #eee; border-color: #555; }
    .dark-mode * {
      background-color: transparent !important;
      color: #e0e0e0 !important;
      border-color: #444 !important;
    }
  `;
  document.head.appendChild(style);

  /** Adds or removes the `dark-mode` class on <html> and <body> per settings. */
  function applyDarkMode() {
    document.documentElement.classList.toggle('dark-mode', settings.darkMode);
    document.body.classList.toggle('dark-mode', settings.darkMode);
  }

  // getComputedStyle serializes colors as rgb()/rgba(), so a fully transparent
  // color is reported as "rgba(0, 0, 0, 0)" and white as "rgb(255, 255, 255)".
  // The keyword forms are kept as well for engines that might report them.
  const TRANSPARENT_COLORS = new Set(['transparent', 'rgba(0, 0, 0, 0)']);
  const WHITE_SHADOW_MARKERS = ['white', 'rgb(255, 255, 255)'];

  /**
   * Removes blur/promo overlay elements and forces readable text styling, then
   * keeps doing so as nodes are added to the page. Does nothing when the
   * `unblur` setting is off.
   */
  function unblurContent() {
    if (!settings.unblur) return;

    let isScheduled = false;

    const cleanup = () => {
      isScheduled = false;
      document
        .querySelectorAll('.blurred_page, .promo_div, [unselectable="on"]')
        .forEach((el) => el.remove());
      document.querySelectorAll('*').forEach((el) => {
        const cs = getComputedStyle(el);
        if (TRANSPARENT_COLORS.has(cs.color)) el.style.color = '#111';
        const shadow = cs.textShadow ?? '';
        if (WHITE_SHADOW_MARKERS.some((marker) => shadow.includes(marker))) {
          el.style.textShadow = 'none';
        }
      });
    };

    cleanup();

    // A full-document scan is expensive, so bursts of mutations are coalesced
    // into a single deferred pass instead of one pass per mutation batch.
    new MutationObserver(() => {
      if (isScheduled) return;
      isScheduled = true;
      setTimeout(cleanup, 200);
    }).observe(document.body, { childList: true, subtree: true });
  }

  /**
   * Filters OCR output down to lines that look like real text: trimmed, at
   * least three characters, containing an ASCII letter, and not made up solely
   * of non-alphanumeric characters.
   *
   * @param {string} text - Raw OCR text.
   * @returns {string} The kept lines joined with newlines.
   */
  function cleanOCRText(text) {
    return text
      .split('\n')
      .map((line) => line.trim())
      .filter(
        (line) =>
          line.length >= 3 &&
          /[a-zA-Z]/.test(line) &&
          !/^[^a-zA-Z0-9]{3,}$/.test(line),
      )
      .join('\n');
  }

  /**
   * Guesses a Tesseract language code from characteristic accented letters.
   * The first matching entry wins; English is the fallback.
   *
   * @param {string} text - Sample text to inspect.
   * @returns {string} One of 'spa', 'fra', 'deu', 'ron' or 'eng'.
   */
  function detectLanguage(text) {
    const map = {
      spa: /ñ|á|í|ó|ú/,
      fra: /é|è|ê|ç/,
      deu: /ä|ö|ü|ß/,
      ron: /ș|ț|ă|î|â/,
    };
    for (const [lang, regex] of Object.entries(map)) {
      if (regex.test(text)) return lang;
    }
    return 'eng';
  }

  /**
   * Loads an image and converts it to a grayscale PNG data URL for OCR.
   *
   * @param {string} src - Image URL.
   * @returns {Promise<string|null>} The data URL, or null when the image is
   *   skipped (empty URL, smaller than 100px on a side, URL matching
   *   logo/icon/watermark) or cannot be loaded or read.
   */
  function preprocessImage(src) {
    return new Promise((resolve) => {
      if (!src) {
        resolve(null);
        return;
      }

      const img = new Image();
      img.crossOrigin = 'anonymous';

      // Without an error handler a failed load would leave this promise
      // pending forever and stall the scrape worker awaiting it.
      img.onerror = () => resolve(null);

      img.onload = () => {
        const isTooSmall = img.naturalWidth < 100 || img.naturalHeight < 100;
        if (isTooSmall || /logo|icon|watermark/i.test(src)) {
          resolve(null);
          return;
        }
        try {
          const canvas = document.createElement('canvas');
          canvas.width = img.width;
          canvas.height = img.height;
          const ctx = canvas.getContext('2d');
          ctx.drawImage(img, 0, 0);
          const imageData = ctx.getImageData(0, 0, canvas.width, canvas.height);
          const { data } = imageData;
          for (let i = 0; i < data.length; i += 4) {
            // Replace R, G and B with their average; alpha (i + 3) is untouched.
            const avg = (data[i] + data[i + 1] + data[i + 2]) / 3;
            data[i] = avg;
            data[i + 1] = avg;
            data[i + 2] = avg;
          }
          ctx.putImageData(imageData, 0, 0);
          resolve(canvas.toDataURL('image/png'));
        } catch (err) {
          console.warn(`${LOG_PREFIX} Could not preprocess image ${src}.`, err);
          resolve(null);
        }
      };

      img.src = src;
    });
  }

  /**
   * Attaches or detaches the preview panel from the document.
   *
   * @param {HTMLElement} preview - The preview panel element.
   * @param {boolean} visible - Whether the panel should be in the document.
   */
  function setPreviewVisible(preview, visible) {
    if (visible && !preview.isConnected) {
      document.body.appendChild(preview);
    } else if (!visible && preview.isConnected) {
      preview.remove();
    }
  }

  /**
   * Creates the preview panel. It is attached to the page only when the
   * `showPreview` setting is on, but is always returned because its text
   * content doubles as the scrape output buffer.
   *
   * @returns {HTMLDivElement} The preview element.
   */
  function createPreview() {
    const preview = document.createElement('div');
    preview.id = 'se-preview';
    if (settings.showPreview) {
      preview.textContent = '[Preview Initialized]\n';
      setPreviewVisible(preview, true);
    }
    return preview;
  }

  const HTML_ESCAPES = { '&': '&amp;', '<': '&lt;', '>': '&gt;' };

  /**
   * Escapes the characters that are significant inside HTML element content.
   *
   * @param {string} text - Plain text.
   * @returns {string} Text safe to place inside an element such as <pre>.
   */
  function escapeHtml(text) {
    return text.replace(/[&<>]/g, (ch) => HTML_ESCAPES[ch]);
  }

  /**
   * Wraps plain text in a minimal standalone HTML document.
   *
   * @param {string} text - Plain text to embed (escaped here).
   * @returns {string} HTML document source.
   */
  function toHtmlDocument(text) {
    return `<html><head><meta charset="utf-8"></head><body><pre>${escapeHtml(text)}</pre></body></html>`;
  }

  const MIME_TYPES = { txt: 'text/plain', html: 'text/html' };

  /**
   * Triggers a browser download of `content` under `filename`.
   *
   * @param {string} content - File contents.
   * @param {string} filename - Suggested download name.
   * @param {string} ext - Extension used to choose the MIME type.
   */
  function downloadFile(content, filename, ext) {
    const type = `${MIME_TYPES[ext] ?? 'text/plain'};charset=utf-8`;
    const url = URL.createObjectURL(new Blob([content], { type }));
    const anchor = document.createElement('a');
    anchor.href = url;
    anchor.download = filename;
    anchor.click();
    // Release the blob once the download has had time to start.
    setTimeout(() => URL.revokeObjectURL(url), 10000);
  }

  /**
   * Downloads the scraped text as one file, or as several files of
   * `settings.splitEvery` "[Page N]" sections each when splitting is enabled.
   *
   * @param {string} content - Plain scraped text (not pre-wrapped in HTML).
   * @param {string} ext - 'txt' or 'html'; HTML output is escaped and wrapped
   *   per file so every part is a complete document.
   */
  function exportOutput(content, ext) {
    const render = ext === 'html' ? toHtmlDocument : (text) => text;
    const split = settings.splitEvery;

    if (!split || split < 1) {
      downloadFile(render(content), `scribd_output.${ext}`, ext);
      return;
    }

    // Split in front of each "[Page N]" marker so the marker stays with its text.
    const sections = content.split(/(?=\[Page \d+])/);
    for (let i = 0; i < sections.length; i += split) {
      const chunk = sections.slice(i, i + split).join('\n');
      const partNumber = Math.floor(i / split) + 1;
      downloadFile(render(chunk), `scribd_part${partNumber}.${ext}`, ext);
    }
  }

  /**
   * Opens a window containing the text and invokes the browser print dialog.
   *
   * @param {string} content - Plain text to print (escaped here).
   */
  function printToPDF(content) {
    const win = window.open('', 'PrintView');
    if (!win) {
      // window.open returns null when the popup is blocked.
      alert('❌ Could not open the print window. Please allow pop-ups for this site.');
      return;
    }
    win.document.write(
      `<html><head><title>Scribd Print</title></head><body><pre>${escapeHtml(content)}</pre></body></html>`,
    );
    win.document.close();
    win.focus();
    setTimeout(() => win.print(), 600);
  }

  /**
   * Scrapes text (and, when enabled and Tesseract is loaded, OCR text from
   * images) from the given page elements using a small pool of concurrent
   * workers, writing the results into the preview in page order.
   *
   * @param {Element[]} pages - Page elements to scrape.
   * @param {HTMLElement} preview - Element whose textContent receives output.
   * @returns {Promise<void>} Resolves once every page has been processed.
   */
  async function scrapePages(pages, preview) {
    const concurrency = 4;
    const header = preview.textContent;
    // One output slot per page so concurrent workers cannot interleave or
    // reorder pages in the preview.
    const sections = new Array(pages.length).fill('');
    const collectedText = [];
    let nextIndex = 0;

    const appendToPage = (i, text) => {
      sections[i] += text;
      preview.textContent = header + sections.join('');
    };

    async function scrape(page, i) {
      page.scrollIntoView();
      // Short pause after scrolling before reading the page, as in the
      // original flow.
      await new Promise((resolve) => setTimeout(resolve, 300));

      const text = page.innerText.trim();
      if (text) {
        appendToPage(i, `[Page ${i + 1}] ✅\n${text}\n\n`);
        collectedText.push(text);
      }

      if (settings.enableOCR && window.Tesseract) {
        for (const img of page.querySelectorAll('img')) {
          try {
            const processed = await preprocessImage(img.src || '');
            if (!processed) continue;
            const lang =
              settings.ocrLang === 'auto'
                ? detectLanguage(collectedText.join(' '))
                : settings.ocrLang;
            const result = await window.Tesseract.recognize(processed, lang);
            const ocrText = cleanOCRText(result?.data?.text || '');
            if (ocrText) appendToPage(i, `[OCR] ${ocrText}\n\n`);
          } catch (err) {
            // One failed image must not abort the whole scrape.
            console.warn(`${LOG_PREFIX} OCR failed on page ${i + 1}.`, err);
          }
        }
      }

      if (!sections[i]) appendToPage(i, `[Page ${i + 1}] ❌ No content\n\n`);
    }

    const workers = Array.from({ length: concurrency }, async () => {
      while (nextIndex < pages.length) {
        const i = nextIndex;
        nextIndex += 1;
        await scrape(pages[i], i);
      }
    });

    await Promise.all(workers);
    alert(`✅ Scraped ${pages.length} pages.`);
  }

  /**
   * Builds the floating control panel, initialises it from settings and wires
   * up the option inputs and action buttons.
   *
   * @param {HTMLElement} preview - Preview/output element used by the actions.
   */
  function createUI(preview) {
    const ui = document.createElement('div');
    ui.id = 'se-ui';
    ui.innerHTML = `
      <label><input type="checkbox" id="opt-unblur"> Unblur</label>
      <label><input type="checkbox" id="opt-autoscrape"> Auto Scrape</label>
      <label><input type="checkbox" id="opt-dark"> Dark Mode</label>
      <label><input type="checkbox" id="opt-preview"> Show Preview</label>
      <label>OCR:
        <select id="opt-lang">
          <option value="auto">Auto</option>
          <option value="eng">English</option>
          <option value="spa">Spanish</option>
          <option value="fra">French</option>
          <option value="deu">German</option>
        </select>
      </label>
      <label>Split Every:
        <select id="opt-split">
          <option value="0">Off</option>
          <option value="100">100</option>
          <option value="250">250</option>
          <option value="500">500</option>
        </select>
      </label>
      <button id="btn-scrape">📖 Scrape Pages</button>
      <button id="btn-export">💾 Export TXT</button>
      <button id="btn-html">🧾 Export HTML</button>
      <button id="btn-print">🖨️ Print to PDF</button>
    `;
    document.body.appendChild(ui);

    const unblurBox = ui.querySelector('#opt-unblur');
    const autoScrapeBox = ui.querySelector('#opt-autoscrape');
    const darkBox = ui.querySelector('#opt-dark');
    const previewBox = ui.querySelector('#opt-preview');
    const langSelect = ui.querySelector('#opt-lang');
    const splitSelect = ui.querySelector('#opt-split');

    unblurBox.checked = settings.unblur;
    autoScrapeBox.checked = settings.autoScrape;
    darkBox.checked = settings.darkMode;
    previewBox.checked = settings.showPreview;
    langSelect.value = settings.ocrLang;
    splitSelect.value = String(settings.splitEvery);

    const syncSettings = () => {
      settings.unblur = unblurBox.checked;
      settings.autoScrape = autoScrapeBox.checked;
      settings.darkMode = darkBox.checked;
      settings.showPreview = previewBox.checked;
      // A stored language with no matching <option> leaves the select empty;
      // fall back to auto-detection rather than saving an empty code.
      settings.ocrLang = langSelect.value || 'auto';
      settings.splitEvery = Number.parseInt(splitSelect.value, 10) || 0;
      saveSettings();
      applyDarkMode();
      setPreviewVisible(preview, settings.showPreview);
    };
    ui.querySelectorAll('input, select').forEach((control) => {
      control.addEventListener('change', syncSettings);
    });

    const scrapeButton = ui.querySelector('#btn-scrape');
    scrapeButton.onclick = async () => {
      const pages = [
        ...document.querySelectorAll(
          '.page, .reader_column, [id^="page_container"], .outer_page, .abs_page, .scribd_page, .text_layer',
        ),
      ];
      if (!pages.length) {
        alert('❌ No pages found.');
        return;
      }
      // Disable the button so a second scrape cannot start mid-run and
      // overwrite the output of the first.
      scrapeButton.disabled = true;
      try {
        await scrapePages(pages, preview);
      } catch (err) {
        console.error(`${LOG_PREFIX} Scrape failed.`, err);
        alert('❌ Scrape failed. See the console for details.');
      } finally {
        scrapeButton.disabled = false;
      }
    };

    ui.querySelector('#btn-export').onclick = () => exportOutput(preview.textContent, 'txt');
    ui.querySelector('#btn-html').onclick = () => exportOutput(preview.textContent, 'html');
    ui.querySelector('#btn-print').onclick = () => printToPDF(preview.textContent);
  }

  /** Applies settings, builds the preview and UI, and optionally auto-scrapes. */
  function init() {
    applyDarkMode();
    unblurContent();
    const preview = createPreview();
    createUI(preview);
    if (settings.autoScrape) document.querySelector('#btn-scrape').click();
  }

  // If the script is injected after the page finished loading, the `load`
  // event has already fired and a listener would never run.
  if (document.readyState === 'complete') {
    init();
  } else {
    window.addEventListener('load', init, { once: true });
  }
})();