- // ==UserScript==
- // @name Scribd Enhancer All-in-One (v2.7.3)
- // @namespace https://greatest.deepsurf.us/users/Eliminater74
- // @version 2.7.3
- // @description Scribd Enhancer with OCR, export, print, parallel scraping, and clean UI. Auto language detection, smart filtering, output splitting — full feature set restored and refined. By Eliminater74.
- // @author Eliminater74
- // @license MIT
- // @match *://*.scribd.com/*
- // @grant none
- // @icon https://s-f.scribdassets.com/favicon.ico
- // ==/UserScript==
-
- (function () {
- 'use strict';
-
// localStorage key under which the user's options are persisted as JSON.
const SETTINGS_KEY = 'scribdEnhancerSettings';

// Factory defaults; any key missing from storage falls back to these values.
const defaultSettings = {
    unblur: true,
    autoScrape: false,
    darkMode: false,
    showPreview: true,
    enableOCR: true,
    ocrLang: 'auto',
    splitEvery: 0
};

/**
 * Reads the persisted settings from localStorage.
 *
 * Never throws: unreadable storage, malformed JSON, or a JSON value that is
 * not a plain object all yield an empty object, so the caller simply ends up
 * with the defaults instead of the whole script dying at startup.
 *
 * @returns {object} The stored overrides, or `{}` when nothing usable is stored.
 */
function loadStoredSettings() {
    try {
        const parsed = JSON.parse(localStorage.getItem(SETTINGS_KEY) || '{}');
        const isPlainObject = parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed);
        // Spreading a string or array would leak numeric index keys into settings.
        return isPlainObject ? parsed : {};
    } catch (err) {
        console.warn('[Scribd Enhancer] Ignoring unreadable saved settings:', err);
        return {};
    }
}

// Live, mutable settings object: defaults overlaid with whatever was stored.
const settings = { ...defaultSettings, ...loadStoredSettings() };

/**
 * Persists the current settings object. Storage failures (quota exceeded,
 * storage disabled) are logged rather than thrown, so a failed save cannot
 * break the UI handler that triggered it.
 */
const saveSettings = () => {
    try {
        localStorage.setItem(SETTINGS_KEY, JSON.stringify(settings));
    } catch (err) {
        console.warn('[Scribd Enhancer] Could not save settings:', err);
    }
};
-
// Inject Tesseract.js (pinned to 4.0.2) from the jsDelivr CDN via a <script> tag in <head>.
const tesseractScript = Object.assign(document.createElement('script'), {
    src: 'https://cdn.jsdelivr.net/npm/tesseract.js@4.0.2/dist/tesseract.min.js'
});
document.head.append(tesseractScript);
-
// Stylesheet for the floating control panel (#se-ui), the text preview
// (#se-preview) and the optional dark mode (class `dark-mode`).
//
// The blanket `.dark-mode *` rule forces every background to transparent with
// !important, so on its own it never paints anything dark and it also wipes
// out the panel/preview backgrounds. The rules after it therefore:
//   - give the root element a dark backdrop (`.dark-mode *` only matches
//     descendants, so <html> itself has to be styled explicitly), and
//   - restore this script's own widgets with higher-specificity !important rules.
const style = document.createElement('style');
style.textContent = `
    #se-ui {
        position: fixed; bottom: 20px; right: 20px; background: #222; color: #fff;
        border-radius: 10px; padding: 10px; z-index: 9999; width: 320px;
        font-family: sans-serif; font-size: 13px; box-shadow: 0 0 10px #000;
    }
    #se-ui label, #se-ui select, #se-ui button {
        display: block; width: 100%; margin: 4px 0;
    }
    #se-ui input[type="checkbox"] { margin-right: 6px; }
    #se-ui button {
        background: #444; color: white; border: none; border-radius: 6px; padding: 6px;
    }
    #se-preview {
        position: fixed; top: 10px; right: 20px; bottom: 140px; width: 360px;
        background: #f4f4f4; color: #000; overflow: auto; padding: 10px;
        font-family: monospace; font-size: 12px; white-space: pre-wrap;
        border: 1px solid #999; z-index: 9998; border-radius: 10px;
    }
    html.dark-mode {
        background-color: #121212 !important;
    }
    .dark-mode * {
        background-color: transparent !important;
        color: #e0e0e0 !important;
        border-color: #444 !important;
    }
    .dark-mode #se-ui {
        background-color: #222 !important;
    }
    .dark-mode #se-ui button, .dark-mode #se-ui select, .dark-mode #se-ui option {
        background-color: #444 !important;
    }
    .dark-mode #se-preview {
        background-color: #222 !important; color: #eee !important; border-color: #555 !important;
    }
`;
document.head.appendChild(style);
-
/**
 * Mirrors `settings.darkMode` onto the page: the `dark-mode` class is forced
 * on (or off) for both the <html> element and <body>.
 */
function applyDarkMode() {
    const enabled = settings.darkMode;
    for (const node of [document.documentElement, document.body]) {
        node.classList.toggle('dark-mode', enabled);
    }
}
-
/**
 * Strips blur/promo overlays and makes hidden text readable, then keeps doing
 * so as the page adds content. Does nothing when `settings.unblur` is off.
 *
 * Each cleanup pass:
 *   - removes `.blurred_page`, `.promo_div` and `[unselectable="on"]` elements;
 *   - recolours elements whose computed text colour is fully transparent;
 *   - drops white text shadows.
 */
function unblurContent() {
    if (!settings.unblur) return;

    // getComputedStyle reports resolved colours as rgb()/rgba() strings, so the
    // keyword forms ('transparent', 'white') essentially never appear there;
    // both the keyword and the serialised form are accepted.
    const isFullyTransparent = (color) =>
        color === 'transparent' ||
        /^rgba\(\s*\d+\s*,\s*\d+\s*,\s*\d+\s*,\s*0(?:\.0+)?\s*\)$/.test(color);
    const hasWhiteShadow = (shadow) =>
        /white|rgba?\(\s*255\s*,\s*255\s*,\s*255\b/.test(shadow);

    const cleanup = () => {
        document.querySelectorAll('.blurred_page, .promo_div, [unselectable="on"]').forEach(el => el.remove());
        document.querySelectorAll('*').forEach(el => {
            const cs = getComputedStyle(el);
            if (isFullyTransparent(cs.color)) el.style.color = '#111';
            if (cs.textShadow && hasWhiteShadow(cs.textShadow)) el.style.textShadow = 'none';
        });
    };

    // A pass walks the entire DOM, so bursts of mutations (including the
    // preview being appended to during scraping) are coalesced into one pass.
    let pending = false;
    const scheduleCleanup = () => {
        if (pending) return;
        pending = true;
        setTimeout(() => {
            pending = false;
            cleanup();
        }, 150);
    };

    cleanup();
    new MutationObserver(scheduleCleanup).observe(document.body, { childList: true, subtree: true });
}
-
/**
 * Filters OCR output down to lines that look like real text.
 *
 * Each line is trimmed and kept only when it is at least 3 characters long,
 * contains at least one ASCII letter, and is not made up purely of
 * non-alphanumeric characters.
 *
 * @param {string} text - Raw OCR output, lines separated by "\n".
 * @returns {string} The surviving lines joined with "\n".
 */
function cleanOCRText(text) {
    const kept = [];
    for (const raw of text.split('\n')) {
        const line = raw.trim();
        if (line.length < 3) continue;
        if (!/[a-zA-Z]/.test(line)) continue;
        if (/^[^a-zA-Z0-9]{3,}$/.test(line)) continue;
        kept.push(line);
    }
    return kept.join('\n');
}
-
/**
 * Guesses a Tesseract language code from the accented characters in `text`.
 *
 * The text is NFC-normalised and lower-cased first, so upper-case letters
 * ("Ñ") and decomposed accents ("n" + U+0303) are recognised too. Every
 * language's marker characters are counted and the language with the most
 * hits wins; on a tie the earlier entry in the list wins. Text with no marker
 * characters is reported as English.
 *
 * @param {string} text - Sample text already extracted from the document.
 * @returns {string} One of 'spa', 'fra', 'deu', 'ron' or 'eng'.
 */
function detectLanguage(text) {
    const signatures = [
        ['spa', /[ñáíóú]/g],
        ['fra', /[éèêç]/g],
        ['deu', /[äöüß]/g],
        ['ron', /[șțăîâ]/g]
    ];
    const normalized = String(text ?? '').normalize('NFC').toLowerCase();

    let bestLang = 'eng';
    let bestCount = 0;
    for (const [lang, markers] of signatures) {
        const count = normalized.match(markers)?.length ?? 0;
        // Strictly greater: keeps the earlier language when counts are equal.
        if (count > bestCount) {
            bestLang = lang;
            bestCount = count;
        }
    }
    return bestLang;
}
-
/**
 * Loads an image and returns a grayscale PNG data URL suitable for OCR.
 *
 * The returned promise always settles. It resolves to `null` when the image
 * should be skipped or cannot be used:
 *   - empty URL, or URL containing "logo", "icon" or "watermark";
 *   - the image fails to load (network or CORS failure);
 *   - the image is smaller than 100px in either dimension;
 *   - the canvas cannot be read back (e.g. a tainted canvas).
 *
 * @param {string} src - Image URL.
 * @returns {Promise<string|null>} PNG data URL of the grayscale image, or null.
 */
function preprocessImage(src) {
    return new Promise(resolve => {
        // Decide from the URL alone before spending a network request on it.
        if (!src || /logo|icon|watermark/i.test(src)) {
            resolve(null);
            return;
        }

        const img = new Image();
        img.crossOrigin = 'anonymous';
        // Without this a failed load would leave the promise pending forever
        // and stall the scrape worker awaiting it.
        img.onerror = () => resolve(null);
        img.onload = () => {
            if (img.naturalWidth < 100 || img.naturalHeight < 100) {
                resolve(null);
                return;
            }
            try {
                const canvas = document.createElement('canvas');
                canvas.width = img.naturalWidth;
                canvas.height = img.naturalHeight;
                const ctx = canvas.getContext('2d');
                ctx.drawImage(img, 0, 0);
                const imageData = ctx.getImageData(0, 0, canvas.width, canvas.height);
                const { data } = imageData;
                // RGBA layout: average the colour channels, leave alpha untouched.
                for (let i = 0; i < data.length; i += 4) {
                    const avg = (data[i] + data[i + 1] + data[i + 2]) / 3;
                    data[i] = avg;
                    data[i + 1] = avg;
                    data[i + 2] = avg;
                }
                ctx.putImageData(imageData, 0, 0);
                resolve(canvas.toDataURL('image/png'));
            } catch (err) {
                // An exception inside an event handler would otherwise be
                // lost, leaving the promise unresolved.
                console.warn('[Scribd Enhancer] Could not preprocess image:', err);
                resolve(null);
            }
        };
        img.src = src;
    });
}
-
/**
 * Builds the `#se-preview` element that accumulates scraped text.
 *
 * The element is always created and returned; it is seeded with an
 * initialisation line and attached to <body> only when
 * `settings.showPreview` is enabled.
 *
 * @returns {HTMLDivElement} The preview element (possibly detached).
 */
function createPreview() {
    const preview = Object.assign(document.createElement('div'), { id: 'se-preview' });
    if (!settings.showPreview) return preview;

    preview.textContent = '[Preview Initialized]\n';
    document.body.appendChild(preview);
    return preview;
}
-
/**
 * Downloads `content` as one file, or as several files of
 * `settings.splitEvery` pages each when splitting is enabled.
 *
 * Pages are delimited by "[Page N]" markers. Text before the first marker
 * (the preview preamble) stays with the first page instead of counting as a
 * page of its own, and parts are cut without inserting extra characters, so
 * concatenating all parts reproduces `content` exactly.
 *
 * NOTE(review): when `content` is a wrapped HTML document and splitting is on,
 * only the first part carries the opening tags and only the last part the
 * closing tags; the wrapper would have to be applied per part by the caller.
 *
 * @param {string} content - Full text to export.
 * @param {string} ext - File extension without the dot ('txt' or 'html').
 */
function exportOutput(content, ext) {
    // `text/txt` is not a registered type; map known extensions to real ones.
    const mimeTypes = new Map([
        ['txt', 'text/plain'],
        ['html', 'text/html']
    ]);
    const mimeType = `${mimeTypes.get(ext) ?? `text/${ext}`};charset=utf-8`;

    // Saves one file through a temporary object URL.
    const download = (text, filename) => {
        const url = URL.createObjectURL(new Blob([text], { type: mimeType }));
        const a = document.createElement('a');
        a.href = url;
        a.download = filename;
        a.click();
        // Release the blob afterwards; deferred so the browser has started the
        // download before the URL becomes invalid.
        setTimeout(() => URL.revokeObjectURL(url), 10000);
    };

    // Number() also accepts a numeric string coming from older stored settings.
    const split = Number(settings.splitEvery);
    if (!Number.isInteger(split) || split < 1) {
        download(content, `scribd_output.${ext}`);
        return;
    }

    const pageChunks = content.split(/(?=\[Page \d+])/);
    if (pageChunks.length > 1 && !/^\[Page \d+]/.test(pageChunks[0])) {
        // Fold the leading preamble into the first real page.
        pageChunks.splice(0, 2, pageChunks[0] + pageChunks[1]);
    }

    let part = 1;
    for (let i = 0; i < pageChunks.length; i += split) {
        download(pageChunks.slice(i, i + split).join(''), `scribd_part${part}.${ext}`);
        part += 1;
    }
}
-
/**
 * Opens a bare print window containing `content` in a <pre> block and
 * triggers the browser's print dialog.
 *
 * @param {string} content - Plain text to print.
 */
function printToPDF(content) {
    const win = window.open('', 'PrintView');
    if (!win) {
        // window.open returns null when the pop-up is blocked.
        alert('❌ Could not open the print window. Please allow pop-ups for this site.');
        return;
    }

    // Write only a static skeleton, then insert the scraped text via
    // textContent so "<" and "&" in the text are shown literally instead of
    // being parsed as markup.
    win.document.write('<html><head><title>Scribd Print</title></head><body><pre></pre></body></html>');
    win.document.close();
    win.document.querySelector('pre').textContent = content;

    win.focus();
    // Short delay so the new document is laid out before the dialog opens.
    setTimeout(() => win.print(), 600);
}
-
/**
 * Scrapes text (and, when enabled, OCR text from images) out of every page
 * element and appends the results to the preview.
 *
 * Up to 4 pages are processed concurrently, but output is stored per page and
 * the preview is rebuilt in page order after every addition, so pages and
 * their OCR text never appear interleaved or out of order. A failing OCR job
 * or page is logged and skipped instead of aborting the whole run.
 *
 * NOTE(review): the preview is rebuilt from the text it held when this call
 * started; two overlapping calls would overwrite each other's output.
 *
 * @param {Element[]} pages - Page container elements, in document order.
 * @param {HTMLElement} preview - Element whose textContent receives the output.
 * @returns {Promise<void>} Resolves after all pages were handled and the
 *   completion alert was shown.
 */
async function scrapePages(pages, preview) {
    const concurrency = 4;
    const baseText = preview.textContent;
    const sections = Array.from({ length: pages.length }, () => '');
    const firstText = [];
    let index = 0;

    // Adds output to page i's section and re-renders everything in page order.
    const appendToPage = (i, chunk) => {
        sections[i] += chunk;
        preview.textContent = baseText + sections.join('');
    };

    // Runs OCR over each image of one page; returns whether any text was found.
    async function ocrPageImages(page, i) {
        let found = false;
        for (const img of page.querySelectorAll('img')) {
            try {
                const processed = await preprocessImage(img.src || '');
                if (!processed) continue;
                const lang = settings.ocrLang === 'auto'
                    ? detectLanguage(firstText.join(' '))
                    : settings.ocrLang;
                const result = await window.Tesseract.recognize(processed, lang);
                const ocrText = cleanOCRText(result.data.text || '');
                if (ocrText) {
                    appendToPage(i, `[OCR] ${ocrText}\n\n`);
                    found = true;
                }
            } catch (err) {
                console.warn(`[Scribd Enhancer] OCR failed on page ${i + 1}:`, err);
            }
        }
        return found;
    }

    async function scrape(page, i) {
        page.scrollIntoView();
        // Pause after scrolling before reading the page's text.
        await new Promise(r => setTimeout(r, 300));

        let found = false;
        const text = (page.innerText ?? '').trim();
        if (text) {
            appendToPage(i, `[Page ${i + 1}] ✅\n${text}\n\n`);
            firstText.push(text);
            found = true;
        }

        if (settings.enableOCR && window.Tesseract) {
            const ocrFound = await ocrPageImages(page, i);
            found = found || ocrFound;
        }

        if (!found) appendToPage(i, `[Page ${i + 1}] ❌ No content\n\n`);
    }

    // Worker pool: each worker claims the next unprocessed index until none remain.
    const worker = async () => {
        while (index < pages.length) {
            const i = index++;
            try {
                await scrape(pages[i], i);
            } catch (err) {
                console.error(`[Scribd Enhancer] Failed to scrape page ${i + 1}:`, err);
            }
        }
    };
    await Promise.all(Array.from({ length: concurrency }, worker));
    alert(`✅ Scraped ${pages.length} pages.`);
}
-
/**
 * Builds the floating control panel (#se-ui), syncs it with the current
 * settings and wires up all controls.
 *
 * Changing any option stores the new settings and immediately re-applies
 * dark mode and preview visibility. The buttons scrape the page, export the
 * preview text as TXT/HTML, or send it to the print dialog.
 *
 * @param {HTMLElement} preview - Preview element that collects scraped text.
 */
function createUI(preview) {
    // Escapes text for use inside HTML element content (the exported <pre>).
    const escapeHtml = (text) => String(text)
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;');

    const ui = document.createElement('div');
    ui.id = 'se-ui';
    ui.innerHTML = `
        <label><input type="checkbox" id="opt-unblur"> Unblur</label>
        <label><input type="checkbox" id="opt-autoscrape"> Auto Scrape</label>
        <label><input type="checkbox" id="opt-dark"> Dark Mode</label>
        <label><input type="checkbox" id="opt-preview"> Show Preview</label>
        <label><input type="checkbox" id="opt-ocr"> Enable OCR</label>
        <label>OCR:
            <select id="opt-lang">
                <option value="auto">Auto</option>
                <option value="eng">English</option>
                <option value="spa">Spanish</option>
                <option value="fra">French</option>
                <option value="deu">German</option>
            </select>
        </label>
        <label>Split Every:
            <select id="opt-split">
                <option value="0">Off</option>
                <option value="100">100</option>
                <option value="250">250</option>
                <option value="500">500</option>
            </select>
        </label>
        <button id="btn-scrape">📖 Scrape Pages</button>
        <button id="btn-export">💾 Export TXT</button>
        <button id="btn-html">🧾 Export HTML</button>
        <button id="btn-print">🖨️ Print to PDF</button>
    `;
    document.body.appendChild(ui);

    const find = (selector) => ui.querySelector(selector);

    // Reflect the stored settings in the controls.
    find('#opt-unblur').checked = settings.unblur;
    find('#opt-autoscrape').checked = settings.autoScrape;
    find('#opt-dark').checked = settings.darkMode;
    find('#opt-preview').checked = settings.showPreview;
    find('#opt-ocr').checked = settings.enableOCR;
    find('#opt-lang').value = settings.ocrLang;
    find('#opt-split').value = String(settings.splitEvery);

    // Attaches or detaches the preview so the "Show Preview" option takes
    // effect immediately rather than only after a reload.
    const syncPreviewVisibility = () => {
        if (!settings.showPreview) {
            preview.remove();
        } else if (!preview.isConnected) {
            document.body.appendChild(preview);
        }
    };

    // `change` bubbles, so one listener on the panel covers every control.
    ui.addEventListener('change', () => {
        settings.unblur = find('#opt-unblur').checked;
        settings.autoScrape = find('#opt-autoscrape').checked;
        settings.darkMode = find('#opt-dark').checked;
        settings.showPreview = find('#opt-preview').checked;
        settings.enableOCR = find('#opt-ocr').checked;
        settings.ocrLang = find('#opt-lang').value;
        // Explicit radix; an unparsable value means "off" instead of NaN.
        settings.splitEvery = Number.parseInt(find('#opt-split').value, 10) || 0;
        saveSettings();
        applyDarkMode();
        syncPreviewVisibility();
    });

    find('#btn-scrape').addEventListener('click', () => {
        // NOTE(review): these selectors can match nested containers (e.g. a
        // text layer inside a page), which would scrape the same text more
        // than once - verify against the live Scribd DOM.
        const pages = [...document.querySelectorAll(
            '.page, .reader_column, [id^="page_container"], .outer_page, .abs_page, .scribd_page, .text_layer'
        )];
        if (!pages.length) {
            alert('❌ No pages found.');
            return;
        }
        scrapePages(pages, preview).catch(err => {
            console.error('[Scribd Enhancer] Scraping failed:', err);
        });
    });
    find('#btn-export').addEventListener('click', () => exportOutput(preview.textContent, 'txt'));
    find('#btn-html').addEventListener('click', () => {
        // Scraped text is untrusted; escape it so it cannot inject markup.
        exportOutput(`<html><body><pre>${escapeHtml(preview.textContent)}</pre></body></html>`, 'html');
    });
    find('#btn-print').addEventListener('click', () => printToPDF(preview.textContent));
}
-
/**
 * One-time start-up: applies dark mode, starts unblurring, creates the
 * preview and control panel, and kicks off a scrape when Auto Scrape is on.
 */
const initEnhancer = () => {
    applyDarkMode();
    unblurContent();
    const preview = createPreview();
    createUI(preview);
    if (settings.autoScrape) document.querySelector('#btn-scrape').click();
};

// Userscript managers may inject the script after the page's `load` event has
// already fired; a listener registered at that point would never be called.
if (document.readyState === 'complete') {
    initEnhancer();
} else {
    window.addEventListener('load', initEnhancer, { once: true });
}
- })();