Scribd Enhancer All-in-One (v2.7.3)

Scribd Enhancer with OCR, export, print, parallel scraping, and clean UI. Auto language detection, smart filtering, output splitting — full feature set restored and refined. By Eliminater74.

// ==UserScript==
// @name         Scribd Enhancer All-in-One (v2.7.3)
// @namespace    https://greatest.deepsurf.us/users/Eliminater74
// @version      2.7.3
// @description  Scribd Enhancer with OCR, export, print, parallel scraping, and clean UI. Auto language detection, smart filtering, output splitting — full feature set restored and refined. By Eliminater74.
// @author       Eliminater74
// @license      MIT
// @match        *://*.scribd.com/*
// @grant        none
// @icon         https://s-f.scribdassets.com/favicon.ico
// ==/UserScript==

(function () {
  'use strict';

  // localStorage key under which the user's preferences are persisted as JSON.
  const SETTINGS_KEY = 'scribdEnhancerSettings';

  // Baseline preferences; any key missing from the persisted data falls back to these.
  const defaultSettings = {
    unblur: true,
    autoScrape: false,
    darkMode: false,
    showPreview: true,
    enableOCR: true,
    ocrLang: 'auto',
    splitEvery: 0
  };

  /**
   * Reads the persisted preferences from localStorage.
   *
   * Corrupt JSON, a non-object payload, or an inaccessible storage area must not
   * abort the whole script, so every failure falls back to an empty object
   * (which leaves only the defaults in effect).
   *
   * @returns {object} The stored preferences, or `{}` when nothing usable is stored.
   */
  function loadSettings() {
    try {
      const parsed = JSON.parse(localStorage.getItem(SETTINGS_KEY) || '{}');
      const isPlainRecord = parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed);
      return isPlainRecord ? parsed : {};
    } catch (err) {
      console.warn('[Scribd Enhancer] Ignoring unreadable saved settings:', err);
      return {};
    }
  }

  // Live settings object: defaults overlaid with whatever was persisted.
  const settings = { ...defaultSettings, ...loadSettings() };

  /**
   * Persists the current settings. Best-effort: a storage failure is logged
   * instead of thrown so the caller's remaining work still runs.
   */
  const saveSettings = () => {
    try {
      localStorage.setItem(SETTINGS_KEY, JSON.stringify(settings));
    } catch (err) {
      console.warn('[Scribd Enhancer] Could not save settings:', err);
    }
  };

  // Inject Tesseract.js from the jsDelivr CDN; the scraper only runs OCR when
  // window.Tesseract is present.
  // NOTE(review): the package specifier in this URL had been replaced by an
  // email-obfuscation placeholder ("[email protected]"), which can never resolve.
  // The originally pinned version is unknown; major version 5 is used here —
  // confirm and pin an exact version.
  const tesseractScript = document.createElement('script');
  tesseractScript.src = 'https://cdn.jsdelivr.net/npm/tesseract.js@5/dist/tesseract.min.js';
  document.head.appendChild(tesseractScript);

  // Stylesheet for the control panel (#se-ui), the preview pane (#se-preview)
  // and the global dark-mode overrides (.dark-mode).
  const enhancerCss = `
    #se-ui {
      position: fixed; bottom: 20px; right: 20px; background: #222; color: #fff;
      border-radius: 10px; padding: 10px; z-index: 9999; width: 320px;
      font-family: sans-serif; font-size: 13px; box-shadow: 0 0 10px #000;
    }
    #se-ui label, #se-ui select, #se-ui button {
      display: block; width: 100%; margin: 4px 0;
    }
    #se-ui input[type="checkbox"] { margin-right: 6px; }
    #se-ui button {
      background: #444; color: white; border: none; border-radius: 6px; padding: 6px;
    }
    #se-preview {
      position: fixed; top: 10px; right: 20px; bottom: 140px; width: 360px;
      background: #f4f4f4; color: #000; overflow: auto; padding: 10px;
      font-family: monospace; font-size: 12px; white-space: pre-wrap;
      border: 1px solid #999; z-index: 9998; border-radius: 10px;
    }
    .dark-mode #se-preview {
      background: #222; color: #eee; border-color: #555;
    }
    .dark-mode * {
      background-color: transparent !important;
      color: #e0e0e0 !important;
      border-color: #444 !important;
    }
  `;

  // Build the <style> element and attach it to <head> in one step.
  document.head.appendChild(
    Object.assign(document.createElement('style'), { textContent: enhancerCss })
  );

  /**
   * Adds or removes the `dark-mode` class on both <html> and <body> so that it
   * mirrors the current `settings.darkMode` flag.
   */
  function applyDarkMode() {
    const enabled = settings.darkMode;
    for (const root of [document.documentElement, document.body]) {
      root.classList.toggle('dark-mode', enabled);
    }
  }

  /**
   * Removes blur/promo overlays and makes hidden text readable, then keeps doing
   * so as the document changes. Does nothing when `settings.unblur` is off.
   *
   * Elements matching `.blurred_page`, `.promo_div` or `[unselectable="on"]` are
   * removed. Every remaining element whose computed text colour is fully
   * transparent gets a dark colour, and white text shadows are dropped.
   */
  function unblurContent() {
    if (!settings.unblur) return;

    // getComputedStyle reports resolved colours, so `transparent` normally shows up
    // as `rgba(0, 0, 0, 0)` and `white` as `rgb(255, 255, 255)`. Matching only the
    // keywords would miss those serialisations, so both forms are accepted.
    const isFullyTransparent = color =>
      color === 'transparent' || /^rgba\([^)]*,\s*0\)$/.test(color ?? '');
    const hasWhiteShadow = shadow =>
      /white|rgba?\(255,\s*255,\s*255/i.test(shadow ?? '');

    const cleanup = () => {
      document.querySelectorAll('.blurred_page, .promo_div, [unselectable="on"]').forEach(el => el.remove());
      document.querySelectorAll('*').forEach(el => {
        const cs = getComputedStyle(el);
        if (isFullyTransparent(cs.color)) el.style.color = '#111';
        if (hasWhiteShadow(cs.textShadow)) el.style.textShadow = 'none';
      });
    };

    cleanup();
    // Re-run whenever nodes are added or removed anywhere under <body>.
    new MutationObserver(cleanup).observe(document.body, { childList: true, subtree: true });
  }

  /**
   * Filters raw OCR output down to lines that look like real text.
   *
   * Each line is trimmed and kept only if it is at least three characters long,
   * contains an ASCII letter, and is not made up purely of non-alphanumeric
   * characters.
   *
   * @param {string} text Raw recogniser output.
   * @returns {string} The surviving lines joined with `\n`.
   */
  function cleanOCRText(text) {
    const kept = [];
    for (const rawLine of text.split('\n')) {
      const line = rawLine.trim();
      if (line.length < 3) continue;
      if (!/[a-zA-Z]/.test(line)) continue;
      if (/^[^a-zA-Z0-9]{3,}$/.test(line)) continue;
      kept.push(line);
    }
    return kept.join('\n');
  }

  /**
   * Guesses a three-letter OCR language code from characteristic accented letters.
   *
   * The candidates are tried in a fixed order (spa, fra, deu, ron) and the first
   * whose pattern occurs anywhere in the text wins; `eng` is the fallback.
   *
   * @param {string} text Sample text to inspect.
   * @returns {string} One of `spa`, `fra`, `deu`, `ron` or `eng`.
   */
  function detectLanguage(text) {
    const hints = [
      ['spa', /ñ|á|í|ó|ú/],
      ['fra', /é|è|ê|ç/],
      ['deu', /ä|ö|ü|ß/],
      ['ron', /ș|ț|ă|î|â/]
    ];
    const hit = hints.find(([, pattern]) => pattern.test(text));
    return hit ? hit[0] : 'eng';
  }

  /**
   * Loads an image and returns a greyscale PNG data URL suitable for OCR.
   *
   * The returned promise always settles. It resolves to `null` when the source is
   * empty, looks like a logo/icon/watermark, is smaller than 100px on either side,
   * fails to load, or cannot be read back from the canvas.
   *
   * @param {string} src Image URL.
   * @returns {Promise<string|null>} Data URL of the greyscale image, or `null`.
   */
  function preprocessImage(src) {
    return new Promise(resolve => {
      // Reject unusable sources before starting a network request.
      if (!src || /logo|icon|watermark/i.test(src)) return resolve(null);

      const img = new Image();
      img.crossOrigin = 'anonymous';

      // Without this handler a failed load would leave the promise pending
      // forever and stall the caller awaiting it.
      img.onerror = () => resolve(null);

      img.onload = () => {
        if (img.naturalWidth < 100 || img.naturalHeight < 100) return resolve(null);
        try {
          const canvas = document.createElement('canvas');
          canvas.width = img.naturalWidth;
          canvas.height = img.naturalHeight;
          const ctx = canvas.getContext('2d');
          ctx.drawImage(img, 0, 0);

          const imageData = ctx.getImageData(0, 0, canvas.width, canvas.height);
          const { data } = imageData;
          // Replace each pixel's R, G and B with their average; alpha is untouched.
          for (let i = 0; i < data.length; i += 4) {
            const avg = (data[i] + data[i + 1] + data[i + 2]) / 3;
            data[i] = avg;
            data[i + 1] = avg;
            data[i + 2] = avg;
          }
          ctx.putImageData(imageData, 0, 0);
          resolve(canvas.toDataURL('image/png'));
        } catch (err) {
          // Canvas reads can throw (e.g. on a tainted canvas); treat the image as unusable.
          console.warn('[Scribd Enhancer] Could not preprocess image:', err);
          resolve(null);
        }
      };

      img.src = src;
    });
  }

  /**
   * Creates the `#se-preview` element that collects scraped text.
   *
   * The element is always returned, but it only receives its initial banner text
   * and is only attached to <body> when `settings.showPreview` is enabled.
   *
   * @returns {HTMLDivElement} The preview element (possibly detached).
   */
  function createPreview() {
    const pane = Object.assign(document.createElement('div'), { id: 'se-preview' });
    if (!settings.showPreview) return pane;

    pane.textContent = '[Preview Initialized]\n';
    document.body.appendChild(pane);
    return pane;
  }

  /**
   * Downloads the collected text as one file, or as several files when
   * `settings.splitEvery` is a positive number.
   *
   * When splitting, the content is cut in front of every `[Page N]` marker and
   * every `splitEvery` consecutive pieces are joined with a newline into one file
   * named `scribd_part<k>.<ext>`. Otherwise a single `scribd_output.<ext>` is saved.
   *
   * @param {string} content Text to export.
   * @param {string} ext File extension without the dot (e.g. `txt`, `html`).
   */
  function exportOutput(content, ext) {
    // `text/txt` is not a registered type; map the known extensions to real ones.
    const knownTypes = new Map([['txt', 'text/plain'], ['html', 'text/html']]);
    const mimeType = knownTypes.get(ext) ?? `text/${ext}`;

    // Saves one piece of text under the given file name.
    const download = (text, filename) => {
      const url = URL.createObjectURL(new Blob([text], { type: mimeType }));
      const a = document.createElement('a');
      a.href = url;
      a.download = filename;
      a.click();
      // Release the blob once the download has had time to start; revoking
      // synchronously could cancel it, never revoking leaks the blob.
      setTimeout(() => URL.revokeObjectURL(url), 1000);
    };

    const split = settings.splitEvery;
    if (!split || split < 1) {
      download(content, `scribd_output.${ext}`);
      return;
    }

    // Lookahead split: each piece starts at a "[Page N]" marker (the first piece
    // holds whatever precedes the first marker).
    const pieces = content.split(/(?=\[Page \d+])/);
    for (let i = 0; i < pieces.length; i += split) {
      const chunk = pieces.slice(i, i + split).join('\n');
      download(chunk, `scribd_part${Math.floor(i / split) + 1}.${ext}`);
    }
  }

  /**
   * Opens a print view containing the given text and triggers the browser's
   * print dialog after a short delay.
   *
   * The text is HTML-escaped before being written, so scraped content containing
   * `<`, `>` or `&` is shown literally instead of being parsed as markup. If the
   * window cannot be opened (e.g. a pop-up blocker), the user is told and nothing
   * else happens.
   *
   * @param {string} content Text to print.
   */
  function printToPDF(content) {
    const entities = { '&': '&amp;', '<': '&lt;', '>': '&gt;' };
    const escapeHtml = text => String(text).replace(/[&<>]/g, ch => entities[ch]);

    const win = window.open('', 'PrintView');
    if (!win) {
      // window.open returns null when the pop-up is blocked.
      alert('❌ Print window was blocked. Allow pop-ups for this site and try again.');
      return;
    }

    win.document.write(`<html><head><title>Scribd Print</title></head><body><pre>${escapeHtml(content)}</pre></body></html>`);
    win.document.close();
    win.focus();
    // Give the new document a moment to render before opening the print dialog.
    setTimeout(() => win.print(), 600);
  }

  /**
   * Scrapes text (and optionally OCR text from images) out of the given page
   * elements and writes the result into the preview element.
   *
   * Up to four workers pull pages from a shared counter. Each page's output is
   * kept in its own slot and the preview is re-rendered as
   * "text present at start + slots in page order", so the result is always in
   * page order and a page's `[OCR]` lines stay directly under that page, no
   * matter which worker finishes first.
   *
   * OCR runs only when `settings.enableOCR` is on and `window.Tesseract` exists.
   * A failure while processing one image is logged and skipped so the remaining
   * images and pages are still scraped. An alert reports the page count at the end.
   *
   * @param {Element[]} pages Page elements to scrape, in document order.
   * @param {HTMLElement} preview Element whose `textContent` receives the output.
   * @returns {Promise<void>} Settles once every page has been processed.
   */
  async function scrapePages(pages, preview) {
    const concurrency = 4;
    const baseText = preview.textContent;
    const sections = Array.from({ length: pages.length }, () => '');
    const firstText = [];
    let index = 0;

    // Adds output to page i's slot and re-renders the preview in page order.
    const appendToPage = (i, chunk) => {
      sections[i] += chunk;
      preview.textContent = baseText + sections.join('');
    };

    async function scrape(page, i) {
      page.scrollIntoView();
      // Short pause after scrolling before reading the page's text.
      await new Promise(r => setTimeout(r, 300));

      let found = false;
      const text = (page.innerText ?? '').trim();
      if (text) {
        appendToPage(i, `[Page ${i + 1}] ✅\n${text}\n\n`);
        firstText.push(text);
        found = true;
      }

      if (settings.enableOCR && window.Tesseract) {
        for (const img of page.querySelectorAll('img')) {
          try {
            const processed = await preprocessImage(img.src || '');
            if (!processed) continue;
            // In auto mode the language is guessed from the text collected so far.
            const lang = settings.ocrLang === 'auto' ? detectLanguage(firstText.join(' ')) : settings.ocrLang;
            const result = await window.Tesseract.recognize(processed, lang);
            const ocrText = cleanOCRText(result?.data?.text || '');
            if (ocrText) {
              appendToPage(i, `[OCR] ${ocrText}\n\n`);
              found = true;
            }
          } catch (err) {
            // One unreadable image must not abort the whole scrape.
            console.warn(`[Scribd Enhancer] OCR failed on page ${i + 1}:`, err);
          }
        }
      }

      if (!found) appendToPage(i, `[Page ${i + 1}] ❌ No content\n\n`);
    }

    // Each worker keeps claiming the next unprocessed page until none remain.
    // `index++` runs synchronously between awaits, so no page is claimed twice.
    const workerCount = Math.min(concurrency, pages.length);
    const tasks = Array.from({ length: workerCount }, async () => {
      while (index < pages.length) {
        const i = index++;
        await scrape(pages[i], i);
      }
    });
    await Promise.all(tasks);
    alert(`✅ Scraped ${pages.length} pages.`);
  }

  /**
   * Builds the `#se-ui` control panel, attaches it to <body>, initialises the
   * controls from `settings`, and wires up all handlers.
   *
   * Changing any checkbox or select re-reads every control into `settings`,
   * persists them, re-applies dark mode, and attaches or detaches the preview
   * element to match the "Show Preview" checkbox. The buttons start a scrape,
   * export the preview text as TXT or HTML, or open the print view.
   *
   * @param {HTMLElement} preview The preview element whose text is exported/printed.
   */
  function createUI(preview) {
    const ui = document.createElement('div');
    ui.id = 'se-ui';
    ui.innerHTML = `
      <label><input type="checkbox" id="opt-unblur"> Unblur</label>
      <label><input type="checkbox" id="opt-autoscrape"> Auto Scrape</label>
      <label><input type="checkbox" id="opt-dark"> Dark Mode</label>
      <label><input type="checkbox" id="opt-preview"> Show Preview</label>
      <label>OCR:
        <select id="opt-lang">
          <option value="auto">Auto</option>
          <option value="eng">English</option>
          <option value="spa">Spanish</option>
          <option value="fra">French</option>
          <option value="deu">German</option>
        </select>
      </label>
      <label>Split Every:
        <select id="opt-split">
          <option value="0">Off</option>
          <option value="100">100</option>
          <option value="250">250</option>
          <option value="500">500</option>
        </select>
      </label>
      <button id="btn-scrape">📖 Scrape Pages</button>
      <button id="btn-export">💾 Export TXT</button>
      <button id="btn-html">🧾 Export HTML</button>
      <button id="btn-print">🖨️ Print to PDF</button>
    `;
    document.body.appendChild(ui);

    const find = selector => ui.querySelector(selector);

    // Escapes text before it is embedded in exported HTML so scraped content
    // containing markup characters is shown literally.
    const entities = { '&': '&amp;', '<': '&lt;', '>': '&gt;' };
    const escapeHtml = text => String(text).replace(/[&<>]/g, ch => entities[ch]);

    // Reflect the stored settings in the controls.
    find('#opt-unblur').checked = settings.unblur;
    find('#opt-autoscrape').checked = settings.autoScrape;
    find('#opt-dark').checked = settings.darkMode;
    find('#opt-preview').checked = settings.showPreview;
    find('#opt-lang').value = settings.ocrLang;
    find('#opt-split').value = settings.splitEvery;

    // One shared handler: read every control back into settings and apply them.
    const syncSettings = () => {
      settings.unblur = find('#opt-unblur').checked;
      settings.autoScrape = find('#opt-autoscrape').checked;
      settings.darkMode = find('#opt-dark').checked;
      settings.showPreview = find('#opt-preview').checked;
      settings.ocrLang = find('#opt-lang').value;
      // Explicit radix; an unparsable value falls back to 0 ("Off").
      settings.splitEvery = Number.parseInt(find('#opt-split').value, 10) || 0;
      saveSettings();
      applyDarkMode();

      // Make the "Show Preview" checkbox take effect immediately.
      if (settings.showPreview) {
        if (!preview.isConnected) document.body.appendChild(preview);
      } else {
        preview.remove();
      }
    };
    ui.querySelectorAll('input, select').forEach(input => {
      input.onchange = syncSettings;
    });

    find('#btn-scrape').onclick = () => {
      const pages = [...document.querySelectorAll(
        '.page, .reader_column, [id^="page_container"], .outer_page, .abs_page, .scribd_page, .text_layer'
      )];
      if (!pages.length) return alert('❌ No pages found.');
      // scrapePages is async: surface failures instead of leaving the promise floating.
      scrapePages(pages, preview).catch(err => {
        console.error('[Scribd Enhancer] Scrape failed:', err);
        alert('❌ Scrape failed. See the console for details.');
      });
    };
    find('#btn-export').onclick = () => exportOutput(preview.textContent, 'txt');
    find('#btn-html').onclick = () => exportOutput(`<html><body><pre>${escapeHtml(preview.textContent)}</pre></body></html>`, 'html');
    find('#btn-print').onclick = () => printToPDF(preview.textContent);
  }

  /**
   * Entry point: applies dark mode and unblurring, creates the preview and the
   * control panel, and starts a scrape right away when auto-scrape is enabled.
   */
  function startEnhancer() {
    applyDarkMode();
    unblurContent();
    const preview = createPreview();
    createUI(preview);
    if (settings.autoScrape) document.querySelector('#btn-scrape').click();
  }

  // The script can be injected after the page's `load` event has already fired,
  // in which case a `load` listener would never run. Start immediately when the
  // document is already complete; otherwise wait for `load` once.
  if (document.readyState === 'complete') {
    startEnhancer();
  } else {
    window.addEventListener('load', startEnhancer, { once: true });
  }
})();