Scribd Enhancer All-in-One (v2.7.3)

Scribd Enhancer with OCR, export, print, parallel scraping, and clean UI. Auto language detection, smart filtering, output splitting — full feature set restored and refined. By Eliminater74.

  1. // ==UserScript==
  2. // @name Scribd Enhancer All-in-One (v2.7.3)
  3. // @namespace https://greatest.deepsurf.us/users/Eliminater74
  4. // @version 2.7.3
  5. // @description Scribd Enhancer with OCR, export, print, parallel scraping, and clean UI. Auto language detection, smart filtering, output splitting — full feature set restored and refined. By Eliminater74.
  6. // @author Eliminater74
  7. // @license MIT
  8. // @match *://*.scribd.com/*
  9. // @grant none
  10. // @icon https://s-f.scribdassets.com/favicon.ico
  11. // ==/UserScript==
  12.  
  13. (function () {
  14. 'use strict';
  15.  
  const SETTINGS_KEY = 'scribdEnhancerSettings';

  /** Defaults used for every option that is missing from the stored settings. */
  const defaultSettings = {
    unblur: true,
    autoScrape: false,
    darkMode: false,
    showPreview: true,
    enableOCR: true,
    ocrLang: 'auto',
    splitEvery: 0
  };

  /**
   * Reads the persisted settings object from localStorage.
   *
   * Storage can be unavailable (blocked by privacy settings) or hold corrupted /
   * non-object JSON; none of that should stop the script from starting, so every
   * failure falls back to an empty object and the defaults take over.
   *
   * @returns {object} The stored overrides, or `{}` when nothing usable is stored.
   */
  function readStoredSettings() {
    try {
      const parsed = JSON.parse(localStorage.getItem(SETTINGS_KEY) || '{}');
      const isPlainObject = typeof parsed === 'object' && parsed !== null && !Array.isArray(parsed);
      return isPlainObject ? parsed : {};
    } catch (err) {
      console.warn('[Scribd Enhancer] Ignoring unreadable stored settings:', err);
      return {};
    }
  }

  /** Live settings: defaults overlaid with whatever was persisted earlier. */
  const settings = { ...defaultSettings, ...readStoredSettings() };

  /** Persists the current settings; a storage failure is logged, never thrown. */
  const saveSettings = () => {
    try {
      localStorage.setItem(SETTINGS_KEY, JSON.stringify(settings));
    } catch (err) {
      console.warn('[Scribd Enhancer] Could not save settings:', err);
    }
  };
  28.  
  // Inject the Tesseract.js bundle from the CDN; the scraper later checks
  // window.Tesseract before attempting any OCR.
  const tesseractScript = Object.assign(document.createElement('script'), {
    src: 'https://cdn.jsdelivr.net/npm/tesseract.js@4.0.2/dist/tesseract.min.js',
  });
  document.head.appendChild(tesseractScript);
  32.  
  // Stylesheet for the control panel (#se-ui), the text preview (#se-preview)
  // and the optional dark mode.
  //
  // Dark mode forces every element to a transparent background with light text.
  // That only reads correctly when the page root itself is dark, and the
  // script's own widgets need higher-specificity !important rules to keep their
  // backgrounds, otherwise the blanket `.dark-mode *` rule wins over them.
  const style = document.createElement('style');
  style.textContent = `
    #se-ui {
      position: fixed; bottom: 20px; right: 20px; background: #222; color: #fff;
      border-radius: 10px; padding: 10px; z-index: 9999; width: 320px;
      font-family: sans-serif; font-size: 13px; box-shadow: 0 0 10px #000;
    }
    #se-ui label, #se-ui select, #se-ui button {
      display: block; width: 100%; margin: 4px 0;
    }
    #se-ui input[type="checkbox"] { margin-right: 6px; }
    #se-ui button {
      background: #444; color: white; border: none; border-radius: 6px; padding: 6px;
    }
    #se-preview {
      position: fixed; top: 10px; right: 20px; bottom: 140px; width: 360px;
      background: #f4f4f4; color: #000; overflow: auto; padding: 10px;
      font-family: monospace; font-size: 12px; white-space: pre-wrap;
      border: 1px solid #999; z-index: 9998; border-radius: 10px;
    }
    .dark-mode * {
      background-color: transparent !important;
      color: #e0e0e0 !important;
      border-color: #444 !important;
    }
    html.dark-mode, body.dark-mode {
      background-color: #121212 !important;
    }
    .dark-mode #se-ui {
      background-color: #222 !important;
    }
    .dark-mode #se-ui button {
      background-color: #444 !important;
    }
    .dark-mode #se-ui select, .dark-mode #se-ui option {
      background-color: #333 !important;
    }
    .dark-mode #se-preview {
      background-color: #222 !important; color: #eee !important; border-color: #555 !important;
    }
  `;
  document.head.appendChild(style);
  63.  
  /**
   * Adds or removes the `dark-mode` class on both <html> and <body> so that it
   * mirrors the current `settings.darkMode` flag.
   */
  function applyDarkMode() {
    const enabled = settings.darkMode;
    for (const root of [document.documentElement, document.body]) {
      root.classList.toggle('dark-mode', enabled);
    }
  }
  68.  
  /**
   * Removes blur/promo overlays and makes hidden text readable, then keeps doing
   * so as the page adds more content. Does nothing when `settings.unblur` is off.
   *
   * Computed styles report colours in `rgb()` / `rgba()` form, so transparency
   * and white shadows are detected in that form as well as by keyword.
   */
  function unblurContent() {
    if (!settings.unblur) return;

    // Matches the keyword and any rgba() colour whose alpha component is zero.
    const isTransparent = (color) =>
      color === 'transparent' || /^rgba\(.*,\s*0(?:\.0+)?\s*\)$/.test(color);
    const hasWhiteShadow = (shadow) =>
      /white|rgba?\(255,\s*255,\s*255/.test(shadow || '');

    const cleanup = () => {
      document.querySelectorAll('.blurred_page, .promo_div, [unselectable="on"]').forEach(el => el.remove());
      document.querySelectorAll('*').forEach(el => {
        const cs = getComputedStyle(el);
        if (isTransparent(cs.color)) el.style.color = '#111';
        if (hasWhiteShadow(cs.textShadow)) el.style.textShadow = 'none';
      });
    };

    // A full-document style scan is expensive; coalesce bursts of mutations into
    // a single delayed pass instead of rescanning on every change.
    let scheduled = false;
    const scheduleCleanup = () => {
      if (scheduled) return;
      scheduled = true;
      setTimeout(() => {
        scheduled = false;
        cleanup();
      }, 250);
    };

    cleanup();
    new MutationObserver(scheduleCleanup).observe(document.body, { childList: true, subtree: true });
  }
  82.  
  /**
   * Filters raw OCR output down to lines that look like real text.
   *
   * Each line is trimmed and kept only when it has at least three characters,
   * contains an ASCII letter, and is not a run of pure punctuation/symbols.
   *
   * @param {string} text - Raw multi-line OCR output.
   * @returns {string} The surviving lines joined with newlines.
   */
  function cleanOCRText(text) {
    const kept = [];
    for (const rawLine of text.split('\n')) {
      const line = rawLine.trim();
      if (line.length < 3) continue;
      if (!/[a-zA-Z]/.test(line)) continue;
      if (/^[^a-zA-Z0-9]{3,}$/.test(line)) continue;
      kept.push(line);
    }
    return kept.join('\n');
  }
  88.  
  /**
   * Guesses the Tesseract language code for a text sample from its accented
   * letters.
   *
   * Instead of returning the first language with any matching character (which
   * lets a single stray letter decide), every language's marker characters are
   * counted and the highest count wins. Ties keep the declaration order
   * (spa, fra, deu, ron). Matching is case-insensitive, and the legacy cedilla
   * forms of the Romanian letters are accepted. The letters â/î are counted for
   * both French and Romanian because both languages use them.
   *
   * @param {string} text - Sample text; null/undefined is treated as empty.
   * @returns {string} 'spa', 'fra', 'deu', 'ron', or 'eng' when no marker is found.
   */
  function detectLanguage(text) {
    const markers = {
      spa: /[ñáíóú]/gi,
      fra: /[éèêçâî]/gi,
      deu: /[äöüß]/gi,
      ron: /[șțşţăâî]/gi
    };
    const sample = String(text ?? '');

    let bestLang = 'eng';
    let bestCount = 0;
    for (const [lang, regex] of Object.entries(markers)) {
      const count = sample.match(regex)?.length ?? 0;
      // Strictly greater: on a tie the earlier language keeps priority.
      if (count > bestCount) {
        bestLang = lang;
        bestCount = count;
      }
    }
    return bestLang;
  }
  96.  
  /**
   * Loads an image and returns a grayscale PNG data URL suitable for OCR.
   *
   * Resolves with `null` (never rejects, never hangs) when the image should be
   * skipped: empty URL, URL that looks like a logo/icon/watermark, image smaller
   * than 100x100, load failure (including a rejected CORS request), or a canvas
   * that cannot be read back.
   *
   * @param {string} src - Image URL.
   * @returns {Promise<string|null>} Grayscale PNG data URL, or null when skipped.
   */
  async function preprocessImage(src) {
    // Decidable from the URL alone, so skip before spending a network request.
    if (!src || /logo|icon|watermark/i.test(src)) return null;

    return new Promise(resolve => {
      const img = new Image();
      img.crossOrigin = 'anonymous';

      // Without this handler a failed load would leave the promise pending
      // forever and stall the scraping worker awaiting it.
      img.onerror = () => resolve(null);

      img.onload = () => {
        const width = img.naturalWidth;
        const height = img.naturalHeight;
        if (width < 100 || height < 100) return resolve(null);

        try {
          const canvas = document.createElement('canvas');
          canvas.width = width;
          canvas.height = height;
          const ctx = canvas.getContext('2d');
          ctx.drawImage(img, 0, 0);

          // Replace each pixel's RGB with its channel average (alpha untouched).
          const imageData = ctx.getImageData(0, 0, width, height);
          const pixels = imageData.data;
          for (let i = 0; i < pixels.length; i += 4) {
            const avg = (pixels[i] + pixels[i + 1] + pixels[i + 2]) / 3;
            pixels[i] = pixels[i + 1] = pixels[i + 2] = avg;
          }
          ctx.putImageData(imageData, 0, 0);
          resolve(canvas.toDataURL('image/png'));
        } catch (err) {
          // e.g. SecurityError when the canvas is tainted by cross-origin data.
          console.warn('[Scribd Enhancer] Could not preprocess image:', err);
          resolve(null);
        }
      };

      img.src = src;
    });
  }
  118.  
  /**
   * Builds the preview panel element (#se-preview).
   *
   * The element is always created and returned; it is only seeded with its
   * initial text and attached to the page when `settings.showPreview` is on.
   *
   * @returns {HTMLDivElement} The preview element, attached or detached.
   */
  function createPreview() {
    const panel = document.createElement('div');
    panel.id = 'se-preview';
    if (!settings.showPreview) {
      return panel;
    }
    panel.textContent = '[Preview Initialized]\n';
    document.body.appendChild(panel);
    return panel;
  }
  128.  
  /**
   * Downloads the scraped content as one file, or as several files of
   * `settings.splitEvery` page sections each when splitting is enabled.
   *
   * Sections are delimited by the `[Page N]` markers in the content.
   *
   * @param {string} content - Full text (or markup) to export.
   * @param {string} ext - File extension without the dot, e.g. 'txt' or 'html'.
   */
  function exportOutput(content, ext) {
    // 'text/txt' is not a registered type; map the extensions actually used.
    const mimeByExt = new Map([['txt', 'text/plain'], ['html', 'text/html']]);
    const mimeType = `${mimeByExt.get(ext) ?? `text/${ext}`};charset=utf-8`;

    // Triggers a browser download for one piece of text.
    const download = (text, filename) => {
      const url = URL.createObjectURL(new Blob([text], { type: mimeType }));
      const a = document.createElement('a');
      a.href = url;
      a.download = filename;
      a.click();
      // Release the blob once the download has had time to start; object URLs
      // otherwise stay alive for the lifetime of the page.
      setTimeout(() => URL.revokeObjectURL(url), 10000);
    };

    const split = Math.floor(Number(settings.splitEvery));
    if (!(split >= 1)) {
      // Also covers NaN and other unusable values.
      download(content, `scribd_output.${ext}`);
      return;
    }

    const sections = content.split(/(?=\[Page \d+])/);
    for (let i = 0; i < sections.length; i += split) {
      const chunk = sections.slice(i, i + split).join('\n');
      download(chunk, `scribd_part${Math.floor(i / split) + 1}.${ext}`);
    }
  }
  149.  
  /**
   * Opens the content in a separate window and invokes the browser print dialog
   * (from which the user can save as PDF).
   *
   * The content is plain scraped text, so it is HTML-escaped before being
   * written into the print document. If the popup is blocked the user is told
   * instead of the script crashing on a null window.
   *
   * @param {string} content - Text to print.
   */
  function printToPDF(content) {
    const escapeHtml = (text) => String(text)
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;');

    const win = window.open('', 'PrintView');
    if (!win) {
      // window.open returns null when a popup blocker intervenes.
      alert('❌ Could not open the print window. Please allow pop-ups for this site.');
      return;
    }

    win.document.write(`<html><head><title>Scribd Print</title></head><body><pre>${escapeHtml(content)}</pre></body></html>`);
    win.document.close();
    win.focus();
    // Give the new document a moment to lay out before opening the dialog.
    setTimeout(() => {
      if (!win.closed) win.print();
    }, 600);
  }
  157.  
  /**
   * Scrapes text (and, when enabled, OCR text from images) out of every page
   * element and writes the result into the preview, four pages at a time.
   *
   * Pages finish in an unpredictable order, so each page's output is stored in
   * its own slot and the preview is re-rendered in page order after every
   * update. Whatever the preview contained before the run is kept as a prefix.
   * A failed OCR attempt is logged and skipped; it does not abort the run.
   *
   * @param {Element[]} pages - Page elements in document order.
   * @param {Element} preview - Element whose textContent receives the output.
   * @returns {Promise<void>} Settles after all pages are processed and the
   *   completion alert has been shown.
   */
  async function scrapePages(pages, preview) {
    const concurrency = 4;
    const baseText = preview.textContent;
    const sections = new Array(pages.length).fill('');
    const pageTexts = []; // text seen so far; feeds automatic language detection
    let nextIndex = 0;

    const render = () => {
      preview.textContent = baseText + sections.join('');
    };

    // Runs OCR over the page's images and returns the cleaned, non-empty results.
    async function recognizeImages(page) {
      const blocks = [];
      if (!settings.enableOCR || !window.Tesseract) return blocks;

      for (const img of page.querySelectorAll('img')) {
        try {
          const processed = await preprocessImage(img.src || '');
          if (!processed) continue;
          const lang = settings.ocrLang === 'auto' ? detectLanguage(pageTexts.join(' ')) : settings.ocrLang;
          const result = await window.Tesseract.recognize(processed, lang);
          const ocrText = cleanOCRText(result.data.text || '');
          if (ocrText) blocks.push(ocrText);
        } catch (err) {
          console.warn('[Scribd Enhancer] OCR failed for an image:', err);
        }
      }
      return blocks;
    }

    // Scrapes one page into sections[i].
    async function scrape(page, i) {
      // Bring the page into view and give lazily loaded content a moment.
      page.scrollIntoView();
      await new Promise(r => setTimeout(r, 300));

      const header = `[Page ${i + 1}]`;
      let section = '';

      const text = (page.innerText || '').trim();
      if (text) {
        pageTexts.push(text);
        section = `${header} \n${text}\n\n`;
        // Show the page text right away; OCR may take much longer.
        sections[i] = section;
        render();
      }

      const ocrBlocks = await recognizeImages(page);
      if (ocrBlocks.length > 0) {
        // OCR-only pages still get their page marker so the output stays
        // attributable and splittable by page.
        if (!section) section = `${header} \n`;
        section += ocrBlocks.map(block => `[OCR] ${block}\n\n`).join('');
      }

      sections[i] = section || `${header} No content\n\n`;
      render();
    }

    // Each worker keeps pulling the next unclaimed page index until none remain.
    const workers = Array.from({ length: concurrency }, async () => {
      while (nextIndex < pages.length) {
        const i = nextIndex++;
        await scrape(pages[i], i);
      }
    });
    await Promise.all(workers);
    alert(`✅ Scraped ${pages.length} pages.`);
  }
  203.  
  /**
   * Builds the floating control panel (#se-ui), initialises its inputs from the
   * current settings and wires up the option and action handlers.
   *
   * Changing any option saves all settings, re-applies dark mode and attaches or
   * detaches the preview panel to match "Show Preview" immediately.
   *
   * @param {Element} preview - Preview element that receives scraped text and
   *   supplies the content for export and print.
   */
  function createUI(preview) {
    // The preview holds plain text; escape it before embedding it in markup.
    const escapeHtml = (text) => String(text)
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;');

    const ui = document.createElement('div');
    ui.id = 'se-ui';
    ui.innerHTML = `
      <label><input type="checkbox" id="opt-unblur"> Unblur</label>
      <label><input type="checkbox" id="opt-autoscrape"> Auto Scrape</label>
      <label><input type="checkbox" id="opt-dark"> Dark Mode</label>
      <label><input type="checkbox" id="opt-preview"> Show Preview</label>
      <label>OCR:
        <select id="opt-lang">
          <option value="auto">Auto</option>
          <option value="eng">English</option>
          <option value="spa">Spanish</option>
          <option value="fra">French</option>
          <option value="deu">German</option>
        </select>
      </label>
      <label>Split Every:
        <select id="opt-split">
          <option value="0">Off</option>
          <option value="100">100</option>
          <option value="250">250</option>
          <option value="500">500</option>
        </select>
      </label>
      <button id="btn-scrape">📖 Scrape Pages</button>
      <button id="btn-export">💾 Export TXT</button>
      <button id="btn-html">🧾 Export HTML</button>
      <button id="btn-print">🖨️ Print to PDF</button>
    `;
    document.body.appendChild(ui);

    const unblurBox = ui.querySelector('#opt-unblur');
    const autoScrapeBox = ui.querySelector('#opt-autoscrape');
    const darkBox = ui.querySelector('#opt-dark');
    const previewBox = ui.querySelector('#opt-preview');
    const langSelect = ui.querySelector('#opt-lang');
    const splitSelect = ui.querySelector('#opt-split');

    // Reflect the stored settings in the controls.
    unblurBox.checked = settings.unblur;
    autoScrapeBox.checked = settings.autoScrape;
    darkBox.checked = settings.darkMode;
    previewBox.checked = settings.showPreview;
    langSelect.value = settings.ocrLang;
    splitSelect.value = settings.splitEvery;

    // Copies every control back into settings, persists and applies them.
    const syncSettings = () => {
      settings.unblur = unblurBox.checked;
      settings.autoScrape = autoScrapeBox.checked;
      settings.darkMode = darkBox.checked;
      settings.showPreview = previewBox.checked;
      settings.ocrLang = langSelect.value;
      // Explicit radix; an unparsable value means "no splitting".
      settings.splitEvery = Number.parseInt(splitSelect.value, 10) || 0;
      saveSettings();
      applyDarkMode();

      // Make "Show Preview" take effect now rather than on the next page load.
      if (settings.showPreview) {
        if (!preview.isConnected) document.body.appendChild(preview);
      } else {
        preview.remove();
      }
    };
    ui.querySelectorAll('input, select').forEach(input => {
      input.onchange = syncSettings;
    });

    ui.querySelector('#btn-scrape').onclick = () => {
      const pages = [...document.querySelectorAll(
        '.page, .reader_column, [id^="page_container"], .outer_page, .abs_page, .scribd_page, .text_layer'
      )];
      if (!pages.length) {
        alert('❌ No pages found.');
        return;
      }
      // Surface failures instead of leaving an unhandled rejection.
      scrapePages(pages, preview).catch(err => {
        console.error('[Scribd Enhancer] Scrape failed:', err);
        alert(`❌ Scrape failed: ${err?.message ?? err}`);
      });
    };
    ui.querySelector('#btn-export').onclick = () => exportOutput(preview.textContent, 'txt');
    ui.querySelector('#btn-html').onclick = () =>
      exportOutput(`<html><body><pre>${escapeHtml(preview.textContent)}</pre></body></html>`, 'html');
    ui.querySelector('#btn-print').onclick = () => printToPDF(preview.textContent);
  }
  267.  
  /**
   * One-time start-up: applies dark mode, starts unblurring, builds the preview
   * and control panel, and triggers a scrape when auto-scrape is enabled.
   */
  function initEnhancer() {
    applyDarkMode();
    unblurContent();
    const preview = createPreview();
    createUI(preview);
    if (settings.autoScrape) document.querySelector('#btn-scrape').click();
  }

  // The script may be injected after the page has finished loading, in which
  // case a 'load' listener would never fire; start immediately in that case.
  if (document.readyState === 'complete') {
    initEnhancer();
  } else {
    window.addEventListener('load', initEnhancer, { once: true });
  }
  275. })();