Voz save thread to file

Save your favorite thread to an HTML file

  1. // ==UserScript==
  2. // @name Voz save thread to file
  3. // @description Save your favorite thread into a html file
  4. // @namespace Violentmonkey Scripts
  5. // @match *://*.voz.vn/*
  6. // @grant GM_xmlhttpRequest
  7. // @version 0.5
  8. // @run-at document-idle
  9. // @license MIT
  10. // ==/UserScript==
  11. let wT = 20; //in ms
  12. const saveWithImages=true;
  13. const agressiveFetch=true; //false de tai trang lan luot, true de lay nhieu (15) cug luc
  14. if (agressiveFetch) wT+=300;
  15.  
  16. const sleep = (ms) => new Promise((rs) => setTimeout(rs, ms));
  17.  
  18. async function zip(data) { // return Blob
  19. let blob=new Blob([data]);
  20. const cs = new CompressionStream("gzip");
  21. const compressedStream = blob.stream().pipeThrough(cs);
  22. return await new Response(compressedStream).blob();
  23. }
  24.  
  25. async function hash(message) {
  26. const msgUint8 = new TextEncoder().encode(message);
  27. const hashBuffer = await window.crypto.subtle.digest("SHA-1", msgUint8);
  28. const hashArray = Array.from(new Uint8Array(hashBuffer));
  29. return hashArray.map((b) => b.toString(16).padStart(2, "0")).join("");
  30. }
  31.  
  32. function xhr(url, detail) {
  33. const nurl=new URL(url);
  34. const option={'url':url, origin:nurl.origin}
  35.  
  36. if (typeof detail =='string' && /^(?:blob|text|json|arraybuffer|document)$/.test(detail)) option['responseType']=detail;
  37. if (typeof detail =='object') option=detail;
  38.  
  39. return new Promise(rs=>{
  40. option['onloadend']=res=> (res.status==200) ? rs(res.response) : rs(false);
  41. const c = GM_xmlhttpRequest(option);
  42. })
  43. }
  44.  
  45. let threadId;
  46. let images={};
  47. async function convertContent(htmlStr, fetchWrapper=false) {
  48. dump=new DOMParser();
  49. let html=dump.parseFromString(htmlStr,'text/html');
  50.  
  51. html.querySelector('.blockMessage--none')?.remove();
  52. html.querySelectorAll('form').forEach(el=>el?.remove());
  53. html.querySelectorAll('div.block').forEach(el=>{
  54. if (el.matches('.block--messages')) return;
  55. el.remove() });
  56.  
  57. html.querySelectorAll('div.p-body-main.p-body-main--withSidebar>*').forEach(el=>{
  58. if (el.matches('.p-body-content')) return;
  59. el.remove(); });
  60. html.querySelector('footer.p-footer')?.remove();
  61.  
  62.  
  63. html.querySelectorAll('[href]').forEach(el=> {
  64. let href=el.getAttribute('href');
  65. if (href.startsWith('/')) el.setAttribute('href','https://voz.vn'+ href);
  66. });
  67.  
  68. html.querySelectorAll('[src]').forEach(el=> {
  69. // if(el.tagName=='SCRIPT') return; //skip script;
  70. let src=el.getAttribute('src');
  71. if (src.startsWith('data:image')) el.setAttribute('src',el.getAttribute('data-src'));
  72. if (src.startsWith('/')) el.setAttribute('src','https://voz.vn'+ src);
  73. });
  74.  
  75. html.querySelectorAll('[srcset]').forEach(el=> {
  76. src=el.getAttribute('srcset').split(',').map(a=>{ if (a.startsWith('/')) return 'https://voz.vn'+a }).join(',');
  77. el.setAttribute('srcset',src);
  78. });
  79.  
  80. //Expand long quote
  81. html.querySelectorAll('div.bbCodeBlock-content>div.bbCodeBlock-expandContent.js-expandContent').forEach(el=>el.className='')
  82.  
  83. //Spoiler
  84. html.querySelectorAll('.bbCodeSpoiler-button,.bbCodeSpoiler-content').forEach(el=>el.classList.add('is-active'))
  85.  
  86. // Sửa link trang
  87. html.querySelectorAll('div.pageNav a').forEach(el=>{
  88. el.removeAttribute('href');
  89. });
  90.  
  91. if(saveWithImages) {
  92. // let p= html.querySelectorAll('img.bbImage');
  93. let p= html.querySelectorAll('img');
  94. for (let i=0; i<p.length; i++) {
  95. if (p[i].src.startsWith('data:image')) continue;
  96. const key=await hash(p[i].src);
  97. if (images[key]!==undefined) continue;
  98. const org=new URL(p[i].src);
  99. const b=await xhr(p[i].src,'blob'); //GM_xmlhttpRequest can bypass cors
  100. const c=(await toDataURL(b)).replace(/data:.*;base64,/,'data:image;base64,');
  101. images[key]=c;
  102. p[i].setAttribute('image-data',key);
  103. }
  104. }
  105.  
  106. if (fetchWrapper) {
  107. const styles = html.querySelectorAll('link[rel="stylesheet"]')
  108. for (let i=0; i<styles.length; i++) styles[i].href=await toDataURL(await xhr(styles[i].href,'blob'));
  109. }
  110.  
  111. let threadBody=await toDataURL(await zip(html.querySelector('div.p-body-main')?.outerHTML));
  112. html.querySelector('div.p-body-main').outerHTML=`{ThreadBody_${threadId}}`;
  113. htmlStr=fetchWrapper ? await toDataURL(await zip(new XMLSerializer().serializeToString(html))) : '';
  114. return {threadWrapper:htmlStr, threadBody}
  115. }
  116.  
  117. function toDataURL(data) {
  118. return new Promise((rs,rj)=>{
  119. const fs=new FileReader();
  120. fs.onload=()=> rs(fs.result);
  121. fs.onerror=()=>rj(fs.error);
  122. fs.readAsDataURL(new Blob([data]));
  123. });
  124. }
  125.  
  126. async function saveThread() {
  127. const maxPage = parseInt(document.querySelector("ul.pageNav-main>li:last-of-type>a")?.textContent)??1;
  128. document.body.insertAdjacentHTML("beforeend",`<progress id="us_vtstf_progress" value=0 max=100 style="position:fixed;left:2px; bottom: 1px; width:99%"></progress>`);
  129. const progressBar=document.querySelector('#us_vtstf_progress');
  130. progressBar.max=maxPage;
  131.  
  132. let pages=[]
  133. async function fetchContent(pageNo) {
  134. let pageUrl = `https://voz.vn/t/${threadId}/page-${pageNo}`;
  135. let data= await fetch(pageUrl).then((res)=> {
  136. if (res.status !==200) return false;
  137. return res.text(); });
  138. if (!data) return false;
  139.  
  140. let { threadWrapper, threadBody } = await convertContent(data, pageNo==1);
  141. if (pageNo==1) {pages[0]= threadWrapper; pages[1]=threadBody;}
  142. else pages[pageNo]=threadBody;
  143. progressBar.value=pages.length-1;
  144. return true;
  145. }
  146.  
  147. if(agressiveFetch) {
  148. const runsNumber=15;
  149. let i=1;
  150. while (i<=maxPage) {
  151. let run=[];
  152. while (run.length<runsNumber && i<=maxPage) {
  153. run.push(fetchContent(i));
  154. i++; }
  155. const result = (await Promise.all(run)).filter((el, i)=>{if (!el) return i});
  156. if(result.length>1) {
  157. i=Math.min(...result);
  158. console.log('Sâm thinh roong, wait for 2s to continue');
  159. await sleep(2000);}
  160. await sleep(wT);
  161. }
  162. } else {
  163. for (let i = 1; i <= maxPage; i++) {
  164. await fetchContent(i);
  165. await sleep(wT); }
  166. }
  167. progressBar.remove();
  168. return JSON.stringify(pages);
  169. }
  170.  
  171. (async function main() {
  172. //create SaveThread Button
  173. dump = location.href.match(/https:\/\/(?:.*\.)?voz.vn\/(f|t)\/[a-z\d\-]+.(\d+)\/?(page-(\d+))?/);
  174. let fOrT;
  175. if (dump) { fOrT = dump[1]; threadId = dump[2]; }
  176.  
  177. if (fOrT == "t") {
  178. const btnSave = document.createElement("a");
  179. btnSave.classList.add("pageNav-jump", "pageNav-jump--next");
  180. btnSave.textContent = "Save Thread to file";
  181. btnSave.onclick = async ()=>await exportToFile(await saveThread());
  182. btnSave.style = "cursor:pointer;";
  183. document.querySelectorAll("ul.pageNav-main")
  184. .forEach((el, i) =>i == 0 ? el.parentElement.appendChild(btnSave) : ((dump = btnSave.cloneNode(true)), (dump.onclick = async ()=>await exportToFile(await saveThread())),el.parentElement.appendChild(dump)));
  185. }
  186. })();
  187.  
  188. async function exportToFile(pages) {
  189. let html = `<html>
  190. <head>
  191. </head>
  192. <body>
  193. <div id="screen"></div>
  194. <style>
  195. #screen div.pageNav a{cursor: pointer;}
  196. </style>
  197. <script>
  198. const threadBodyReplacement = '{ThreadBody_${threadId}}';
  199.  
  200. async function unZip(data) { //return Blob, lấy text thì them await .text()
  201. let blob=new Blob([data]);
  202. const ds = new DecompressionStream("gzip");
  203. const decompressedStream = blob.stream().pipeThrough(ds);
  204. return await new Response(decompressedStream).blob();
  205. }
  206.  
  207. let screen = document.getElementById('screen');
  208. let threadWrapper; //load in main()
  209.  
  210. async function showPage(pageId=1) {
  211. const threadBody= await (await unZip(await fetch(pages[pageId]).then(a=>a.blob()) )).text();
  212. const pageContent= threadWrapper.replace(threadBodyReplacement,threadBody);
  213. screen.innerHTML=pageContent;
  214.  
  215. //load images
  216. screen.querySelectorAll('img').forEach(el=>{
  217. const key=el.getAttribute('image-data');
  218. /(?:[0-9a-f][0-9a-f])+/.test(key) ? el.src=images[key] :''; //createObjectURL tu dataUrl bao loi, khong chuyen qua, mat cong
  219. });
  220.  
  221. //Page number click
  222. screen.querySelectorAll('ul.pageNav-main a:not([id])').forEach(el=> el.addEventListener('click',e=> {
  223. e.preventDefault();
  224. showPage(parseInt(e.target.textContent.trim()));
  225. }));
  226.  
  227. //Goto page Click
  228. screen.querySelectorAll('ul.pageNav-main a[title="Go to page"]')?.forEach(el => el.addEventListener('click',e=>{
  229. let pageNo = prompt('Enter page number','1');
  230. if (isNaN(pageNo)) return;
  231. showPage(pageNo);
  232. })
  233. )
  234.  
  235. //Next Click
  236. screen.querySelectorAll('.pageNav-jump.pageNav-jump--next')?.forEach(el=>el.addEventListener('click',(e)=> {
  237. showPage(parseInt(pageId)+1);
  238. } ));
  239.  
  240. //Prev Click
  241. screen.querySelectorAll('.pageNav-jump.pageNav-jump--prev')?.forEach(el=>el.addEventListener('click',(e)=> {
  242. showPage(parseInt(pageId)-1);
  243. } ));
  244.  
  245. scrollTo(0,0);
  246. }
  247.  
  248. </script></body></html>`
  249.  
  250. html=html.replace(`</script></body></html>`, 'var pages='+ pages+';\n'+
  251. 'var images= ' + JSON.stringify(images) + ';\n' +
  252. `(async function main() {
  253. threadWrapper=await (await unZip(await fetch(pages[0]).then(r=>r.blob()) )).text();
  254. await showPage(1);
  255. })();` +` </script></body></html>`);
  256.  
  257. const download=document.createElement('a');
  258. download.href= await toDataURL(html);
  259. download.target = '_blank';
  260. console.log('Saving to html file, size: ', html.length);
  261. title=document.querySelector('title')?.textContent.split('-').at(-1)?.split('|')[0].trim();
  262. download.download = title +'_'+ (new Date()).toISOString().slice(0,10) + '.html';
  263. download.click();
  264. return;
  265. }