// ==UserScript==
// @name Voz save thread to file
// @description Save your favorite thread into an HTML file
// @namespace Violentmonkey Scripts
// @match *://*.voz.vn/*
// @grant GM_xmlhttpRequest
// @version 0.5
// @run-at document-idle
// @license MIT
// ==/UserScript==
// Pause between page-fetch rounds, in milliseconds.
let wT = 20;
// When true, images found in the saved pages are downloaded and embedded in the file.
const saveWithImages = true;
// false: download pages one at a time; true: fetch many (15) pages at once.
const agressiveFetch = true;
// Parallel batches put more load on the server, so wait longer between rounds.
if (agressiveFetch) wT += 300;

/**
 * Resolve (with no value) after the given delay.
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
const sleep = (ms) =>
  new Promise((resolve) => {
    setTimeout(resolve, ms);
  });

/**
 * Gzip-compress arbitrary data.
 * @param {BlobPart} data - Anything accepted by the Blob constructor (string, Blob, ArrayBuffer, ...).
 * @returns {Promise<Blob>} Blob holding the gzip-compressed bytes.
 */
async function zip(data) {
  const source = new Blob([data]);
  const gzipped = source.stream().pipeThrough(new CompressionStream('gzip'));
  // Response is used only as a convenient way to collect the stream into a Blob.
  return new Response(gzipped).blob();
}

/**
 * Compute the SHA-1 digest of a string (UTF-8 encoded).
 * Used as a stable, attribute-safe key for image URLs; not for security purposes.
 * @param {string} message - Text to hash.
 * @returns {Promise<string>} Lowercase hexadecimal digest (40 characters).
 */
async function hash(message) {
  const bytes = new TextEncoder().encode(message);
  const digest = await window.crypto.subtle.digest('SHA-1', bytes);
  return Array.from(new Uint8Array(digest), (byte) => byte.toString(16).padStart(2, '0')).join('');
}

/**
 * Promise wrapper around GM_xmlhttpRequest (which is not bound by page CORS rules).
 *
 * @param {string} url - Absolute URL to request; `new URL(url)` throws on an invalid URL.
 * @param {string|object} [detail] - Either a responseType
 *   ('blob' | 'text' | 'json' | 'arraybuffer' | 'document') or an object of
 *   GM_xmlhttpRequest options that is merged over the defaults (it may override `url`).
 * @returns {Promise<*>} Resolves with `response` when the status is 200, otherwise with `false`.
 *   The promise never rejects.
 */
function xhr(url, detail) {
  let option = { url, origin: new URL(url).origin };

  if (typeof detail === 'string' && /^(?:blob|text|json|arraybuffer|document)$/.test(detail)) {
    option.responseType = detail;
  } else if (detail !== null && typeof detail === 'object') {
    // Merge instead of replacing so the url is kept when the caller passes only extra options.
    option = { ...option, ...detail };
  }

  return new Promise((resolve) => {
    // onloadend fires for success and failure alike, so the promise always settles.
    option.onloadend = (res) => resolve(res.status === 200 ? res.response : false);
    GM_xmlhttpRequest(option);
  });
}

// Id of the thread being saved; assigned by main() from the page URL.
let threadId;
// Cache of downloaded images: SHA-1 hex of the image URL -> base64 data URL.
// Shared by all pages and embedded once in the exported file.
let images = {};

/**
 * Turn the raw HTML of one thread page into the pieces stored in the export.
 *
 * The page is parsed, forms / non-message blocks / sidebar / footer are removed,
 * root-relative URLs are made absolute, quotes and spoilers are expanded and
 * page-navigation links lose their href. When `saveWithImages` is on, images
 * are downloaded into the shared `images` cache and tagged with an
 * `image-data` attribute holding their cache key.
 *
 * @param {string} htmlStr - Full HTML source of a thread page.
 * @param {boolean} [fetchWrapper=false] - When true, stylesheets are inlined as
 *   data URLs and the page shell (everything around `div.p-body-main`) is returned too.
 * @returns {Promise<{threadWrapper: string, threadBody: string}>}
 *   `threadBody`: data URL of the gzipped `div.p-body-main` markup.
 *   `threadWrapper`: data URL of the gzipped page shell in which the body is
 *   replaced by the `{ThreadBody_<threadId>}` placeholder, or '' when `fetchWrapper` is false.
 * @throws {Error} When the page has no `div.p-body-main` element.
 */
async function convertContent(htmlStr, fetchWrapper = false) {
  const siteOrigin = 'https://voz.vn';
  // Root-relative -> absolute; protocol-relative URLs only need a scheme.
  const absolutize = (url) => {
    if (url.startsWith('//')) return `https:${url}`;
    return url.startsWith('/') ? siteOrigin + url : url;
  };

  const html = new DOMParser().parseFromString(htmlStr, 'text/html');

  // Drop everything that is not thread content.
  html.querySelector('.blockMessage--none')?.remove();
  html.querySelectorAll('form').forEach((el) => el.remove());
  html.querySelectorAll('div.block').forEach((el) => {
    if (!el.matches('.block--messages')) el.remove();
  });
  html.querySelectorAll('div.p-body-main.p-body-main--withSidebar>*').forEach((el) => {
    if (!el.matches('.p-body-content')) el.remove();
  });
  html.querySelector('footer.p-footer')?.remove();

  html.querySelectorAll('[href]').forEach((el) => {
    el.setAttribute('href', absolutize(el.getAttribute('href')));
  });

  html.querySelectorAll('[src]').forEach((el) => {
    let src = el.getAttribute('src');
    if (src.startsWith('data:image')) {
      // A data: src with a data-src attribute is treated as a lazy-load placeholder.
      const lazySrc = el.getAttribute('data-src');
      if (!lazySrc) return; // genuine inline image: keep it untouched
      src = lazySrc;
    }
    el.setAttribute('src', absolutize(src));
  });

  html.querySelectorAll('[srcset]').forEach((el) => {
    // Each candidate is "<url> [descriptor]"; the URL comes first, so prefixing is enough.
    const candidates = el
      .getAttribute('srcset')
      .split(',')
      .map((candidate) => absolutize(candidate.trim()));
    el.setAttribute('srcset', candidates.join(', '));
  });

  // Expand long quotes
  html
    .querySelectorAll('div.bbCodeBlock-content>div.bbCodeBlock-expandContent.js-expandContent')
    .forEach((el) => {
      el.className = '';
    });

  // Open spoilers
  html
    .querySelectorAll('.bbCodeSpoiler-button,.bbCodeSpoiler-content')
    .forEach((el) => el.classList.add('is-active'));

  // Page links are handled by script in the exported file, so drop their targets.
  html.querySelectorAll('div.pageNav a').forEach((el) => el.removeAttribute('href'));

  if (saveWithImages) {
    for (const img of html.querySelectorAll('img')) {
      const src = img.src;
      if (!src || src.startsWith('data:image')) continue;
      const key = await hash(src);
      if (images[key] === undefined) {
        let blob = false;
        try {
          blob = await xhr(src, 'blob'); // GM_xmlhttpRequest can bypass CORS
        } catch (err) {
          console.warn('Could not fetch image', src, err);
        }
        if (!blob) continue; // download failed: leave the remote src in place
        images[key] = (await toDataURL(blob)).replace(/^data:[^,]*;base64,/, 'data:image;base64,');
      }
      // Tag every occurrence, including images that were already cached.
      img.setAttribute('image-data', key);
    }
  }

  if (fetchWrapper) {
    for (const link of html.querySelectorAll('link[rel="stylesheet"]')) {
      let css = false;
      try {
        css = await xhr(link.href, 'blob');
      } catch (err) {
        console.warn('Could not fetch stylesheet', link.href, err);
      }
      if (css) link.href = await toDataURL(css);
    }
  }

  const body = html.querySelector('div.p-body-main');
  if (!body) throw new Error('convertContent: div.p-body-main not found in page HTML');

  const threadBody = await toDataURL(await zip(body.outerHTML));
  // Swap the body for a placeholder that the exported viewer substitutes per page.
  body.outerHTML = `{ThreadBody_${threadId}}`;
  const threadWrapper = fetchWrapper
    ? await toDataURL(await zip(new XMLSerializer().serializeToString(html)))
    : '';
  return { threadWrapper, threadBody };
}

/**
 * Encode data as a base64 `data:` URL using FileReader.
 *
 * A Blob argument is read as-is so its MIME type ends up in the URL
 * (e.g. `data:text/css;base64,...`); any other value is wrapped in an
 * untyped Blob first.
 *
 * @param {Blob|BlobPart} data - Blob, string, ArrayBuffer, ...
 * @returns {Promise<string>} The data URL; rejects with the FileReader error on failure.
 */
function toDataURL(data) {
  const blob = data instanceof Blob ? data : new Blob([data]);
  return new Promise((resolve, reject) => {
    const reader = new FileReader();
    reader.onload = () => resolve(reader.result);
    reader.onerror = () => reject(reader.error);
    reader.readAsDataURL(blob);
  });
}

/**
 * Download and convert every page of the current thread.
 *
 * Pages are fetched in batches (15 at a time when `agressiveFetch` is on,
 * otherwise one by one) with a pause of `wT` ms between batches. Pages that
 * fail are retried after a 2 s wait, up to 5 attempts each; a page that still
 * fails is logged and left as a hole (`null` in the JSON).
 * A fixed progress bar is shown while running and removed afterwards.
 *
 * @returns {Promise<string>} JSON array: index 0 is the page wrapper (from
 *   page 1), index N is the body of page N, all as gzipped data URLs.
 */
async function saveThread() {
  const lastPageText = document.querySelector('ul.pageNav-main>li:last-of-type>a')?.textContent;
  // parseInt yields NaN (not null) when there is no usable label, so `??` would never fall back.
  const maxPage = Number.parseInt(lastPageText, 10) || 1;

  document.body.insertAdjacentHTML("beforeend",`<progress id="us_vtstf_progress" value=0 max=100 style="position:fixed;left:2px; bottom: 1px; width:99%"></progress>`);
  const progressBar = document.querySelector('#us_vtstf_progress');
  progressBar.max = maxPage;

  const pages = [];
  // Counted explicitly: `pages` is filled out of order, so its length is not a progress measure.
  let donePages = 0;

  // Fetch + convert one page; returns true on success, false on any failure.
  async function fetchContent(pageNo) {
    try {
      const res = await fetch(`https://voz.vn/t/${threadId}/page-${pageNo}`);
      if (res.status !== 200) return false;
      const data = await res.text();
      if (!data) return false;

      const { threadWrapper, threadBody } = await convertContent(data, pageNo === 1);
      if (pageNo === 1) pages[0] = threadWrapper;
      pages[pageNo] = threadBody;
      donePages += 1;
      progressBar.value = donePages;
      return true;
    } catch (err) {
      console.warn(`Failed to fetch page ${pageNo}`, err);
      return false;
    }
  }

  const batchSize = agressiveFetch ? 15 : 1;
  const maxAttempts = 5;
  const attempts = new Map(); // pageNo -> number of failed tries
  const pending = Array.from({ length: maxPage }, (_, idx) => idx + 1);

  try {
    while (pending.length > 0) {
      const batch = pending.splice(0, batchSize);
      const outcomes = await Promise.all(batch.map((pageNo) => fetchContent(pageNo)));

      const retry = [];
      batch.forEach((pageNo, idx) => {
        if (outcomes[idx]) return;
        const tries = (attempts.get(pageNo) ?? 0) + 1;
        attempts.set(pageNo, tries);
        if (tries < maxAttempts) retry.push(pageNo);
        else console.error(`Giving up on page ${pageNo} after ${tries} attempts`);
      });

      if (retry.length > 0) {
        // Only the failed pages are re-queued; give the server a moment first.
        pending.unshift(...retry);
        console.log('Sâm thinh roong, wait for 2s to continue');
        await sleep(2000);
      }
      await sleep(wT);
    }
  } finally {
    progressBar.remove();
  }
  return JSON.stringify(pages);
}

/**
 * Entry point: on thread pages ("/t/..."), add a "Save Thread to file" button
 * next to every page navigation bar. Also records the thread/forum id from the
 * URL in the module-level `threadId`.
 */
(async function main() {
  // Dots are escaped and the "<slug>." part is optional, so both
  // /t/some-title.123456/ and /t/123456/ yield the full numeric id.
  const match = location.href.match(
    /https?:\/\/(?:.*\.)?voz\.vn\/(f|t)\/(?:[^\/]*\.)?(\d+)\/?(page-(\d+))?/,
  );
  if (!match) return;

  const fOrT = match[1];
  threadId = match[2];
  if (fOrT !== 't') return;

  // Ignore clicks while a save is already running (any of the buttons).
  let isSaving = false;
  const onSaveClick = async () => {
    if (isSaving) return;
    isSaving = true;
    try {
      await exportToFile(await saveThread());
    } catch (err) {
      console.error('Save Thread to file failed', err);
    } finally {
      isSaving = false;
    }
  };

  // Create the SaveThread button; one fresh element per navigation bar.
  const createSaveButton = () => {
    const btn = document.createElement('a');
    btn.classList.add('pageNav-jump', 'pageNav-jump--next');
    btn.textContent = 'Save Thread to file';
    btn.style.cursor = 'pointer';
    btn.addEventListener('click', onSaveClick);
    return btn;
  };

  document.querySelectorAll('ul.pageNav-main').forEach((nav) => {
    nav.parentElement.appendChild(createSaveButton());
  });
})();

/**
 * Build the standalone viewer HTML and trigger its download.
 *
 * The generated file embeds the page data (`pages`), the shared image cache
 * (`images`) and a small script that un-gzips the requested page, injects it
 * into the wrapper and wires up the page navigation.
 *
 * @param {string|Array} pages - JSON text as returned by saveThread()
 *   (index 0 = wrapper, index N = page N); an array is stringified first.
 * @returns {Promise<void>}
 */
async function exportToFile(pages) {
  const pagesJson = typeof pages === 'string' ? pages : JSON.stringify(pages);

  // Everything inside this template literal is emitted into the saved file;
  // only the ${...} placeholders are evaluated here. Keep the embedded script
  // free of backticks and backslashes.
  const html = `<html>
<head>
<meta charset="utf-8">
</head>
<body>
<div id="screen"></div>
<style>
#screen div.pageNav a{cursor: pointer;}
</style>
<script>
const threadBodyReplacement = '{ThreadBody_${threadId}}';
var pages=${pagesJson};
var images= ${JSON.stringify(images)};

async function unZip(data) { //return Blob, lấy text thì them await .text()
let blob=new Blob([data]);
const ds = new DecompressionStream("gzip");
const decompressedStream = blob.stream().pipeThrough(ds);
return await new Response(decompressedStream).blob();
}

const screenEl = document.getElementById('screen');
let threadWrapper; //load in main()

async function showPage(pageId=1) {
const pageNo = Number.parseInt(pageId, 10);
// pages[0] is the wrapper; ignore anything that is not a stored page number
if (!Number.isInteger(pageNo) || pageNo < 1 || pageNo >= pages.length || !pages[pageNo]) return;

const threadBody= await (await unZip(await fetch(pages[pageNo]).then(a=>a.blob()) )).text();
// function replacer: post content must not be parsed for $-patterns
const pageContent= threadWrapper.replace(threadBodyReplacement, () => threadBody);
screenEl.innerHTML=pageContent;

//load images
screenEl.querySelectorAll('img').forEach(el=>{
const key=el.getAttribute('image-data');
if (key && images[key]) el.src=images[key]; //createObjectURL tu dataUrl bao loi, khong chuyen qua, mat cong
});

//Page number click
screenEl.querySelectorAll('ul.pageNav-main a:not([id])').forEach(el=> el.addEventListener('click',e=> {
e.preventDefault();
showPage(e.target.textContent.trim());
}));

//Goto page Click
screenEl.querySelectorAll('ul.pageNav-main a[title="Go to page"]').forEach(el => el.addEventListener('click',e=>{
e.preventDefault();
const answer = prompt('Enter page number','1');
if (answer === null) return;
showPage(answer);
}));

//Next Click
screenEl.querySelectorAll('.pageNav-jump.pageNav-jump--next').forEach(el=>el.addEventListener('click',(e)=> {
showPage(pageNo+1);
} ));

//Prev Click
screenEl.querySelectorAll('.pageNav-jump.pageNav-jump--prev').forEach(el=>el.addEventListener('click',(e)=> {
showPage(pageNo-1);
} ));

scrollTo(0,0);
}

(async function main() {
threadWrapper=await (await unZip(await fetch(pages[0]).then(r=>r.blob()) )).text();
await showPage(1);
})();
</script></body></html>`;

  // A Blob URL avoids base64 overhead and the size limits of data: URLs.
  const fileUrl = URL.createObjectURL(new Blob([html], { type: 'text/html' }));

  const download = document.createElement('a');
  download.href = fileUrl;
  download.target = '_blank';
  console.log('Saving to html file, size: ', html.length);

  // Thread title = last "-"-separated part of <title>, cut at the first "|".
  const pageTitle = document.querySelector('title')?.textContent ?? '';
  const threadTitle = pageTitle.split('-').at(-1).split('|')[0].trim();
  // Strip characters that are illegal in file names; fall back to the thread id.
  const safeTitle = threadTitle.replace(/[\\/:*?"<>|]+/g, '_') || `thread_${threadId}`;
  download.download = `${safeTitle}_${new Date().toISOString().slice(0, 10)}.html`;
  download.click();

  // Release the Blob later; revoking right away can cancel the download.
  setTimeout(() => URL.revokeObjectURL(fileUrl), 60000);
}