Save a Baidu Wenku document: press Ctrl-S after the page has finished loading. Disable ad blockers if saving fails.

Saves a Baidu Wenku document as HTML. Scroll down the web page to load all pages before pressing Ctrl-S to save; do not scroll too fast, so that every page of the document finishes loading (preferably use the <Page Down> key on your keyboard). Chrome cannot load the saved HTML because it blocks some offline JS scripts, so saving to PDF in Yandex Browser is a better choice.

  1. // ==UserScript==
  2. // @namespace ATGT
  3. // @name Save baidu wenku doc, press Ctrl-S after page load done. Disable Ads blockers if save failed.
  4. // @name:zh-CN 保存百度文库中的文档,页面加载完后按Ctrl-S。如保存失败,请关闭广告过滤插件,如ABP、uBlock等。
  5. // @description Save baidu wenku doc as html. Need to Scroll down the web page to load all pages before press Ctrl-S to save, don't scroll too fast to ensure page of doc load complete, prefer use key <Page Down> on your keyboard. Chrome can not load saved html since it blocked some offline js script, Save to PDF in Yandex Browser is a better choice.
  6. // @description:zh-CN 将百度文库中的文档保存为html。保存前需要滚动页面到底部,不要滚太快以确保页面加载成功,最好用键盘上的<Page Down>键。因为Chrome屏蔽了部分离线js脚本,导致Chrome无法加载保存的html,在Yandex Browser中保存成PDF比较好。
  7. // @version 1.2.2
  8. // @icon https://www.baidu.com/cache/icon/favicon.ico
  9. // @include http*://wenku.baidu.com/*
  10.  
  11. // @run-at document-start
  12. // ==/UserScript==
  13.  
  14. /*
  15. ChangeLog:
  16. v1.2:
  17. 28 Nov 2018, update description for Chrome: need to save as PDF.
  18. v1.1:
  19. 28 Nov 2018, remove copy limit.
  20. v1:
  21. 28 Nov 2018, Remove ads and show all pages.
  22. */
  23.  
  24. console.log("!!!!!!!!!!!!!!!!!!!!!save-baidu-wenku-doc!!!!!!!!!!!!!!!!!!!!!!!!");
  25. (function () {
  26.  
  /**
   * Run `func` in the page's own JavaScript context.
   *
   * The function is converted to its source text, wrapped so that it is
   * invoked immediately, and appended to the document as an inline <script>
   * element. Only the source text is transferred, so `func` cannot use
   * variables captured from this userscript's closure.
   *
   * Injection failures are logged (together with the caught error) instead of
   * being thrown.
   *
   * @param {Function} func - Self-contained function to execute in the page.
   */
  function injectFunction(func) {
    var script = document.createElement('script');
    script.appendChild(document.createTextNode('(function (){' + '(' + func + ')();' + '})();'));
    try {
      // <body> and <head> may be missing when this runs, so fall back to <html>.
      (document.body || document.head || document.documentElement).appendChild(script);
    } catch (e) {
      console.error("Can not inject function ", func, e);
    }
  }
  /**
   * Inject only the *body* of `func` into the page as an inline <script>.
   *
   * The source text of `func` is taken with the standard string conversion
   * (the non-standard `toSource()` is not available in current browsers).
   * When the text looks like a classic `function name(params) { ... }`
   * definition, the header and the final closing brace are stripped so that
   * the statements run directly at script top level. Any other text (for
   * example an arrow function) is injected unchanged.
   *
   * Unlike injectFunction, errors from appending the script are not caught.
   *
   * @param {Function} func - Function whose body should run in the page.
   */
  function injectFunctionContent(func) {
    var script = document.createElement('script');
    var funcContent = String(func);
    // Lazy match: stop at the first ") {" so that parentheses and braces in
    // the function body are never swallowed by the header pattern.
    var headerPattern = /^function\s*[\w$]*\s*\([\s\S]*?\)\s*\{/;
    if (headerPattern.test(funcContent)) {
      funcContent = funcContent.replace(headerPattern, '');
      funcContent = funcContent.replace(/\}\s*$/, '');
    }
    script.appendChild(document.createTextNode(funcContent));
    (document.body || document.head || document.documentElement).appendChild(script);
  }
  50.  
  /**
   * Keep references to the console methods on `window` under the names
   * console_debug / console_log / console_info / console_error.
   *
   * The other injected functions log through these globals. Each method is
   * bound to `console` so that calling it as a plain function works even on
   * engines that reject console methods invoked without their receiver.
   *
   * Meant to be passed to injectFunction, so it must stay self-contained.
   */
  function saveConsole() {
    window.console_debug = console.debug.bind(console);
    window.console_log = console.log.bind(console);
    window.console_info = console.info.bind(console);
    window.console_error = console.error.bind(console);
  }
  58. injectFunction(saveConsole);
  59.  
  60. function hookModJS() {
  61. console_info("--------> hookModJS");
  62. var require,
  63. define;
  64. !function (obj) {
  65. function create_async_script(script_src, load_fail_handler) {
  66. console_info("create_async_script", script_src);
  67. function load_ok() {
  68. clearTimeout(scriptLoadTimer)
  69. }
  70. if (!(script_src in async_script_list)) {
  71. async_script_list[script_src] = !0;
  72. var script = document.createElement('script');
  73. if (load_fail_handler) {
  74. var scriptLoadTimer = setTimeout(load_fail_handler, require.timeout);
  75. script.onerror = function () {
  76. clearTimeout(scriptLoadTimer),
  77. load_fail_handler()
  78. },
  79. 'onload' in script ? script.onload = load_ok : script.onreadystatechange = function () {
  80. ('loaded' == this.readyState || 'complete' == this.readyState) && load_ok()
  81. }
  82. }
  83. return script.type = 'text/javascript',
  84. script.src = script_src,
  85. head.appendChild(script),
  86. script
  87. }
  88. }
  89. function async_load_script(libName, fn_clean_up, fn_fail_handler) {
  90. var cleanUpHandlerList = libCleanUpHandlerList_Map[libName] || (libCleanUpHandlerList_Map[libName] = []);
  91. cleanUpHandlerList.push(fn_clean_up);
  92. var script_src,
  93. libAttr = libAttr_Map[libName] || {},
  94. pkg = libAttr.pkg;
  95. script_src = pkg ? libPkgNameList_Map[pkg].url : libAttr.url || libName,
  96. create_async_script(script_src, fn_fail_handler && function () {
  97. fn_fail_handler(libName)
  98. })
  99. }
  100. var head = document.getElementsByTagName('head')[0],
  101. libCleanUpHandlerList_Map = {
  102. },
  103. libFunc_Map = {
  104. },
  105. libMod_Map = {
  106. },
  107. async_script_list = {
  108. },
  109. libAttr_Map = {
  110. },
  111. libPkgNameList_Map = {
  112. };
  113. define = function (libName, func) {
  114. console_info("define ", libName);
  115. libFunc_Map[libName] = func;
  116. var cleanUpHandlerList = libCleanUpHandlerList_Map[libName];
  117. if (cleanUpHandlerList) {
  118. for (var n = 0, o = cleanUpHandlerList.length; o > n; n++)
  119. cleanUpHandlerList[n]();
  120. delete libCleanUpHandlerList_Map[libName]
  121. }
  122. },
  123. require = function (libName) {
  124. //console_info("require ", libName);
  125. if (libName && libName.splice) {
  126. //console_info("call require.async");
  127. return require.async.apply(this, arguments);
  128. }
  129. libName = require.alias(libName);
  130. //console_info("alias", libName);
  131. var module = libMod_Map[libName];
  132. if (module) {
  133. //console_info("found in libModMap, return.");
  134. return module.exports;
  135. }
  136. var modImplFunc = libFunc_Map[libName];
  137. if (!modImplFunc) {
  138. //console_info("Can not found module");
  139. throw '[ModJS] Cannot find module `' + libName + '`';
  140. }
  141. module = libMod_Map[libName] = {
  142. exports: {
  143. }
  144. };
  145. //console_info("modImplFunc", modImplFunc);
  146. var n = 'function' == typeof modImplFunc ? modImplFunc.apply(module, [
  147. require,
  148. module.exports,
  149. module
  150. ]) : modImplFunc;
  151. //console_info("n", n, "module", module);
  152. if (libName === 'wkcommon:widget/ui/reader/view/doc/view.js') {
  153. console_info('++++++++++++++++++ hook doc/view.js');
  154. module.exports.view.prototype._recycleView = function(e) {
  155. console_info("hooked _recycleView called");
  156. };
  157. } else if (libName == "wkcommon:widget/ui/reader_plugin/copylimit/copylimit.js") {
  158. console_info('++++++++++++++++++ hook copylimit/copylimit.js');
  159. module.exports.prototype.isCanCopy = function(e) {
  160. console_info("hooked isCanCopy called");
  161. return true;
  162. };
  163. }
  164. return n && (module.exports = n),
  165. module.exports
  166. },
  167. require.async = function (libList_param, fn_clean_up_handler, fn_fail_handler) {
  168. function async_load_script_list(libList_tmp) {
  169. for (var r = 0, n = libList_tmp.length; n > r; r++) {
  170. var libName = libList_tmp[r];
  171. if (libName in libFunc_Map) {
  172. var libAttr = libAttr_Map[libName];
  173. libAttr && 'deps' in libAttr && async_load_script_list(libAttr.deps)
  174. } else if (!(libName in loading_status)) {
  175. loading_status[libName] = !0,
  176. load_cnt++ ,
  177. async_load_script(libName, fn_clean_up, fn_fail_handler);
  178. var libAttr = libAttr_Map[libName];
  179. libAttr && 'deps' in libAttr && async_load_script_list(libAttr.deps)
  180. }
  181. }
  182. }
  183. function fn_clean_up() {
  184. if (0 == load_cnt--) {
  185. for (var modList = [], i = 0, a = libList_param.length; a > i; i++)
  186. modList[i] = require(libList_param[i]);
  187. fn_clean_up_handler && fn_clean_up_handler.apply(obj, modList)
  188. }
  189. }
  190. 'string' == typeof libList_param && (libList_param = [libList_param]);
  191. for (var c = 0, f = libList_param.length; f > c; c++)
  192. libList_param[c] = require.alias(libList_param[c]);
  193. var loading_status = {},
  194. load_cnt = 0;
  195. async_load_script_list(libList_param),
  196. fn_clean_up()
  197. },
  198. require.resourceMap = function (resMap) {
  199. var r,
  200. tmp_list;
  201. tmp_list = resMap.res;
  202. for (r in tmp_list)
  203. tmp_list.hasOwnProperty(r) && (libAttr_Map[r] = tmp_list[r]);
  204. tmp_list = resMap.pkg;
  205. for (r in tmp_list)
  206. tmp_list.hasOwnProperty(r) && (libPkgNameList_Map[r] = tmp_list[r])
  207. },
  208. require.loadJs = function (e) {
  209. create_async_script(e)
  210. },
  211. require.loadCss = function (e) {
  212. if (e.content) {
  213. var style = document.createElement('style');
  214. style.type = 'text/css',
  215. style.styleSheet ? style.styleSheet.cssText = e.content : style.innerHTML = e.content,
  216. head.appendChild(style)
  217. } else if (e.url) {
  218. var link = document.createElement('link');
  219. link.href = e.url,
  220. link.rel = 'stylesheet',
  221. link.type = 'text/css',
  222. head.appendChild(link)
  223. }
  224. },
  225. require.alias = function (libName) {
  226. return libName
  227. },
  228. require.timeout = 5000
  229. }(this);
  230. Object.defineProperty(window, 'require', {
  231. get: function () { return require; },
  232. set: function (v) {
  233. //console_info('RO Value! New value: ' + v);
  234. }
  235. });
  236. Object.defineProperty(window, 'define', {
  237. get: function () { return define; },
  238. set: function (v) {
  239. //console_info('RO Value! New value: ' + v);
  240. }
  241. });
  242. /* IMPORTANT
  243. * must freeze `require', otherwize require.alias/.async/.resouceMap will be modified.
  244. */
  245. Object.freeze(define);
  246. Object.freeze(require);
  247. }
  248. injectFunction(hookModJS);
  249.  
  250. function tunePage() {
  251. function continueRead() {
  252. try {
  253. /* _recycleView hooked in hookModJS() */
  254. var v = require('wkcommon:widget/ui/reader/view/doc/view.js').view.prototype._recycleView;
  255. } catch(e) {
  256. console_error(e.toString());
  257. var v = e.toString();
  258. }
  259. console_info("doc/view.js ... _recycleView:", v.toSource());
  260. console_info("continueRead >>> ");
  261. $('.doc-reader').attr('oncopy', "");
  262. $(".goBtn").click();
  263. }
  264.  
  265. function removeUseless() {
  266. var rmNodes = [
  267. '.fix-searchbar-wrap',
  268. '.doc-tag',
  269. '.reader-tools-bar-wrap',
  270. '#Zuniqueid__3',
  271. '[id^=html-reader-AD]',
  272. '.banner-core-wrap',
  273. '.fc-container',
  274. '.fc-ppt',
  275. '#ggbtm-ads',
  276. '.banner-ad',
  277. ];
  278. for (var node of rmNodes) {
  279. var tmpNodes = $(node);
  280. if (tmpNodes.length == 0)
  281. console_info("No node found for " + node);
  282. else {
  283. $(node).hide();
  284. $(node).remove();
  285. }
  286. }
  287. }
  288.  
  289. window.addEventListener("load", function () {
  290. console_info("ready!");
  291. continueRead();
  292. removeUseless();
  293. });
  294. };
  295. injectFunction(tunePage);
  296. })();
  297.  
  298. console.log("!!!!!!!!!!!!!!!!!!!!!/save-baidu-wenku-doc!!!!!!!!!!!!!!!!!!!!!!!!");