Wayback Machine Small Bug Fixes

Fixes encoded ampersands on the Wayback Machine's captures graph, problems that arise when trailing slashes are missing in a URL, and other small issues universally present in all crawled sites

2016-06-07 يوللانغان نەشرى. ئەڭ يېڭى نەشرىنى كۆرۈش.

  1. // ==UserScript==
  2. // @name Wayback Machine Small Bug Fixes
  3. // @namespace DoomTay
  4. // @description Fixes encoded ampersands on Wayback Machine's captures graph and problems that arise when trailing slashes are missing in an URL and other small issues universally present in all crawled sites
  5. // @version 1.3.0
  6. // @include http://web.archive.org/web/*
  7. // @include http://wayback.archive.org/web/*
  8. // @include https://web.archive.org/web/*
  9. // @include https://wayback.archive.org/web/*
  10. // @run-at document-start
  11. // @exclude /\*/
  12. // @grant none
  13.  
  14. // ==/UserScript==
  15.  
  // Text following the final "/" of the current address (empty when the address ends in "/").
  var lastFolder = window.location.href.split("/").pop();

  // document.images is a live collection, so it also reflects images parsed later.
  var pics = document.images;

  // querySelectorAll returns a static list: only elements present right now are captured.
  var backgrounds = document.querySelectorAll("[background]");

  // The address is treated as a directory when it carries no query string and either
  // its last "." comes before its last "/" or everything after the last "//" is the
  // final segment itself.
  var shouldHaveTrailingSlash =
    !lastFolder.includes("?") &&
    (
      window.location.href.lastIndexOf(".") < window.location.href.lastIndexOf("/") ||
      window.location.href.substring(window.location.href.lastIndexOf("//") + 2) === lastFolder
    );

  var hasTrailingSlash = window.location.href.endsWith("/");

  // Everything up to (not including) the first "/" that follows the last "//" in the address.
  var domain = window.location.href.substring(
    0,
    window.location.href.indexOf("/", window.location.href.lastIndexOf("//") + 2)
  );

  // True when the current address is exactly that prefix, with or without a trailing "/".
  var isInDomain = [domain, domain + "/"].includes(window.location.href);

  // Digits following "web/" in the address; throws when the address has none.
  var timestamp = window.location.href.match(/web\/(\d{1,14})/)[1];

  // Nodes already visited by checkMutations.
  var checkedNodes = [];
  25.  
  // Insert a <base> element only when the document has none at this point, so that
  // relative URLs resolve against the directory form of the current address.
  if (!document.getElementsByTagName("base")[0])
  {
    // Declared with `var` on purpose: the mutation observer callback elsewhere in this
    // file reads `base`, which relies on function/global scoping rather than block scoping.
    var base = document.createElement("base");

    // Append the missing slash when the address looks like a directory; in every other
    // case keep the document's current base URL. (The former third branch,
    // `domain + "/"`, could never be reached: the second condition already covered
    // every combination the first one rejected.)
    base.href = (shouldHaveTrailingSlash && !hasTrailingSlash)
      ? window.location.href + "/"
      : document.baseURI;

    document.head.appendChild(base);
  }
  34.  
  /**
   * Resolves a possibly relative URL against the document's base URL.
   *
   * Uses the URL constructor instead of assigning to a throwaway Image's `src`,
   * which resolved the URL but also started an image download on every call.
   *
   * @param {string} bgURL - URL as written in the page (relative or absolute).
   * @returns {string} The absolute URL, or `bgURL` unchanged when it cannot be parsed.
   */
  function relativeToAbsolute(bgURL)
  {
    try
    {
      return new URL(bgURL, document.baseURI).href;
    }
    catch (error)
    {
      // Deliberate fallback: an unparseable value is handed back as written,
      // so callers still receive a string to inspect.
      return bgURL;
    }
  }
  41.  
  /**
   * Rewrites an absolute URL so that it points inside the current capture.
   *
   * - URLs containing the document's host keep only their path (everything from the
   *   first "/" after the last "//") and get `domain` put in front of it.
   * - Any other URL keeps everything after its first "/http" marker and is prefixed
   *   with the part of `domain` up to and including the "/" of its own "/http" marker.
   *
   * @param {string} URL - Absolute URL to rewrite.
   * @returns {string} The rewritten URL.
   */
  function fixURL(URL)
  {
    const embeddedMarker = "/http";

    if (!URL.includes(document.domain))
    {
      const capturePrefix = domain.substring(0, domain.indexOf(embeddedMarker) + 1);
      const embeddedPart = URL.substring(URL.indexOf(embeddedMarker) + 1);
      return capturePrefix + embeddedPart;
    }

    const hostStart = URL.lastIndexOf("//") + 2;
    const pathStart = URL.indexOf("/", hostStart);
    return domain + URL.substring(pathStart);
  }
  47.  
  /**
   * Rewrites an image's `src` attribute so that it loads from the current capture.
   *
   * @param {Element} pic - Image element; only `getAttribute`, `setAttribute` and
   *   the resolved `src` property are used.
   * @returns {void} Nothing; returns early when the element has no `src` attribute.
   */
  function fixImage(pic)
  {
    let oldSrc = pic.getAttribute("src");
    if (!oldSrc) return;

    // Strip everything up to and including the archive host. The host position is
    // checked explicitly: previously a src that merely contained "archive.org"
    // (for example in a file name) but not the host produced indexOf() === -1 and
    // the path was cut at a meaningless offset.
    const host = document.domain;
    const hostIndex = oldSrc.indexOf(host);
    if (oldSrc.includes("archive.org") && hostIndex !== -1)
    {
      oldSrc = oldSrc.substring(hostIndex + host.length);
    }

    if (oldSrc.startsWith("/") && !oldSrc.startsWith("/web/"))
    {
      // Root-relative path outside /web/: anchor it to the captured site.
      pic.setAttribute("src", domain + oldSrc);
    }
    else if (oldSrc.startsWith("../") && isInDomain)
    {
      // At the captured site's root there is no parent directory to climb to.
      pic.setAttribute("src", oldSrc.substring(3));
    }
    else
    {
      // Keep the embedded original URL (after the first "/http" of the resolved src)
      // and put the current capture prefix in front of it.
      const resolvedSrc = pic.src;
      const capturePrefix = domain.substring(0, domain.indexOf("/http") + 1);
      pic.setAttribute("src", capturePrefix + resolvedSrc.substring(resolvedSrc.indexOf("/http") + 1));
    }
  }
  57.  
  /**
   * Replaces a node's background URL.
   *
   * Writes through the `background` property when it currently holds a truthy value;
   * otherwise writes the `background` attribute, but only when that attribute
   * currently holds a truthy value. Nodes with neither are left untouched.
   *
   * @param {Element} node - Element whose background should change.
   * @param {string} newBackground - Replacement background URL.
   */
  function changeBackground(node, newBackground)
  {
    if (node.background)
    {
      node.background = newBackground;
      return;
    }

    if (node.getAttribute("background"))
    {
      node.setAttribute("background", newBackground);
    }
  }
  63.  
  // Watches the document while it loads and repairs nodes as they appear:
  //  - a <base> element arriving from the page replaces the synthetic one, after which
  //    image sources and background attributes are re-assigned;
  //  - the inline script containing "__wm.bt()" triggers a rewrite of a sibling script
  //    whose wbCurrentUrl value still contains HTML-encoded ampersands;
  //  - every other added node is handed to checkMutations.
  var observer = new MutationObserver(function(mutations) {
    mutations.forEach(function(mutation) {
      var target = mutation.target;
      if (target.nodeName != "HEAD" && target.nodeName != "BODY") checkMutations(target);

      // Forget the target so that it is examined again on its next mutation.
      var checkedIndex = checkedNodes.indexOf(target);
      if (checkedIndex !== -1) checkedNodes.splice(checkedIndex, 1);

      for (var i = 0; i < mutation.addedNodes.length; i++)
      {
        var added = mutation.addedNodes[i];

        if (added.nodeName == "BASE")
        {
          // Detach the synthetic base from wherever it currently lives. Going through
          // its own parent avoids the exception removeChild raises when the element
          // was already removed (e.g. a second BASE insertion).
          if (base && base.parentNode) base.parentNode.removeChild(base);

          Array.prototype.forEach.call(pics, function(pic) {
            pic.src = pic.src;
          });

          // Query at this moment instead of using the list captured at script start,
          // which is static and does not contain elements parsed afterwards.
          Array.prototype.forEach.call(document.querySelectorAll("[background]"), function(element) {
            element.setAttribute("background", element.getAttribute("background"));
          });
        }
        else if (added.nodeName == "SCRIPT" && added.innerHTML.includes("__wm.bt()"))
        {
          // The node may already have been detached by the time this callback runs.
          var siblings = added.parentNode ? added.parentNode.childNodes : [];
          var toolbarScript = Array.prototype.find.call(siblings, node => node.innerHTML && node.innerHTML.includes("wbCurrentUrl") && node.innerHTML.includes("&amp;"));
          var urlMatch = toolbarScript ? toolbarScript.innerHTML.match(/wbCurrentUrl = "(.+)";/) : null;

          // Skip quietly when the assignment is not in the expected form; throwing here
          // would abandon the remaining mutation records of this batch.
          if (urlMatch)
          {
            var replacement = document.createElement("script");
            replacement.type = toolbarScript.type;

            var currentURL = urlMatch[1];
            var decodedURL = decodeHTML(currentURL);

            // A replacer function keeps "$&", "$$" and similar sequences inside the
            // decoded URL from being interpreted as replacement patterns.
            replacement.innerHTML = toolbarScript.innerHTML.replace(currentURL, function() { return decodedURL; });

            // NOTE(review): the purpose of the extra `ff` property is not evident from
            // this file; it is reproduced exactly as before.
            replacement.innerHTML = replacement.innerHTML.replace("bt:bootstrap}","bt:bootstrap,ff:\"balls\"}");

            toolbarScript.parentNode.replaceChild(replacement, toolbarScript);
          }
        }
        else checkMutations(added);
      }
    });
  });
  97.  
  /**
   * Decodes HTML entities in a string.
   *
   * The text is assigned to the `innerHTML` of a detached textarea and read back
   * through its `value`.
   *
   * @param {string} text - Text that may contain HTML entities such as "&amp;".
   * @returns {string} The textarea's resulting value.
   */
  function decodeHTML(text) {
    const scratch = Object.assign(document.createElement("textarea"), { innerHTML: text });
    return scratch.value;
  }
  103.  
  /**
   * Repairs a node and, recursively, every child that has not been visited yet.
   *
   * Nodes inside the element with id "wm-ipp" are skipped. For element nodes:
   *  - IMG elements whose resolved src contains neither "/web/" nor "data:" go to fixImage;
   *  - a background URL that does not resolve under "<host>/web" is rewritten with fixURL;
   *  - anchors whose href embeds "/http://web.archive.org/web/" keep only the part
   *    starting at that embedded address.
   *
   * The observer is paused while this function edits the node and is always
   * reconnected afterwards, even when a helper throws.
   *
   * @param {Node} node - Node to examine.
   */
  function checkMutations(node)
  {
    const toolbar = document.getElementById("wm-ipp");
    if (toolbar && toolbar.contains(node)) return;

    if (node.nodeType == 1)
    {
      if (node.nodeName == "IMG" && !node.src.includes("/web/") && !node.src.includes("data:"))
      {
        observer.disconnect();
        try
        {
          fixImage(node);
        }
        finally
        {
          // Without this the observer would stay disconnected for good after an error.
          observer.observe(document.body || document, config);
        }
      }

      const bg = node.background || node.getAttribute("background");
      if (bg)
      {
        // Resolve once and reuse the result for both the test and the rewrite.
        const absoluteBg = relativeToAbsolute(bg);
        if (!absoluteBg.includes(document.domain + "/web"))
        {
          observer.disconnect();
          try
          {
            changeBackground(node, fixURL(absoluteBg));
          }
          finally
          {
            observer.observe(document.body || document, config);
          }
        }
      }

      const nestedArchive = "/http://web.archive.org/web/";
      if (node.nodeName == "A" && node.href.includes(nestedArchive))
      {
        node.href = node.href.substring(node.href.indexOf(nestedArchive) + 1);
      }
    }

    checkedNodes.push(node);
    for (let n = 0; n < node.childNodes.length; n++)
    {
      const child = node.childNodes[n];
      if (!checkedNodes.includes(child)) checkMutations(child);
    }
  }
  130.  
  // Observer options: attribute changes plus child insertions/removals, for the
  // whole subtree below the observed node.
  var config = {
    attributes: true,
    childList: true,
    subtree: true
  };

  // Observe <body> when it exists; otherwise fall back to the document itself.
  observer.observe(document.body ? document.body : document, config);