PublicSuffixList.Js

A javascript utility to make use of Mozilla Foundation's Public Suffix List ("PSL").

Αυτός ο κώδικας δεν πρέπει να εγκατασταθεί άμεσα. Είναι μια βιβλιοθήκη για άλλους κώδικες που περιλαμβάνεται μέσω της οδηγίας meta // @require https://update.greatest.deepsurf.us/scripts/23069/146621/PublicSuffixListJs.js

  1. // ==UserScript==
  2. // @name PublicSuffixList.Js
  3. // @version 1.0.0
  4. // @description A javascript utility to make use of Mozilla Foundation's Public Suffix List ("PSL").
  5. // @author gorhill
  6. // @license GPL v3
  7. // ==/UserScript==
  8.  
  9. /*******************************************************************************
  10.  
  11. publicsuffixlist.js - an efficient javascript implementation to deal with
  12. Mozilla Foundation's Public Suffix List <http://publicsuffix.org/list/>
  13. Copyright (C) 2013 Raymond Hill
  14.  
  15. License: pick the one which suits you:
  16. GPL v3 see <https://www.gnu.org/licenses/gpl.html>
  17. APL v2 see <http://www.apache.org/licenses/LICENSE-2.0>
  18. */
  19.  
  20. /*! Home: https://github.com/gorhill/publicsuffixlist.js -- GPLv3 APLv2 */
  21.  
  22. /*
  23. This code is mostly dumb: I consider this to be lower-level code, thus
  24. in order to ensure efficiency, the caller is responsible for sanitizing
  25. the inputs.
  26. */
  27.  
  28. /******************************************************************************/
  29.  
  30. // A single instance of PublicSuffixList is enough.
  31.  
  32. ;(function(root) {
  33.  
  34. /******************************************************************************/
  35.  
  // Marker stored in every selfie so that fromSelfie() can recognise a
  // snapshot produced by this code.
  var selfieMagic = 'iscjsfsaolnm';

  // Rule stores, keyed by top-level domain. `rules` holds the regular and
  // wildcard rules, `exceptions` holds the rules that were prefixed with '!'.
  var rules = {};
  var exceptions = {};

  // Any character outside of this set means the rule has to go through the
  // caller-supplied punycode converter before being stored.
  var mustPunycode = /[^a-z0-9.-]/;

  // Threshold, in characters, applied to the space-joined suffixes of a TLD:
  //   shorter than cutoffLength          -> kept as one string, indexOf() lookup
  //   equal to or longer than the cutoff -> bucketed by length, binary search
  var cutoffLength = 480;
  45.  
  46. /******************************************************************************/
  47.  
  48. // In the context of this code, a domain is defined as:
  49. // "{label}.{public suffix}".
  50. // A single standalone label is a public suffix as per
  51. // http://publicsuffix.org/list/:
  52. // "If no rules match, the prevailing rule is '*' "
  53. // This means 'localhost' is not deemed a domain by this
  54. // code, since according to the definition above, it would be
  55. // evaluated as a public suffix. The caller is therefore responsible to
  56. // decide how to further interpret such public suffix.
  57. //
  58. // `hostname` must be a valid ascii-based hostname.
  59.  
  /**
   * Return the registrable domain of `hostname`, i.e. the public suffix plus
   * the one label that precedes it.
   *
   * @param {string} hostname - ASCII hostname; it is lower-cased internally.
   * @returns {string} The domain, or '' when `hostname` is empty, starts with
   *     a dot, or is itself a public suffix.
   */
  function getDomain(hostname) {
      // Nothing to work with, or a leading dot: not a valid hostname.
      if ( !hostname || hostname.charAt(0) === '.' ) {
          return '';
      }
      var host = hostname.toLowerCase();
      var publicSuffix = getPublicSuffix(host);
      // The whole hostname is a public suffix: there is no domain.
      if ( publicSuffix === host ) {
          return '';
      }
      // Dot that separates the public suffix from the label before it.
      var suffixDot = host.lastIndexOf('.', host.length - publicSuffix.length);
      // Dot that precedes that label, if any.
      var labelDot = host.lastIndexOf('.', suffixDot - 1);
      return labelDot > 0 ? host.slice(labelDot + 1) : host;
  }
  76.  
  77. /******************************************************************************/
  78.  
  79. // Return longest public suffix.
  80. //
  81. // `hostname` must be a valid ASCII string that respects hostname naming rules.
  82.  
  /**
   * Return the longest public suffix of `hostname`.
   *
   * Leading labels are removed one at a time, so the first candidate that
   * matches a rule is also the longest match.
   *
   * @param {string} hostname - ASCII hostname.
   * @returns {string} The public suffix; '' for an empty hostname. When no
   *     rule matches, the last label is returned.
   */
  function getPublicSuffix(hostname) {
      if ( !hostname ) {
          return '';
      }
      var candidate = hostname;
      for ( var dot = candidate.indexOf('.'); dot >= 0; dot = candidate.indexOf('.') ) {
          var parent = candidate.slice(dot + 1);
          // An exception rule designates its parent as the public suffix.
          if ( search(exceptions, candidate) ) {
              return parent;
          }
          // Exact rule first, then the wildcard form of the same candidate.
          if ( search(rules, candidate) || search(rules, '*' + candidate.slice(dot)) ) {
              return candidate;
          }
          candidate = parent;
      }
      // Only one label is left.
      return candidate;
  }
  108.  
  109. /******************************************************************************/
  110.  
  111. // Look up a specific hostname.
  112.  
  /**
   * Tell whether `hostname` is present in `store`.
   *
   * The text after the last dot selects the entry in `store`; the text before
   * it is what gets looked up. A hostname without a dot is used for both.
   * An entry is either a space-delimited string, scanned with indexOf(), or
   * an array indexed by suffix length whose items are sorted fixed-width
   * suffixes concatenated together, scanned with a binary search.
   *
   * @param {Object} store - Rule store keyed by top-level domain.
   * @param {string} hostname - Hostname to look up.
   * @returns {boolean} true when the hostname is found.
   */
  function search(store, hostname) {
      var lastDot = hostname.lastIndexOf('.');
      var hasDot = lastDot >= 0;
      var tld = hasDot ? hostname.slice(lastDot + 1) : hostname;
      var remainder = hasDot ? hostname.slice(0, lastDot) : hostname;

      var substore = store[tld];
      if ( !substore ) {
          return false;
      }

      // String entry: every suffix is surrounded by spaces.
      if ( typeof substore === 'string' ) {
          return substore.indexOf(' ' + remainder + ' ') >= 0;
      }

      // Array entry: pick the bucket holding suffixes of the same length.
      var width = remainder.length;
      var packed = substore[width];
      if ( !packed ) {
          return false;
      }

      var lo = 0;
      var hi = Math.floor(packed.length / width + 0.5);
      while ( lo < hi ) {
          var mid = (lo + hi) >> 1;
          var entry = packed.substr(width * mid, width);
          if ( remainder === entry ) {
              return true;
          }
          if ( remainder < entry ) {
              hi = mid;
          } else {
              lo = mid + 1;
          }
      }
      return false;
  }
  154.  
  155. /******************************************************************************/
  156.  
  157. // Parse and set a UTF-8 text-based suffix list. Format is same as found at:
  158. // http://publicsuffix.org/list/
  159. //
  160. // `toAscii` is a converter from unicode to punycode. Required since the
  161. // Public Suffix List contains unicode characters.
  162. // Suggestion: use <https://github.com/bestiejs/punycode.js> it's quite good.
  163.  
  /**
   * Replace the current rules and exceptions with those found in `text`.
   *
   * `text` is processed line by line: it is lower-cased, anything from '//'
   * to the end of a line is dropped, and blank lines are skipped. A rule that
   * starts with '!' goes to the exception store, every other rule goes to the
   * regular store. Rules are grouped under the text that follows their last
   * dot, and both stores are crystallized once all lines are read.
   *
   * @param {string} text - Suffix list, one rule per line.
   * @param {function(string): string} toAscii - Converter applied to every
   *     rule containing a character outside of [a-z0-9.-].
   */
  function parse(text, toAscii) {
      exceptions = {};
      rules = {};

      // Rules are expected in canonical, lower-case form.
      var source = text.toLowerCase();
      var total = source.length;
      var cursor = 0;

      while ( cursor < total ) {
          // A line stops at the next '\n'; when none is left, at the next
          // '\r'; when neither is left, at the end of the text.
          var eol = source.indexOf('\n', cursor);
          if ( eol < 0 ) {
              eol = source.indexOf('\r', cursor);
          }
          if ( eol < 0 ) {
              eol = total;
          }
          var entry = source.slice(cursor, eol);
          cursor = eol + 1;

          // Drop the comment, if any, then the surrounding whitespace.
          var commentAt = entry.indexOf('//');
          if ( commentAt >= 0 ) {
              entry = entry.slice(0, commentAt);
          }
          entry = entry.trim();
          if ( !entry ) {
              continue;
          }

          if ( mustPunycode.test(entry) ) {
              entry = toAscii(entry);
          }

          // A leading '!' marks an exception rule.
          var target = rules;
          if ( entry.charAt(0) === '!' ) {
              target = exceptions;
              entry = entry.slice(1);
          }

          // Split on the last dot; a rule without a dot is its own key.
          var dot = entry.lastIndexOf('.');
          var tld = entry;
          var prefix = entry;
          if ( dot >= 0 ) {
              tld = entry.slice(dot + 1);
              prefix = entry.slice(0, dot);
          }

          if ( !target.hasOwnProperty(tld) ) {
              target[tld] = [];
          }
          if ( prefix ) {
              target[tld].push(prefix);
          }
      }

      crystallize(exceptions);
      crystallize(rules);
  }
  236.  
  237. /******************************************************************************/
  238.  
  239. // Crystallize the storage of suffixes using optimal internal representation
  240. // for future look up.
  241.  
  /**
   * Convert, in place, every entry of `store` from an array of suffixes into
   * the representation used for lookups:
   *   - '' when the entry holds no suffix text;
   *   - ' a b c ' when the space-joined suffixes are shorter than
   *     cutoffLength;
   *   - otherwise an array indexed by suffix length, each item being the
   *     sorted suffixes of that length concatenated without separator.
   *
   * @param {Object} store - Rule store keyed by top-level domain.
   * @returns {Object} The same `store` object.
   */
  function crystallize(store) {
      for ( var tld in store ) {
          if ( !store.hasOwnProperty(tld) ) {
              continue;
          }
          var labels = store[tld];
          var joined = labels.join(' ');

          if ( !joined ) {
              store[tld] = '';
              continue;
          }
          if ( joined.length < cutoffLength ) {
              store[tld] = ' ' + joined + ' ';
              continue;
          }

          // Group the suffixes by length...
          var buckets = [];
          for ( var k = 0; k < labels.length; k++ ) {
              var label = labels[k];
              var width = label.length;
              if ( !buckets[width] ) {
                  buckets[width] = [];
              }
              buckets[width].push(label);
          }
          // ...then flatten each group into one sorted string so that
          // fixed-width slices of it can be compared.
          for ( var n = 0; n < buckets.length; n++ ) {
              if ( buckets[n] ) {
                  buckets[n] = buckets[n].sort().join('');
              }
          }
          store[tld] = buckets;
      }
      return store;
  }
  286.  
  287. /******************************************************************************/
  288.  
  /**
   * Take a snapshot of the current state that fromSelfie() can restore later.
   *
   * @returns {{magic: string, rules: Object, exceptions: Object}} The magic
   *     marker together with the live rule and exception stores (the stores
   *     are referenced, not copied).
   */
  function toSelfie() {
      var selfie = {};
      selfie.magic = selfieMagic;
      selfie.rules = rules;
      selfie.exceptions = exceptions;
      return selfie;
  }
  296.  
  /**
   * Restore the state captured by toSelfie().
   *
   * The selfie is rejected, and the current state left untouched, when it is
   * not an object, when its magic marker does not match, or when its `rules`
   * or `exceptions` member is not an object.
   *
   * @param {*} selfie - Value previously returned by toSelfie().
   * @returns {boolean} true when the selfie was accepted and installed.
   */
  function fromSelfie(selfie) {
      // `typeof null` is 'object', so null has to be ruled out explicitly
      // before any property of the selfie is read.
      if ( selfie === null || typeof selfie !== 'object' ) {
          return false;
      }
      if ( typeof selfie.magic !== 'string' || selfie.magic !== selfieMagic ) {
          return false;
      }
      // Installing a missing store would make every later lookup throw.
      var candidateRules = selfie.rules;
      var candidateExceptions = selfie.exceptions;
      if ( candidateRules === null || typeof candidateRules !== 'object' ) {
          return false;
      }
      if ( candidateExceptions === null || typeof candidateExceptions !== 'object' ) {
          return false;
      }
      rules = candidateRules;
      exceptions = candidateExceptions;
      return true;
  }
  305.  
  306. /******************************************************************************/
  307.  
  308. // Public API
  309.  
  // `root` is the `this` value handed to the enclosing wrapper. When it is
  // not usable, fall back to whichever global object exists. Each candidate
  // is probed with `typeof` so that a host without `window` does not throw.
  if ( !root ) {
      if ( typeof window !== 'undefined' ) {
          root = window;
      } else if ( typeof self !== 'undefined' ) {
          root = self;
      } else if ( typeof globalThis !== 'undefined' ) {
          root = globalThis;
      } else {
          root = {};
      }
  }

  // The single shared instance, reachable as `root.publicSuffixList`.
  root.publicSuffixList = {
      'version': '1.0',
      'parse': parse,
      'getDomain': getDomain,
      'getPublicSuffix': getPublicSuffix,
      'toSelfie': toSelfie,
      'fromSelfie': fromSelfie
  };

  // Also publish through a CommonJS-style loader when one is present.
  if ( typeof module !== "undefined" ) {
      module.exports = root.publicSuffixList;
  } else if ( typeof exports !== "undefined" ) {
      // Assigning to the `exports` identifier would only rebind a local
      // name and publish nothing, so copy the members onto the object.
      for ( var key in root.publicSuffixList ) {
          if ( root.publicSuffixList.hasOwnProperty(key) ) {
              exports[key] = root.publicSuffixList[key];
          }
      }
  }
  326.  
  327. /******************************************************************************/
  328.  
  329. })(this);