blob: 427074a6fa03e8e2ba98042806c93e7dac8589cc [file] [log] [blame]
Ed Tanous9b65f1f2017-03-07 15:17:13 -08001/**
2 * @license
3 * Copyright (C) 2013 Google Inc.
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
Ed Tanous904063f2017-03-02 16:48:24 -080017
Ed Tanous9b65f1f2017-03-07 15:17:13 -080018/**
19 * @fileoverview
20 * <div style="white-space: pre">
21 * Looks at query parameters to decide which language handlers and style-sheets
22 * to load.
23 *
24 * Query Parameter Format Effect Default
25 * +------------------+---------------+------------------------------+--------+
26 * | autorun= | true | false | If true then prettyPrint() | "true" |
27 * | | | is called on page load. | |
28 * +------------------+---------------+------------------------------+--------+
29 * | lang= | language name | Loads the language handler | Can |
30 * | | | named "lang-<NAME>.js". | appear |
31 * | | | See available handlers at | many |
32 * | | | https://github.com/google/ | times. |
33 * | | | code-prettify/tree/master/ | |
34 * | | | src | |
35 * +------------------+---------------+------------------------------+--------+
36 * | skin= | skin name | Loads the skin stylesheet | none. |
37 * | | | named "<NAME>.css". | |
38 * | | | https://cdn.rawgit.com/ | |
39 * | | | google/code-prettify/master/ | |
40 * | | | styles/index.html | |
41 * +------------------+---------------+------------------------------+--------+
42 * | callback= | JS identifier | When "prettyPrint" finishes | none |
43 * | | | window.exports[js_ident] is | |
44 * | | | called. | |
45 * | | | The callback must be under | |
46 * | | | exports to reduce the risk | |
47 * | | | of XSS via query parameter | |
48 * | | | injection. | |
49 * +------------------+---------------+------------------------------+--------+
50 *
51 * Examples
52 * .../run_prettify.js?lang=css&skin=sunburst
53 * 1. Loads the CSS language handler which can be used to prettify CSS
54 * stylesheets, HTML <style> element bodies and style="..." attributes
55 * values.
56 * 2. Loads the sunburst.css stylesheet instead of the default prettify.css
57 * stylesheet.
58 * A gallery of stylesheets is available at
59 * https://cdn.rawgit.com/google/code-prettify/master/styles/index.html
60 * 3. Since autorun=false is not specified, calls prettyPrint() on page load.
61 * </div>
62 */
Ed Tanous904063f2017-03-02 16:48:24 -080063
/**
 * @typedef {!Array.<number|string>}
 * Alternating indices and the decorations that should be inserted there.
 * The indices are monotonically increasing.
 */
var DecorationsT;
70
/**
 * @typedef {!{
 *   sourceNode: !Element,
 *   pre: !(number|boolean),
 *   langExtension: ?string,
 *   numberLines: ?(number|boolean),
 *   sourceCode: ?string,
 *   spans: ?(Array.<number|Node>),
 *   basePos: ?number,
 *   decorations: ?DecorationsT
 * }}
 * <dl>
 *  <dt>sourceNode<dd>the element containing the source
 *  <dt>sourceCode<dd>source as plain text
 *  <dt>pre<dd>truthy if white-space in text nodes
 *     should be considered significant.
 *  <dt>spans<dd> alternating span start indices into source
 *     and the text node or element (e.g. {@code <BR>}) corresponding to that
 *     span.
 *  <dt>decorations<dd>an array of style classes preceded
 *     by the position at which they start in job.sourceCode in order
 *  <dt>basePos<dd>integer position of this.sourceCode in the larger chunk of
 *     source.
 * </dl>
 */
var JobT;
97
/**
 * @typedef {!{
 *   sourceCode: string,
 *   spans: !(Array.<number|Node>)
 * }}
 * <dl>
 *  <dt>sourceCode<dd>source as plain text
 *  <dt>spans<dd> alternating span start indices into source
 *     and the text node or element (e.g. {@code <BR>}) corresponding to that
 *     span.
 * </dl>
 */
var SourceSpansT;
111
/**
 * Compile-time flag; defaults to false.
 * NOTE(review): the code that consumes this flag is outside this section --
 * confirm its exact effect before changing the default.
 * @define {boolean}
 */
var IN_GLOBAL_SCOPE = false;
114
115(function () {
116 "use strict";
117
// Short local aliases for the browser globals used throughout the loader.
var win = window;
var doc = document;
var root = doc.documentElement;
// Prefer document.head; fall back to the first <head> element and finally to
// the root element so that there is always a node to append to.
var head = doc['head'] || doc.getElementsByTagName("head")[0] || root;
122
// From http://javascript.nwbox.com/ContentLoaded/contentloaded.js
// Author: Diego Perini (diego.perini at gmail.com)
// Summary: cross-browser wrapper for DOMContentLoaded
// Updated: 20101020
// License: MIT
// Version: 1.2
/**
 * Calls {@code callback} exactly once when the document is ready.
 *
 * The callback is invoked with {@code win} as {@code this} and a single
 * argument naming the trigger: 'lazy' when the document was already
 * complete at call time, the DOM event type ('DOMContentLoaded',
 * 'readystatechange' or 'load'), or 'poll' when readiness was detected by
 * the doScroll polling fallback.
 *
 * @param {function(string)} callback invoked once the document is ready.
 */
function contentLoaded(callback) {
  var addEventListener = doc['addEventListener'];
  var done = false, top = true,
      // Pick the standard or the legacy (attachEvent) event API.
      add = addEventListener ? 'addEventListener' : 'attachEvent',
      rem = addEventListener ? 'removeEventListener' : 'detachEvent',
      pre = addEventListener ? '' : 'on',

      // Shared handler for all readiness signals.  Receives an event
      // object, or the string 'poll' from the polling fallback.
      init = function(e) {
        if (e.type == 'readystatechange' && doc.readyState != 'complete') {
          return;
        }
        (e.type == 'load' ? win : doc)[rem](pre + e.type, init, false);
        // The first signal wins; later ones are ignored.
        if (!done && (done = true)) { callback.call(win, e.type || e); }
      },

      // Retries root.doScroll every 50ms until it stops throwing, then
      // reports readiness.
      poll = function() {
        try {
          root.doScroll('left');
        } catch(e) {
          win.setTimeout(poll, 50);
          return;
        }
        init('poll');
      };

  if (doc.readyState == 'complete') {
    callback.call(win, 'lazy');
  } else {
    if (doc.createEventObject && root.doScroll) {
      // Only poll in the top-level window; reading frameElement may throw.
      try { top = !win.frameElement; } catch(e) { }
      if (top) { poll(); }
    }
    doc[add](pre + 'DOMContentLoaded', init, false);
    doc[add](pre + 'readystatechange', init, false);
    win[add](pre + 'load', init, false);
  }
}
166
/**
 * Given a list of URLs to stylesheets, loads the first that loads without
 * triggering an error event.
 *
 * A <link> element is appended to {@code head} for the first URL.  Each
 * link except the last gets an error handler that tries the next URL; the
 * last URL has no further fallback.  An empty list does nothing.
 *
 * @param {Array.<string>} stylesheets candidate URLs in order of preference.
 */
function loadStylesheetsFallingBack(stylesheets) {
  var n = stylesheets.length;
  function load(i) {
    if (i === n) { return; }
    var link = doc.createElement('link');
    link.rel = 'stylesheet';
    link.type = 'text/css';
    if (i + 1 < n) {
      // http://pieisgood.org/test/script-link-events/ indicates that many
      // versions of IE do not support onerror on <link>s, though
      // http://msdn.microsoft.com/en-us/library/ie/ms535848(v=vs.85).aspx
      // indicates that recent IEs do support error.
      link.error = link.onerror = function () { load(i + 1); };
    }
    link.href = stylesheets[i];
    head.appendChild(link);
  }
  load(0);
}
188
// The query string ("?a=b&c=d") of the <script> URL that loaded this file,
// or '' when no matching <script> element is found.
var scriptQuery = '';
// Look for the <script> node that loads this script to get its parameters.
// This starts looking at the end instead of just considering the last
// because deferred and async scripts run out of order.
// If the script is loaded twice, then this will run in reverse order.
var scripts = doc.getElementsByTagName('script');
for (var i = scripts.length; --i >= 0;) {
  var script = scripts[i];
  // Group 1 captures the query (including the leading '?'); any fragment
  // is ignored.
  var match = script.src.match(
      /^[^?#]*\/run_prettify\.js(\?[^#]*)?(?:#.*)?$/);
  if (match) {
    scriptQuery = match[1] || '';
    // Remove the script from the DOM so that multiple runs at least run
    // multiple times even if parameter sets are interpreted in reverse
    // order.
    script.parentNode.removeChild(script);
    break;
  }
}
208
// Pull parameters into local variables.
var autorun = true;
var langs = [];
var skins = [];
var callbacks = [];
// String.replace is used purely as an iterator over name=value pairs; its
// return value is discarded.
scriptQuery.replace(
    /[?&]([^&=]+)=([^&]+)/g,
    function (_, name, value) {
      try {
        value = decodeURIComponent(value);
        name = decodeURIComponent(name);
      } catch (e) {
        // decodeURIComponent throws URIError on a malformed percent-escape.
        // Skip that one parameter rather than aborting the whole loader.
        return;
      }
      // autorun is disabled by any value starting with "0", "f" or "n"
      // (case-insensitively), e.g. "0", "false", "no".
      if (name == 'autorun') { autorun = !/^[0fn]/i.test(value); } else
      if (name == 'lang') { langs.push(value); } else
      if (name == 'skin') { skins.push(value); } else
      if (name == 'callback') { callbacks.push(value); }
    });
224
225 // Use https to avoid mixed content warnings in client pages and to
226 // prevent a MITM from rewriting prettify mid-flight.
227 // This only works if this script is loaded via https : something
228 // over which we exercise no control.
var LOADER_BASE_URL =
    'https://cdn.rawgit.com/google/code-prettify/master/loader';

// Inject one <script> per requested language handler.  Each script's
// completion (successful or not) decrements pendingLanguages and re-checks
// whether all handlers have finished loading.
for (var i = 0, n = langs.length; i < n; ++i) (function (lang) {
  var script = doc.createElement("script");

  // Excerpted from jQuery.ajaxTransport("script") to fire events when
  // a script is finished loading.
  // Attach handlers for each script
  script.onload = script.onerror = script.onreadystatechange = function () {
    if (script && (
          !script.readyState || /loaded|complete/.test(script.readyState))) {
      // Handle memory leak in IE
      script.onerror = script.onload = script.onreadystatechange = null;

      --pendingLanguages;
      checkPendingLanguages();

      // Remove the script
      if (script.parentNode) {
        script.parentNode.removeChild(script);
      }

      // Guards against the handler running twice for the same script.
      script = null;
    }
  };

  script.type = 'text/javascript';
  script.src = LOADER_BASE_URL
      + '/lang-' + encodeURIComponent(lang) + '.js';

  // Circumvent IE6 bugs with base elements (#2709 and #4378) by prepending
  head.insertBefore(script, head.firstChild);
})(langs[i]);
263
// Number of requested language handler scripts that have not finished
// loading (or failing to load) yet.
var pendingLanguages = langs.length;
/**
 * Schedules onLangsLoaded on a zero-delay timeout once no language handler
 * scripts remain pending.  Does nothing while some are still outstanding.
 */
function checkPendingLanguages() {
  if (!pendingLanguages) {
    win.setTimeout(onLangsLoaded, 0);
  }
}
270
// Candidate stylesheet URLs in order of preference: every requested skin,
// followed by the default prettify.css as the final fallback.
var skinUrls = [];
for (var i = 0, n = skins.length; i < n; ++i) {
  skinUrls.push(LOADER_BASE_URL
      + '/skins/' + encodeURIComponent(skins[i]) + '.css');
}
skinUrls.push(LOADER_BASE_URL + '/prettify.css');
loadStylesheetsFallingBack(skinUrls);
278
279 var prettyPrint = (function () {
280 /**
281 * @license
282 * Copyright (C) 2006 Google Inc.
283 *
284 * Licensed under the Apache License, Version 2.0 (the "License");
285 * you may not use this file except in compliance with the License.
286 * You may obtain a copy of the License at
287 *
288 * http://www.apache.org/licenses/LICENSE-2.0
289 *
290 * Unless required by applicable law or agreed to in writing, software
291 * distributed under the License is distributed on an "AS IS" BASIS,
292 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
293 * See the License for the specific language governing permissions and
294 * limitations under the License.
295 */
296
297 /**
298 * @fileoverview
299 * some functions for browser-side pretty printing of code contained in html.
300 *
301 * <p>
302 * For a fairly comprehensive set of languages see the
303 * <a href="https://github.com/google/code-prettify#for-which-languages-does-it-work">README</a>
304 * file that came with this source. At a minimum, the lexer should work on a
305 * number of languages including C and friends, Java, Python, Bash, SQL, HTML,
306 * XML, CSS, Javascript, and Makefiles. It works passably on Ruby, PHP and Awk
307 * and a subset of Perl, but, because of commenting conventions, doesn't work on
308 * Smalltalk, Lisp-like, or CAML-like languages without an explicit lang class.
309 * <p>
310 * Usage: <ol>
311 * <li> include this source file in an html page via
312 * {@code <script type="text/javascript" src="/path/to/prettify.js"></script>}
313 * <li> define style rules. See the example page for examples.
314 * <li> mark the {@code <pre>} and {@code <code>} tags in your source with
315 * {@code class=prettyprint.}
316 * You can also use the (html deprecated) {@code <xmp>} tag, but the pretty
317 * printer needs to do more substantial DOM manipulations to support that, so
318 * some css styles may not be preserved.
319 * </ol>
320 * That's it. I wanted to keep the API as simple as possible, so there's no
321 * need to specify which language the code is in, but if you wish, you can add
322 * another class to the {@code <pre>} or {@code <code>} element to specify the
323 * language, as in {@code <pre class="prettyprint lang-java">}. Any class that
324 * starts with "lang-" followed by a file extension, specifies the file type.
325 * See the "lang-*.js" files in this directory for code that implements
326 * per-language file handlers.
327 * <p>
328 * Change log:<br>
329 * cbeust, 2006/08/22
330 * <blockquote>
331 * Java annotations (start with "@") are now captured as literals ("lit")
332 * </blockquote>
333 * @requires console
334 */
335
// JSLint declarations
/*global console, document, navigator, setTimeout, window, define */


var HACK_TO_FIX_JS_INCLUDE_PL;

/**
 * {@type !{
 *   'createSimpleLexer': function (Array, Array): (function (JobT)),
 *   'registerLangHandler': function (function (JobT), Array.<string>),
 *   'PR_ATTRIB_NAME': string,
 *   'PR_ATTRIB_VALUE': string,
 *   'PR_COMMENT': string,
 *   'PR_DECLARATION': string,
 *   'PR_KEYWORD': string,
 *   'PR_LITERAL': string,
 *   'PR_NOCODE': string,
 *   'PR_PLAIN': string,
 *   'PR_PUNCTUATION': string,
 *   'PR_SOURCE': string,
 *   'PR_STRING': string,
 *   'PR_TAG': string,
 *   'PR_TYPE': string,
 *   'prettyPrintOne': function (string, string, number|boolean),
 *   'prettyPrint': function (?function, ?(HTMLElement|HTMLDocument))
 * }}
 * @const
 */
var PR;

/**
 * Split {@code prettyPrint} into multiple timeouts so as not to interfere with
 * UI events.
 * If set to {@code false}, {@code prettyPrint()} is synchronous.
 */
window['PR_SHOULD_USE_CONTINUATION'] = true;

/**
 * Pretty print a chunk of code.
 * @param {string} sourceCodeHtml The HTML to pretty print.
 * @param {string} opt_langExtension The language name to use.
 *     Typically, a filename extension like 'cpp' or 'java'.
 * @param {number|boolean} opt_numberLines True to number lines,
 *     or the 1-indexed number of the first line in sourceCodeHtml.
 * @return {string} code as html, but prettier
 */
var prettyPrintOne;
/**
 * Find all the {@code <pre>} and {@code <code>} tags in the DOM with
 * {@code class=prettyprint} and prettify them.
 *
 * @param {Function} opt_whenDone called when prettifying is done.
 * @param {HTMLElement|HTMLDocument} opt_root an element or document
 *   containing all the elements to pretty print.
 *   Defaults to {@code document.body}.
 */
var prettyPrint;
394
395
396 (function () {
// Local alias for the global window object.
var win = window;
// Keyword lists for various languages.
// We use things that coerce to strings to make them compact when minified
// and to defeat aggressive optimizers that fold large string constants.
// Nested arrays flatten to a single comma-separated list when coerced with
// String(), so each list implicitly includes the lists it references.
var FLOW_CONTROL_KEYWORDS = ["break,continue,do,else,for,if,return,while"];
var C_KEYWORDS = [FLOW_CONTROL_KEYWORDS,"auto,case,char,const,default," +
    "double,enum,extern,float,goto,inline,int,long,register,restrict,short,signed," +
    "sizeof,static,struct,switch,typedef,union,unsigned,void,volatile"];
var COMMON_KEYWORDS = [C_KEYWORDS,"catch,class,delete,false,import," +
    "new,operator,private,protected,public,this,throw,true,try,typeof"];
var CPP_KEYWORDS = [COMMON_KEYWORDS,"alignas,alignof,align_union,asm,axiom,bool," +
    "concept,concept_map,const_cast,constexpr,decltype,delegate," +
    "dynamic_cast,explicit,export,friend,generic,late_check," +
    "mutable,namespace,noexcept,noreturn,nullptr,property,reinterpret_cast,static_assert," +
    "static_cast,template,typeid,typename,using,virtual,where"];
var JAVA_KEYWORDS = [COMMON_KEYWORDS,
    "abstract,assert,boolean,byte,extends,finally,final,implements,import," +
    "instanceof,interface,null,native,package,strictfp,super,synchronized," +
    "throws,transient"];
var CSHARP_KEYWORDS = [COMMON_KEYWORDS,
    "abstract,add,alias,as,ascending,async,await,base,bool,by,byte,checked,decimal,delegate,descending," +
    "dynamic,event,finally,fixed,foreach,from,get,global,group,implicit,in,interface," +
    "internal,into,is,join,let,lock,null,object,out,override,orderby,params," +
    "partial,readonly,ref,remove,sbyte,sealed,select,set,stackalloc,string,uint,ulong," +
    "unchecked,unsafe,ushort,value,var,virtual,where,yield"];
var COFFEE_KEYWORDS = "all,and,by,catch,class,else,extends,false,finally," +
    "for,if,in,is,isnt,loop,new,no,not,null,of,off,on,or,return,super,then," +
    "throw,true,try,unless,until,when,while,yes";
var JSCRIPT_KEYWORDS = [COMMON_KEYWORDS,
    "abstract,async,await,constructor,debugger,enum,eval,export,function," +
    "get,implements,instanceof,interface,let,null,set,undefined,var,with," +
    "yield,Infinity,NaN"];
var PERL_KEYWORDS = "caller,delete,die,do,dump,elsif,eval,exit,foreach,for," +
    "goto,if,import,last,local,my,next,no,our,print,package,redo,require," +
    "sub,undef,unless,until,use,wantarray,while,BEGIN,END";
var PYTHON_KEYWORDS = [FLOW_CONTROL_KEYWORDS, "and,as,assert,class,def,del," +
    "elif,except,exec,finally,from,global,import,in,is,lambda," +
    "nonlocal,not,or,pass,print,raise,try,with,yield," +
    "False,True,None"];
var RUBY_KEYWORDS = [FLOW_CONTROL_KEYWORDS, "alias,and,begin,case,class," +
    "def,defined,elsif,end,ensure,false,in,module,next,nil,not,or,redo," +
    "rescue,retry,self,super,then,true,undef,unless,until,when,yield," +
    "BEGIN,END"];
var SH_KEYWORDS = [FLOW_CONTROL_KEYWORDS, "case,done,elif,esac,eval,fi," +
    "function,in,local,set,then,until"];
var ALL_KEYWORDS = [
    CPP_KEYWORDS, CSHARP_KEYWORDS, JAVA_KEYWORDS, JSCRIPT_KEYWORDS,
    PERL_KEYWORDS, PYTHON_KEYWORDS, RUBY_KEYWORDS, SH_KEYWORDS];
// Matches a leading identifier that names a common C/C++ library type.
var C_TYPES = /^(DIR|FILE|array|vector|(de|priority_)?queue|(forward_)?list|stack|(const_)?(reverse_)?iterator|(unordered_)?(multi)?(set|map)|bitset|u?(int|float)\d*)\b/;
446
// token style names.  correspond to css classes
/**
 * token style for a string literal
 * @const
 */
var PR_STRING = 'str';
/**
 * token style for a keyword
 * @const
 */
var PR_KEYWORD = 'kwd';
/**
 * token style for a comment
 * @const
 */
var PR_COMMENT = 'com';
/**
 * token style for a type
 * @const
 */
var PR_TYPE = 'typ';
/**
 * token style for a literal value.  e.g. 1, null, true.
 * @const
 */
var PR_LITERAL = 'lit';
/**
 * token style for a punctuation string.
 * @const
 */
var PR_PUNCTUATION = 'pun';
/**
 * token style for plain text.
 * @const
 */
var PR_PLAIN = 'pln';

/**
 * token style for an sgml tag.
 * @const
 */
var PR_TAG = 'tag';
/**
 * token style for a markup declaration such as a DOCTYPE.
 * @const
 */
var PR_DECLARATION = 'dec';
/**
 * token style for embedded source.
 * @const
 */
var PR_SOURCE = 'src';
/**
 * token style for an sgml attribute name.
 * @const
 */
var PR_ATTRIB_NAME = 'atn';
/**
 * token style for an sgml attribute value.
 * @const
 */
var PR_ATTRIB_VALUE = 'atv';

/**
 * A class that indicates a section of markup that is not code, e.g. to allow
 * embedding of line numbers within code listings.
 * @const
 */
var PR_NOCODE = 'nocode';
516
517
518
519 /**
520 * A set of tokens that can precede a regular expression literal in
521 * javascript
522 * http://web.archive.org/web/20070717142515/http://www.mozilla.org/js/language/js20/rationale/syntax.html
523 * has the full list, but I've removed ones that might be problematic when
524 * seen in languages that don't support regular expression literals.
525 *
526 * <p>Specifically, I've removed any keywords that can't precede a regexp
527 * literal in a syntactically legal javascript program, and I've removed the
528 * "in" keyword since it's not a keyword in many languages, and might be used
529 * as a count of inches.
530 *
531 * <p>The link above does not accurately describe EcmaScript rules since
532 * it fails to distinguish between (a=++/b/i) and (a++/b/i) but it works
533 * very well in practice.
534 *
535 * @private
536 * @const
537 */
538 var REGEXP_PRECEDER_PATTERN = '(?:^^\\.?|[+-]|[!=]=?=?|\\#|%=?|&&?=?|\\(|\\*=?|[+\\-]=|->|\\/=?|::?|<<?=?|>>?>?=?|,|;|\\?|@|\\[|~|{|\\^\\^?=?|\\|\\|?=?|break|case|continue|delete|do|else|finally|instanceof|return|throw|try|typeof)\\s*';
539
540 // CAVEAT: this does not properly handle the case where a regular
541 // expression immediately follows another since a regular expression may
542 // have flags for case-sensitivity and the like. Having regexp tokens
543 // adjacent is not valid in any language I'm aware of, so I'm punting.
544 // TODO: maybe style special characters inside a regexp as punctuation.
545
/**
 * Given a group of {@link RegExp}s, returns a {@code RegExp} that globally
 * matches the union of the sets of strings matched by the input RegExp.
 * Since it matches globally, if the input strings have a start-of-input
 * anchor (/^.../), it is ignored for the purposes of unioning.
 *
 * <p>Capturing groups that are never back-referenced are rewritten as
 * non-capturing groups; the remaining groups and their back-references are
 * renumbered so that they stay consistent in the combined pattern.  When
 * case-insensitive and case-sensitive inputs are mixed, letters in the
 * case-insensitive inputs are expanded to explicit character sets and the
 * result is case-sensitive.
 *
 * @param {Array.<RegExp>} regexs non multiline, non-global regexs.
 * @return {RegExp} a global regex.
 * @throws {Error} if any input regex is global or multiline.
 */
function combinePrefixPatterns(regexs) {
  // Index of the last capturing group emitted; shared across all inputs.
  var capturedGroupIndex = 0;

  // Decide whether the output can simply carry the "i" flag (every input
  // that contains letters is case-insensitive) or whether case-insensitive
  // inputs must have their letters expanded by hand.
  var needToFoldCase = false;
  var ignoreCase = false;
  for (var i = 0, n = regexs.length; i < n; ++i) {
    var regex = regexs[i];
    if (regex.ignoreCase) {
      ignoreCase = true;
    } else if (/[a-z]/i.test(regex.source.replace(
                   /\\u[0-9a-f]{4}|\\x[0-9a-f]{2}|\\[^ux]/gi, ''))) {
      needToFoldCase = true;
      ignoreCase = false;
      break;
    }
  }

  var escapeCharToCodeUnit = {
    'b': 8,
    't': 9,
    'n': 0xa,
    'v': 0xb,
    'f': 0xc,
    'r': 0xd
  };

  // Returns the code unit denoted by one element of a character set: a
  // literal character or an escape sequence.
  function decodeEscape(charsetPart) {
    var cc0 = charsetPart.charCodeAt(0);
    if (cc0 !== 92 /* \\ */) {
      return cc0;
    }
    var c1 = charsetPart.charAt(1);
    cc0 = escapeCharToCodeUnit[c1];
    if (cc0) {
      return cc0;
    } else if ('0' <= c1 && c1 <= '7') {
      return parseInt(charsetPart.substring(1), 8);
    } else if (c1 === 'u' || c1 === 'x') {
      return parseInt(charsetPart.substring(2), 16);
    } else {
      return charsetPart.charCodeAt(1);
    }
  }

  // Returns text for the given code unit that is safe inside a character
  // set: control characters become \xNN, and \ - ] ^ are backslash-escaped.
  function encodeEscape(charCode) {
    if (charCode < 0x20) {
      return (charCode < 0x10 ? '\\x0' : '\\x') + charCode.toString(16);
    }
    var ch = String.fromCharCode(charCode);
    return (ch === '\\' || ch === '-' || ch === ']' || ch === '^')
        ? "\\" + ch : ch;
  }

  // Rewrites a character set such as "[a-c]" so that it matches both cases
  // of every latin letter it contains, e.g. "[A-Ca-c]".
  function caseFoldCharset(charSet) {
    // An empty set ("[]") has no parts; match() returns null for it, so
    // fall back to an empty list instead of dereferencing null.
    var charsetParts = charSet.substring(1, charSet.length - 1).match(
        new RegExp(
            '\\\\u[0-9A-Fa-f]{4}'
            + '|\\\\x[0-9A-Fa-f]{2}'
            + '|\\\\[0-3][0-7]{0,2}'
            + '|\\\\[0-7]{1,2}'
            + '|\\\\[\\s\\S]'
            + '|-'
            + '|[^-\\\\]',
            'g')) || [];
    var ranges = [];
    var inverse = charsetParts[0] === '^';

    var out = ['['];
    if (inverse) { out.push('^'); }

    for (var i = inverse ? 1 : 0, n = charsetParts.length; i < n; ++i) {
      var p = charsetParts[i];
      if (/\\[bdsw]/i.test(p)) {  // Don't muck with named groups.
        out.push(p);
      } else {
        var start = decodeEscape(p);
        var end;
        if (i + 2 < n && '-' === charsetParts[i + 1]) {
          end = decodeEscape(charsetParts[i + 2]);
          i += 2;
        } else {
          end = start;
        }
        ranges.push([start, end]);
        // If the range might intersect letters, then expand it.
        // This case handling is too simplistic.
        // It does not deal with non-latin case folding.
        // It works for latin source code identifiers though.
        if (!(end < 65 || start > 122)) {
          if (!(end < 65 || start > 90)) {
            ranges.push([Math.max(65, start) | 32, Math.min(end, 90) | 32]);
          }
          if (!(end < 97 || start > 122)) {
            ranges.push([Math.max(97, start) & ~32, Math.min(end, 122) & ~32]);
          }
        }
      }
    }

    // [[1, 10], [3, 4], [8, 12], [14, 14], [16, 16], [17, 17]]
    // -> [[1, 12], [14, 14], [16, 17]]
    ranges.sort(function (a, b) { return (a[0] - b[0]) || (b[1]  - a[1]); });
    var consolidatedRanges = [];
    // Starts empty: comparisons against the undefined bound are false, so
    // the first range is always pushed.
    var lastRange = [];
    for (var i = 0; i < ranges.length; ++i) {
      var range = ranges[i];
      if (range[0] <= lastRange[1] + 1) {
        lastRange[1] = Math.max(lastRange[1], range[1]);
      } else {
        consolidatedRanges.push(lastRange = range);
      }
    }

    for (var i = 0; i < consolidatedRanges.length; ++i) {
      var range = consolidatedRanges[i];
      out.push(encodeEscape(range[0]));
      if (range[1] > range[0]) {
        out.push('-');
        out.push(encodeEscape(range[1]));
      }
    }
    out.push(']');
    return out.join('');
  }

  // Returns the source of regex rewritten so that it is unanchored, its
  // groups are renumbered relative to the combined pattern, and (when
  // required) its letters are case-folded.
  function allowAnywhereFoldCaseAndRenumberGroups(regex) {
    // Split into character sets, escape sequences, punctuation strings
    // like ('(', '(?:', ')', '^'), and runs of characters that do not
    // include any of the above.
    var parts = regex.source.match(
        new RegExp(
            '(?:'
            + '\\[(?:[^\\x5C\\x5D]|\\\\[\\s\\S])*\\]'  // a character set
            + '|\\\\u[A-Fa-f0-9]{4}'  // a unicode escape
            + '|\\\\x[A-Fa-f0-9]{2}'  // a hex escape
            + '|\\\\[0-9]+'  // a back-reference or octal escape
            + '|\\\\[^ux0-9]'  // other escape sequence
            + '|\\(\\?[:!=]'  // start of a non-capturing group
            + '|[\\(\\)\\^]'  // start/end of a group, or line start
            + '|[^\\x5B\\x5C\\(\\)\\^]+'  // run of other characters
            + ')',
            'g'));
    var n = parts.length;

    // Maps captured group numbers to the number they will occupy in
    // the output or to -1 if that has not been determined, or to
    // undefined if they need not be capturing in the output.
    var capturedGroups = [];

    // Walk over and identify back references to build the capturedGroups
    // mapping.
    for (var i = 0, groupIndex = 0; i < n; ++i) {
      var p = parts[i];
      if (p === '(') {
        // groups are 1-indexed, so max group index is count of '('
        ++groupIndex;
      } else if ('\\' === p.charAt(0)) {
        var decimalValue = +p.substring(1);
        if (decimalValue) {
          if (decimalValue <= groupIndex) {
            capturedGroups[decimalValue] = -1;
          } else {
            // Replace with an unambiguous escape sequence so that
            // an octal escape sequence does not turn into a backreference
            // to a capturing group from an earlier regex.
            parts[i] = encodeEscape(decimalValue);
          }
        }
      }
    }

    // Renumber groups and reduce capturing groups to non-capturing groups
    // where possible.
    for (var i = 1; i < capturedGroups.length; ++i) {
      if (-1 === capturedGroups[i]) {
        capturedGroups[i] = ++capturedGroupIndex;
      }
    }
    for (var i = 0, groupIndex = 0; i < n; ++i) {
      var p = parts[i];
      if (p === '(') {
        ++groupIndex;
        if (!capturedGroups[groupIndex]) {
          parts[i] = '(?:';
        }
      } else if ('\\' === p.charAt(0)) {
        var decimalValue = +p.substring(1);
        if (decimalValue && decimalValue <= groupIndex) {
          parts[i] = '\\' + capturedGroups[decimalValue];
        }
      }
    }

    // Remove any prefix anchors so that the output will match anywhere.
    // ^^ really does mean an anchored match though.
    for (var i = 0; i < n; ++i) {
      if ('^' === parts[i] && '^' !== parts[i + 1]) { parts[i] = ''; }
    }

    // Expand letters to groups to handle mixing of case-sensitive and
    // case-insensitive patterns if necessary.
    if (regex.ignoreCase && needToFoldCase) {
      for (var i = 0; i < n; ++i) {
        var p = parts[i];
        var ch0 = p.charAt(0);
        if (p.length >= 2 && ch0 === '[') {
          parts[i] = caseFoldCharset(p);
        } else if (ch0 !== '\\') {
          // TODO: handle letters in numeric escapes.
          parts[i] = p.replace(
              /[a-zA-Z]/g,
              function (ch) {
                var cc = ch.charCodeAt(0);
                return '[' + String.fromCharCode(cc & ~32, cc | 32) + ']';
              });
        }
      }
    }

    return parts.join('');
  }

  var rewritten = [];
  for (var i = 0, n = regexs.length; i < n; ++i) {
    var regex = regexs[i];
    if (regex.global || regex.multiline) { throw new Error('' + regex); }
    rewritten.push(
        '(?:' + allowAnywhereFoldCaseAndRenumberGroups(regex) + ')');
  }

  return new RegExp(rewritten.join('|'), ignoreCase ? 'gi' : 'g');
}
786
/**
 * Split markup into a string of source code and an array mapping ranges in
 * that string to the text nodes in which they appear.
 *
 * <p>
 * The HTML DOM structure:</p>
 * <pre>
 * (Element   "p"
 *   (Element "b"
 *     (Text  "print "))       ; #1
 *   (Text    "'Hello '")      ; #2
 *   (Element "br")            ; #3
 *   (Text    "  + 'World';")) ; #4
 * </pre>
 * <p>
 * corresponds to the HTML
 * {@code <p><b>print </b>'Hello '<br>  + 'World';</p>}.</p>
 *
 * <p>
 * It will produce the output:</p>
 * <pre>
 * {
 *   sourceCode: "print 'Hello '\n  + 'World';",
 *   //                     1          2
 *   //           012345678901234 5678901234567
 *   spans: [0, #1, 6, #2, 14, #3, 15, #4]
 * }
 * </pre>
 * <p>
 * where #1 is a reference to the {@code "print "} text node above, and so
 * on for the other text nodes.
 * </p>
 *
 * <p>
 * The {@code} spans array is an array of pairs.  Even elements are the start
 * indices of substrings, and odd elements are the text nodes (or BR elements)
 * that contain the text for those substrings.
 * Substrings continue until the next index or the end of the source.
 * </p>
 *
 * <p>
 * Elements whose class includes {@code nocode} are skipped entirely, and a
 * single trailing newline is dropped from the returned source code.
 * </p>
 *
 * @param {Node} node an HTML DOM subtree containing source-code.
 * @param {boolean|number} isPreformatted truthy if white-space in
 *    text nodes should be considered significant.
 * @return {SourceSpansT} source code and the nodes in which they occur.
 */
function extractSourceSpans(node, isPreformatted) {
  var nocode = /(?:^|\s)nocode(?:\s|$)/;

  var chunks = [];
  // Running length of the source text collected so far.
  var length = 0;
  var spans = [];
  // Index of the next chunk; spans holds two entries per chunk.
  var k = 0;

  function walk(node) {
    var type = node.nodeType;
    if (type == 1) {  // Element
      if (nocode.test(node.className)) { return; }
      for (var child = node.firstChild; child; child = child.nextSibling) {
        walk(child);
      }
      var nodeName = node.nodeName.toLowerCase();
      // BR and LI elements each contribute a line break after their
      // content.
      if ('br' === nodeName || 'li' === nodeName) {
        chunks[k] = '\n';
        spans[k << 1] = length++;
        spans[(k++ << 1) | 1] = node;
      }
    } else if (type == 3 || type == 4) {  // Text
      var text = node.nodeValue;
      if (text.length) {
        if (!isPreformatted) {
          text = text.replace(/[ \t\r\n]+/g, ' ');
        } else {
          text = text.replace(/\r\n?/g, '\n');  // Normalize newlines.
        }
        // TODO: handle tabs here?
        chunks[k] = text;
        spans[k << 1] = length;
        length += text.length;
        spans[(k++ << 1) | 1] = node;
      }
    }
  }

  walk(node);

  return {
    sourceCode: chunks.join('').replace(/\n$/, ''),
    spans: spans
  };
}
877
/**
 * Apply the given language handler to sourceCode and add the resulting
 * decorations to out.
 *
 * <p>Does nothing when sourceCode is empty.  Otherwise a fresh job is built
 * (with {@code pre} set to 1), handed to langHandler, and whatever the
 * handler stored in {@code job.decorations} is appended to out.
 *
 * @param {!Element} sourceNode
 * @param {number} basePos the index of sourceCode within the chunk of source
 *    whose decorations are already present on out.
 * @param {string} sourceCode
 * @param {function(JobT)} langHandler
 * @param {DecorationsT} out
 */
function appendDecorations(
    sourceNode, basePos, sourceCode, langHandler, out) {
  if (!sourceCode) { return; }
  /** @type {JobT} */
  var job = {
    sourceNode: sourceNode,
    pre: 1,
    langExtension: null,
    numberLines: null,
    sourceCode: sourceCode,
    spans: null,
    basePos: basePos,
    decorations: null
  };
  langHandler(job);
  out.push.apply(out, job.decorations);
}
905
// Matches any string that contains a non-whitespace character.
var notWs = /\S/;

/**
 * Given an element, if it contains only one child element and any text nodes
 * it contains contain only space characters, return the sole child element.
 * Otherwise returns undefined.
 * <p>
 * This is meant to return the CODE element in {@code <pre><code ...>} when
 * there is a single child element that contains all the non-space textual
 * content, but not to return anything where there are multiple child elements
 * as in {@code <pre><code>...</code><code>...</code></pre>} or when there
 * is textual content.
 *
 * @param {!Element} element the element whose children are inspected.
 * @return {Element|undefined} the sole child element, or undefined.
 */
function childContentWrapper(element) {
  // While scanning, wrapper is undefined (no child element seen yet), the
  // sole child element seen so far, or element itself as a sentinel meaning
  // "disqualified" (a second child element or non-space text was seen).
  var wrapper = undefined;
  for (var c = element.firstChild; c; c = c.nextSibling) {
    var type = c.nodeType;
    wrapper = (type === 1)  // Element Node
        ? (wrapper ? element : c)
        : (type === 3)  // Text Node
        ? (notWs.test(c.nodeValue) ? element : wrapper)
        : wrapper;
  }
  return wrapper === element ? undefined : wrapper;
}
931
/**
 * Builds a lexing function from lists of [style, pattern, context, shortcut]
 * entries.
 *
 * The returned function splits job.sourceCode into tokens and produces a
 * decoration list of the form
 *    [index_0, style_0, index_1, style_1, ..., index_n, style_n]
 * where each index is a position in the source (offset by job.basePos) and
 * each style is a style constant like PR_PLAIN.  Indices are non-decreasing
 * and style_n-1 applies to all characters between index_n-1 and index_n.
 *
 * Each style pattern has the form
 *    [style : string, pattern : RegExp, DEPRECATED, shortcut : string].
 *
 * Style is either a style constant like PR_PLAIN or a string of the form
 * 'lang-FOO', where FOO names the language of the part of the token captured
 * by group 1 of the pattern.  E.g., if style is 'lang-lisp' and group 1
 * contains '(hello (world))', that part of the token is handed to the
 * handler looked up for 'lisp'.  The text before and after group 1 is lexed
 * again by this same lexer, so patterns must make sure that this cannot
 * recurse forever.  For example, the HTML lexer rule for SCRIPT elements
 * looks something like ['lang-js', /<[s]cript>(.+?)<\/script>/].  Applied to
 * '<script>foo()<\/script>' it re-lexes '<script>', which cannot match the
 * same rule because group 1 must not be empty, so the generic tag rule
 * styles it; 'foo()' goes to the 'js' handler; and '<\/script>' is re-lexed
 * and again falls to the generic tag rule.
 * If a pattern has a second group, it is taken as the text that follows the
 * embedded source, which lets group 1 be empty without ambiguity.
 *
 * Pattern must only match prefixes, and if it matches a prefix, then that
 * match is considered a token with the same style.
 *
 * Context is applied to the last non-whitespace, non-comment token
 * recognized.
 *
 * Shortcut is an optional string of characters, any of which, if the first
 * character, guarantee that this pattern and only this pattern matches.
 *
 * @param {Array} shortcutStylePatterns patterns that always start with
 *   a known character.  Must have a shortcut string.
 * @param {Array} fallthroughStylePatterns patterns that will be tried in
 *   order if the shortcut ones fail.  May have shortcuts.
 *
 * @return {function (JobT)} a function that takes an undecorated job and
 *   attaches a list of decorations.
 */
function createSimpleLexer(shortcutStylePatterns, fallthroughStylePatterns) {
  // Maps a token's first character to the one pattern known to match it.
  var shortcuts = {};
  // Distinct pattern regexes in declaration order, used to build the
  // tokenizer.
  var distinctRegexs = [];
  var seenRegexs = {};

  var everyPattern = shortcutStylePatterns.concat(fallthroughStylePatterns);
  for (var p = 0, patternCount = everyPattern.length; p < patternCount; ++p) {
    var entry = everyPattern[p];
    var shortcutChars = entry[3];
    if (shortcutChars) {
      // Later patterns win when two patterns claim the same character.
      for (var ch = shortcutChars.length - 1; ch >= 0; --ch) {
        shortcuts[shortcutChars.charAt(ch)] = entry;
      }
    }
    var entryRegex = entry[1];
    var regexKey = '' + entryRegex;
    if (!seenRegexs.hasOwnProperty(regexKey)) {
      distinctRegexs.push(entryRegex);
      seenRegexs[regexKey] = null;
    }
  }
  // Catch-all so that every character ends up in some token.
  distinctRegexs.push(/[\0-\uffff]/);
  var tokenizer = combinePrefixPatterns(distinctRegexs);

  // Captured once, when the lexer is created.
  var nPatterns = fallthroughStylePatterns.length;

  /**
   * Lexes job.sourceCode and attaches an output array job.decorations of
   * style classes preceded by the position at which they start in
   * job.sourceCode in order.
   *
   * @type{function (JobT)}
   */
  var decorate = function (job) {
    var sourceCode = job.sourceCode;
    var basePos = job.basePos;
    var sourceNode = job.sourceNode;
    /** Even entries are positions in source in ascending order.  Odd entries
      * are style markers (e.g., PR_COMMENT) that run from that position until
      * the end.
      * @type {DecorationsT}
      */
    var decorations = [basePos, PR_PLAIN];
    var cursor = 0;  // index into sourceCode
    var tokens = sourceCode.match(tokenizer) || [];
    // Remembers the style of tokens already classified as non-embedded.
    var knownStyles = {};

    for (var t = 0, tokenCount = tokens.length; t < tokenCount; ++t) {
      var token = tokens[t];
      var style = knownStyles[token];
      var match = void 0;
      var isEmbedded = false;

      if (typeof style !== 'string') {
        var rule = shortcuts[token.charAt(0)];
        if (rule) {
          match = token.match(rule[1]);
          style = rule[0];
        } else {
          for (var r = 0; r < nPatterns; ++r) {
            rule = fallthroughStylePatterns[r];
            match = token.match(rule[1]);
            if (match) {
              style = rule[0];
              break;
            }
          }

          if (!match) {  // make sure that we make progress
            style = PR_PLAIN;
          }
        }

        isEmbedded = style.length >= 5 && 'lang-' === style.substring(0, 5);
        if (isEmbedded && !(match && typeof match[1] === 'string')) {
          // A 'lang-' rule without a captured group has nothing to embed.
          isEmbedded = false;
          style = PR_SOURCE;
        }

        if (!isEmbedded) { knownStyles[token] = style; }
      }

      var tokenStart = cursor;
      cursor += token.length;

      if (!isEmbedded) {
        decorations.push(basePos + tokenStart, style);
        continue;
      }

      // Treat group 1 as an embedded block of source code.
      var embeddedSource = match[1];
      var embeddedStart = token.indexOf(embeddedSource);
      var embeddedEnd = embeddedStart + embeddedSource.length;
      if (match[2]) {
        // If embeddedSource can be blank, then it would match at the
        // beginning which would cause us to infinitely recurse on the
        // entire token, so we catch the right context in match[2].
        embeddedEnd = token.length - match[2].length;
        embeddedStart = embeddedEnd - embeddedSource.length;
      }
      var lang = style.substring(5);
      var tokenBase = basePos + tokenStart;
      // Decorate the left of the embedded source
      appendDecorations(
          sourceNode,
          tokenBase,
          token.substring(0, embeddedStart),
          decorate, decorations);
      // Decorate the embedded source
      appendDecorations(
          sourceNode,
          tokenBase + embeddedStart,
          embeddedSource,
          langHandlerForExtension(lang, embeddedSource),
          decorations);
      // Decorate the right of the embedded section
      appendDecorations(
          sourceNode,
          tokenBase + embeddedEnd,
          token.substring(embeddedEnd),
          decorate, decorations);
    }
    job.decorations = decorations;
  };
  return decorate;
}
1105
/**
 * Returns a function that produces a list of decorations from source text.
 *
 * This code treats ", ', and ` as string delimiters, and \ as a string
 * escape.  It does not recognize perl's qq() style strings.
 * It has no special handling for double delimiter escapes as in basic, or
 * the tripled delimiters used in python, but should work on those regardless
 * although in those cases a single string literal may be broken up into
 * multiple adjacent string literals.
 *
 * It recognizes C, C++, and shell style comments.
 *
 * Options read by this function (all optional):
 *   'tripleQuotedStrings', 'multiLineStrings' - select the string pattern;
 *       the first one that is truthy wins, otherwise strings are single-line.
 *   'verbatimStrings' - also recognize C# style @"..." strings.
 *   'hashComments' - treat # as a comment start; a value greater than 1
 *       (together with 'cStyleComments') enables ### block comments.
 *   'cStyleComments' - recognize // and slash-star comments.
 *   'regexLiterals' - recognize regular expression literals; a value greater
 *       than 1 lets them span lines.
 *   'types' - a RegExp matching type names.
 *   'keywords' - keywords separated by commas and/or whitespace.  A missing
 *       (null or undefined) value means "no keywords".
 *
 * @param {Object} options a set of optional parameters.
 * @return {function (JobT)} a function that examines the source code
 *     in the input job and builds a decoration list which it attaches to
 *     the job.
 */
function sourceDecorator(options) {
  var shortcutStylePatterns = [], fallthroughStylePatterns = [];
  if (options['tripleQuotedStrings']) {
    // '''multi-line-string''', 'single-line-string', and double-quoted
    shortcutStylePatterns.push(
        [PR_STRING,  /^(?:\'\'\'(?:[^\'\\]|\\[\s\S]|\'{1,2}(?=[^\']))*(?:\'\'\'|$)|\"\"\"(?:[^\"\\]|\\[\s\S]|\"{1,2}(?=[^\"]))*(?:\"\"\"|$)|\'(?:[^\\\']|\\[\s\S])*(?:\'|$)|\"(?:[^\\\"]|\\[\s\S])*(?:\"|$))/,
         null, '\'"']);
  } else if (options['multiLineStrings']) {
    // 'multi-line-string', "multi-line-string"
    shortcutStylePatterns.push(
        [PR_STRING,  /^(?:\'(?:[^\\\']|\\[\s\S])*(?:\'|$)|\"(?:[^\\\"]|\\[\s\S])*(?:\"|$)|\`(?:[^\\\`]|\\[\s\S])*(?:\`|$))/,
         null, '\'"`']);
  } else {
    // 'single-line-string', "single-line-string"
    shortcutStylePatterns.push(
        [PR_STRING,
         /^(?:\'(?:[^\\\'\r\n]|\\.)*(?:\'|$)|\"(?:[^\\\"\r\n]|\\.)*(?:\"|$))/,
         null, '"\'']);
  }
  if (options['verbatimStrings']) {
    // verbatim-string-literal production from the C# grammar.  See issue 93.
    fallthroughStylePatterns.push(
        [PR_STRING, /^@\"(?:[^\"]|\"\")*(?:\"|$)/, null]);
  }
  var hc = options['hashComments'];
  if (hc) {
    if (options['cStyleComments']) {
      if (hc > 1) {  // multiline hash comments
        shortcutStylePatterns.push(
            [PR_COMMENT, /^#(?:##(?:[^#]|#(?!##))*(?:###|$)|.*)/, null, '#']);
      } else {
        // Stop C preprocessor declarations at an unclosed open comment
        shortcutStylePatterns.push(
            [PR_COMMENT, /^#(?:(?:define|e(?:l|nd)if|else|error|ifn?def|include|line|pragma|undef|warning)\b|[^\r\n]*)/,
             null, '#']);
      }
      // #include <stdio.h>
      fallthroughStylePatterns.push(
          [PR_STRING,
           /^<(?:(?:(?:\.\.\/)*|\/?)(?:[\w-]+(?:\/[\w-]+)+)?[\w-]+\.h(?:h|pp|\+\+)?|[a-z]\w*)>/,
           null]);
    } else {
      shortcutStylePatterns.push([PR_COMMENT, /^#[^\r\n]*/, null, '#']);
    }
  }
  if (options['cStyleComments']) {
    fallthroughStylePatterns.push([PR_COMMENT, /^\/\/[^\r\n]*/, null]);
    fallthroughStylePatterns.push(
        [PR_COMMENT, /^\/\*[\s\S]*?(?:\*\/|$)/, null]);
  }
  var regexLiterals = options['regexLiterals'];
  if (regexLiterals) {
    /**
     * Characters a regex literal may not contain: none when multiline
     * literals are allowed, otherwise the line terminators.
     * @const
     */
    var regexExcls = regexLiterals > 1
      ? ''  // Multiline regex literals
      : '\n\r';
    /**
     * @const
     */
    var regexAny = regexExcls ? '.' : '[\\S\\s]';
    /**
     * @const
     */
    var REGEX_LITERAL = (
        // A regular expression literal starts with a slash that is
        // not followed by * or / so that it is not confused with
        // comments.
        '/(?=[^/*' + regexExcls + '])'
        // and then contains any number of raw characters,
        + '(?:[^/\\x5B\\x5C' + regexExcls + ']'
        // escape sequences (\x5C),
        +    '|\\x5C' + regexAny
        // or non-nesting character sets (\x5B\x5D);
        +    '|\\x5B(?:[^\\x5C\\x5D' + regexExcls + ']'
        +             '|\\x5C' + regexAny + ')*(?:\\x5D|$))+'
        // finally closed by a /.
        + '/');
    fallthroughStylePatterns.push(
        ['lang-regex',
         RegExp('^' + REGEXP_PRECEDER_PATTERN + '(' + REGEX_LITERAL + ')')
         ]);
  }

  var types = options['types'];
  if (types) {
    fallthroughStylePatterns.push([PR_TYPE, types]);
  }

  // Coerce to a string so that arrays work too, but do not let a missing
  // option turn into the literal keyword "undefined" or "null".
  var rawKeywords = options['keywords'];
  var keywords = (rawKeywords == null ? '' : '' + rawKeywords)
      .replace(/^ | $/g, '');
  if (keywords.length) {
    fallthroughStylePatterns.push(
        [PR_KEYWORD,
         new RegExp('^(?:' + keywords.replace(/[\s,]+/g, '|') + ')\\b'),
         null]);
  }

  shortcutStylePatterns.push([PR_PLAIN,       /^\s+/, null, ' \r\n\t\xA0']);

  var punctuation =
    // The Bash man page says

    // A word is a sequence of characters considered as a single
    // unit by GRUB. Words are separated by metacharacters,
    // which are the following plus space, tab, and newline: { }
    // | & $ ; < >
    // ...

    // A word beginning with # causes that word and all remaining
    // characters on that line to be ignored.

    // which means that only a '#' after /(?:^|[{}|&$;<>\s])/ starts a
    // comment but empirically
    // $ echo {#}
    // {#}
    // $ echo \$#
    // $#
    // $ echo }#
    // }#

    // so /(?:^|[|&;<>\s])/ is more appropriate.

    // http://gcc.gnu.org/onlinedocs/gcc-2.95.3/cpp_1.html#SEC3
    // suggests that this definition is compatible with a
    // default mode that tries to use a single token definition
    // to recognize both bash/python style comments and C
    // preprocessor directives.

    // This definition of punctuation does not include # in the list of
    // follow-on exclusions, so # will not be broken before if preceded
    // by a punctuation character.  We could try to exclude # after
    // [|&;<>] but that doesn't seem to cause many major problems.
    // If that does turn out to be a problem, we should change the below
    // when hc is truthy to include # in the run of punctuation characters
    // only when not following [|&;<>].
    '^.[^\\s\\w.$@\'"`/\\\\]*';
  if (options['regexLiterals']) {
    // NOTE(review): inside a string literal "\s" and "\/" are just "s" and
    // "/", so the lookahead actually appended here is (?!s*/), not
    // (?!\s*\/).  Left byte-for-byte as is: widening it to whitespace would
    // change how punctuation runs are tokenized before a slash.
    punctuation += '(?!\s*\/)';
  }

  fallthroughStylePatterns.push(
      // TODO(mikesamuel): recognize non-latin letters and numerals in idents
      [PR_LITERAL,     /^@[a-z_$][a-z_$@0-9]*/i, null],
      [PR_TYPE,        /^(?:[@_]?[A-Z]+[a-z][A-Za-z_$@0-9]*|\w+_t\b)/, null],
      [PR_PLAIN,       /^[a-z_$][a-z_$@0-9]*/i, null],
      [PR_LITERAL,
       new RegExp(
           '^(?:'
           // A hex number
           + '0x[a-f0-9]+'
           // or an octal or decimal number, which may also start with the
           // decimal point, as in ".5".
           + '|(?:\\d(?:_\\d+)*\\d*(?:\\.\\d*)?|\\.\\d+)'
           // possibly in scientific notation
           + '(?:e[+\\-]?\\d+)?'
           + ')'
           // with an optional modifier like UL for unsigned long
           + '[a-z]*', 'i'),
       null, '0123456789'],
      // Don't treat escaped quotes in bash as starting strings.
      // See issue 144.
      [PR_PLAIN,       /^\\[\s\S]?/, null],
      [PR_PUNCTUATION, new RegExp(punctuation), null]);

  return createSimpleLexer(shortcutStylePatterns, fallthroughStylePatterns);
}
1288
/**
 * The general purpose decorator: all keywords, hash and C style comments,
 * multi-line strings and regular expression literals enabled.
 */
var decorateSource = (function () {
  var genericOptions = {};
  genericOptions['keywords'] = ALL_KEYWORDS;
  genericOptions['hashComments'] = true;
  genericOptions['cStyleComments'] = true;
  genericOptions['multiLineStrings'] = true;
  genericOptions['regexLiterals'] = true;
  return sourceDecorator(genericOptions);
})();
1296
/**
 * Given a DOM subtree, wraps it in a list, and puts each line into its own
 * list item.
 *
 * Lines are split at BR elements and, when isPreformatted is true, at line
 * breaks inside text nodes.  Elements whose class list contains "nocode" are
 * not descended into.
 *
 * @param {Node} node modified in place.  Its content is pulled into an
 *     HTMLOListElement, and each line is moved into a separate list item.
 *     This requires cloning elements, so the input might not have unique
 *     IDs after numbering.
 * @param {number|null|boolean} startLineNum
 *     If truthy, coerced to an integer which is the 1-indexed line number
 *     of the first line of code.  The number of the first line will be
 *     attached to the list.
 * @param {boolean} isPreformatted true iff white-space in text nodes should
 *     be treated as significant.
 */
function numberLines(node, startLineNum, isPreformatted) {
  var nocode = /(?:^|\s)nocode(?:\s|$)/;
  var lineBreak = /\r\n?|\n/;

  var document = node.ownerDocument;

  var li = document.createElement('li');
  while (node.firstChild) {
    li.appendChild(node.firstChild);
  }
  // An array of lines.  We split below, so this is initialized to one
  // un-split line.
  var listItems = [li];

  function walk(node) {
    var type = node.nodeType;
    if (type === 1 && !nocode.test(node.className)) {  // Element
      // nodeName is reported in upper case ("BR") for HTML documents, so
      // normalize it before comparing.
      if ('br' === node.nodeName.toLowerCase()) {
        breakAfter(node);
        // Discard the <BR> since it is now flush against a </LI>.
        if (node.parentNode) {
          node.parentNode.removeChild(node);
        }
      } else {
        for (var child = node.firstChild; child; child = child.nextSibling) {
          walk(child);
        }
      }
    } else if ((type === 3 || type === 4) && isPreformatted) {  // Text
      var text = node.nodeValue;
      var match = text.match(lineBreak);
      if (match) {
        // Keep the first line in this node and push the rest into a new
        // text node that will end up on the next list item.
        var firstLine = text.substring(0, match.index);
        node.nodeValue = firstLine;
        var tail = text.substring(match.index + match[0].length);
        if (tail) {
          var parent = node.parentNode;
          parent.insertBefore(
            document.createTextNode(tail), node.nextSibling);
        }
        breakAfter(node);
        if (!firstLine) {
          // Don't leave blank text nodes in the DOM.
          node.parentNode.removeChild(node);
        }
      }
    }
  }

  // Split a line after the given node.
  function breakAfter(lineEndNode) {
    // If there's nothing to the right, then we can skip ending the line
    // here, and move root-wards since splitting just before an end-tag
    // would require us to create a bunch of empty copies.
    while (!lineEndNode.nextSibling) {
      lineEndNode = lineEndNode.parentNode;
      if (!lineEndNode) { return; }
    }

    function breakLeftOf(limit, copy) {
      // Clone shallowly if this node needs to be on both sides of the break.
      var rightSide = copy ? limit.cloneNode(false) : limit;
      var parent = limit.parentNode;
      if (parent) {
        // We clone the parent chain.
        // This helps us resurrect important styling elements that cross lines.
        // E.g. in <i>Foo<br>Bar</i>
        // should be rewritten to <li><i>Foo</i></li><li><i>Bar</i></li>.
        var parentClone = breakLeftOf(parent, 1);
        // Move the clone and everything to the right of the original
        // onto the cloned parent.
        var next = limit.nextSibling;
        parentClone.appendChild(rightSide);
        for (var sibling = next; sibling; sibling = next) {
          next = sibling.nextSibling;
          parentClone.appendChild(sibling);
        }
      }
      return rightSide;
    }

    var copiedListItem = breakLeftOf(lineEndNode.nextSibling, 0);

    // Walk the parent chain until we reach an unattached LI.
    for (var parent;
         // Check nodeType since IE invents document fragments.
         (parent = copiedListItem.parentNode) && parent.nodeType === 1;) {
      copiedListItem = parent;
    }
    // Put it on the list of lines for later processing.
    listItems.push(copiedListItem);
  }

  // Split lines while there are lines left to split.
  for (var i = 0;  // Number of lines that have been split so far.
       i < listItems.length;  // length updated by breakAfter calls.
       ++i) {
    walk(listItems[i]);
  }

  // Make sure numeric indices show correctly.
  if (startLineNum === (startLineNum|0)) {
    listItems[0].setAttribute('value', startLineNum);
  }

  var ol = document.createElement('ol');
  ol.className = 'linenums';
  var offset = Math.max(0, ((startLineNum - 1 /* zero index */)) | 0) || 0;
  for (var j = 0, n = listItems.length; j < n; ++j) {
    li = listItems[j];
    // Stick a class on the LIs so that stylesheets can
    // color odd/even rows, or any other row pattern that
    // is co-prime with 10.
    li.className = 'L' + ((j + offset) % 10);
    if (!li.firstChild) {
      // Give empty lines some content so that the item is not left empty.
      li.appendChild(document.createTextNode('\xA0'));
    }
    ol.appendChild(li);
  }

  node.appendChild(ol);
}
1434
/**
 * Breaks {@code job.sourceCode} around style boundaries in
 * {@code job.decorations} and modifies {@code job.sourceNode} in place.
 *
 * The decoration list is first normalized in place (zero-length entries are
 * dropped and adjacent entries with the same style are merged).  Then every
 * text node listed in {@code job.spans} is cut at decoration boundaries and
 * each piece is wrapped in a SPAN whose class name is the decoration style.
 * While the DOM is being rewritten the source node, if any, is hidden.
 *
 * @param {JobT} job
 * @private
 */
function recombineTagsAndDecorations(job) {
  var msie = /\bMSIE\s(\d+)/.exec(navigator.userAgent);
  var isIE8OrEarlier = msie && +msie[1] <= 8;
  var newlineRe = /\n/g;

  var source = job.sourceCode;
  var sourceLength = source.length;
  var spans = job.spans;
  var nSpans = spans.length;
  var decorations = job.decorations;
  var nDecorations = decorations.length;

  // Remove all zero-length decorations.  A sentinel position is appended so
  // that the last real decoration has an end to compare against.
  decorations[nDecorations] = sourceLength;
  var readAt = 0, writeAt = 0;
  while (readAt < nDecorations) {
    if (decorations[readAt] !== decorations[readAt + 2]) {
      decorations[writeAt++] = decorations[readAt++];
      decorations[writeAt++] = decorations[readAt++];
    } else {
      readAt += 2;
    }
  }
  nDecorations = writeAt;

  // Simplify decorations: conflate all adjacent decorations that use the
  // same style.
  readAt = writeAt = 0;
  while (readAt < nDecorations) {
    var runPos = decorations[readAt];
    var runStyle = decorations[readAt + 1];
    var runEnd = readAt + 2;
    while (runEnd + 2 <= nDecorations
           && decorations[runEnd + 1] === runStyle) {
      runEnd += 2;
    }
    decorations[writeAt++] = runPos;
    decorations[writeAt++] = runStyle;
    readAt = runEnd;
  }

  // Truncating also discards the sentinel added above.
  nDecorations = decorations.length = writeAt;

  var sourceNode = job.sourceNode;
  var oldDisplay = "";
  if (sourceNode) {
    oldDisplay = sourceNode.style.display;
    sourceNode.style.display = 'none';
  }
  try {
    // Index into source after the last code-unit recombined.
    var sourceIndex = 0;
    // Index into spans after the last span which ends at or before
    // sourceIndex.
    var spanIndex = 0;
    // Index into decorations after the last decoration which ends at or
    // before sourceIndex.
    var decorationIndex = 0;

    while (spanIndex < nSpans) {
      var spanEnd = /** @type{number} */ (spans[spanIndex + 2])
          || sourceLength;
      var decEnd = decorations[decorationIndex + 2] || sourceLength;
      var end = Math.min(spanEnd, decEnd);

      var textNode = /** @type{Node} */ (spans[spanIndex + 1]);
      var styledText;
      if (textNode.nodeType !== 1  // Don't muck with <BR>s or <LI>s
          // Don't introduce spans around empty text nodes.
          && (styledText = source.substring(sourceIndex, end))) {
        // This may seem bizarre, and it is.  Emitting LF on IE causes the
        // code to display with spaces instead of line breaks.
        // Emitting Windows standard issue linebreaks (CRLF) causes a blank
        // space to appear at the beginning of every line but the first.
        // Emitting an old Mac OS 9 line separator makes everything spiffy.
        if (isIE8OrEarlier) {
          styledText = styledText.replace(newlineRe, '\r');
        }
        textNode.nodeValue = styledText;
        var ownerDoc = textNode.ownerDocument;
        var wrapper = ownerDoc.createElement('span');
        wrapper.className = decorations[decorationIndex + 1];
        var parentNode = textNode.parentNode;
        parentNode.replaceChild(wrapper, textNode);
        wrapper.appendChild(textNode);
        if (sourceIndex < spanEnd) {  // Split off a text node.
          // TODO: Possibly optimize by using '' if there's no flicker.
          textNode = ownerDoc.createTextNode(source.substring(end, spanEnd));
          spans[spanIndex + 1] = textNode;
          parentNode.insertBefore(textNode, wrapper.nextSibling);
        }
      }

      sourceIndex = end;

      if (sourceIndex >= spanEnd) {
        spanIndex += 2;
      }
      if (sourceIndex >= decEnd) {
        decorationIndex += 2;
      }
    }
  } finally {
    if (sourceNode) {
      sourceNode.style.display = oldDisplay;
    }
  }
}
1551
/** Maps language-specific file extensions to handlers. */
var langHandlerRegistry = {};

/**
 * Register a language handler for the given file extensions.
 *
 * An extension that already has a handler keeps it; in that case a warning
 * is written to the console when one is available.
 *
 * @param {function (JobT)} handler a function from source code to a list
 *      of decorations.  Takes a single argument job which describes the
 *      state of the computation and attaches the decorations to it.
 * @param {Array.<string>} fileExtensions
 */
function registerLangHandler(handler, fileExtensions) {
  // Walk the list from last to first.
  var idx = fileExtensions.length - 1;
  while (idx >= 0) {
    var ext = fileExtensions[idx];
    if (langHandlerRegistry.hasOwnProperty(ext)) {
      if (win['console']) {
        console['warn']('cannot override language handler %s', ext);
      }
    } else {
      langHandlerRegistry[ext] = handler;
    }
    idx -= 1;
  }
}
/**
 * Looks up the handler registered for a language extension.
 *
 * When no extension is given, or nothing is registered under it, the source
 * itself decides: if its first non-whitespace character is a '<' the
 * 'default-markup' handler is used, otherwise 'default-code'.
 *
 * @param {?string} extension a language extension such as 'js', or a falsy
 *     value to pick a default.
 * @param {string} source the source code, used only to pick a default.
 * @return {function (JobT)} the handler registered under the chosen name.
 */
function langHandlerForExtension(extension, source) {
  var isRegistered =
      extension && langHandlerRegistry.hasOwnProperty(extension);
  var key = extension;
  if (!isRegistered) {
    if (/^\s*</.test(source)) {
      key = 'default-markup';
    } else {
      key = 'default-code';
    }
  }
  return langHandlerRegistry[key];
}
// The generic source decorator is the default for anything that does not
// look like markup.
registerLangHandler(decorateSource, ['default-code']);

(function () {
  // Markup: text, declarations, comments, embedded foreign-language
  // sections, and tags (which are handed to the 'in.tag' lexer below).
  var markupRules = [
    [PR_PLAIN,       /^[^<?]+/],
    [PR_DECLARATION, /^<!\w[^>]*(?:>|$)/],
    [PR_COMMENT,     /^<\!--[\s\S]*?(?:-\->|$)/],
    // Unescaped content in an unknown language
    ['lang-',        /^<\?([\s\S]+?)(?:\?>|$)/],
    ['lang-',        /^<%([\s\S]+?)(?:%>|$)/],
    [PR_PUNCTUATION, /^(?:<[%?]|[%?]>)/],
    ['lang-',        /^<xmp\b[^>]*>([\s\S]+?)<\/xmp\b[^>]*>/i],
    // Unescaped content in javascript.  (Or possibly vbscript).
    ['lang-js',      /^<script\b[^>]*>([\s\S]*?)(<\/script\b[^>]*>)/i],
    // Contains unescaped stylesheet content
    ['lang-css',     /^<style\b[^>]*>([\s\S]*?)(<\/style\b[^>]*>)/i],
    ['lang-in.tag',  /^(<\/?[a-z][^<>]*>)/i]
  ];
  registerLangHandler(
      createSimpleLexer([], markupRules),
      ['default-markup', 'htm', 'html', 'mxml', 'xhtml', 'xml', 'xsl']);

  // The inside of a tag: white-space and quoted attribute values can be
  // recognized from their first character alone.
  var tagShortcutRules = [
    [PR_PLAIN,        /^[\s]+/, null, ' \t\r\n'],
    [PR_ATTRIB_VALUE, /^(?:\"[^\"]*\"?|\'[^\']*\'?)/, null, '\"\'']
  ];
  var tagRules = [
    [PR_TAG,          /^^<\/?[a-z](?:[\w.:-]*\w)?|\/?>$/i],
    [PR_ATTRIB_NAME,  /^(?!style[\s=]|on)[a-z](?:[\w:-]*\w)?/i],
    ['lang-uq.val',   /^=\s*([^>\'\"\s]*(?:[^>\'\"\s\/]|\/(?=\s)))/],
    [PR_PUNCTUATION,  /^[=<>\/]+/],
    // Event handler attributes hold script.
    ['lang-js',       /^on\w+\s*=\s*\"([^\"]+)\"/i],
    ['lang-js',       /^on\w+\s*=\s*\'([^\']+)\'/i],
    ['lang-js',       /^on\w+\s*=\s*([^\"\'>\s]+)/i],
    // Style attributes hold stylesheet content.
    ['lang-css',      /^style\s*=\s*\"([^\"]+)\"/i],
    ['lang-css',      /^style\s*=\s*\'([^\']+)\'/i],
    ['lang-css',      /^style\s*=\s*([^\"\'>\s]+)/i]
  ];
  registerLangHandler(
      createSimpleLexer(tagShortcutRules, tagRules), ['in.tag']);

  // An unquoted attribute value is styled as a whole.
  var unquotedValueRules = [[PR_ATTRIB_VALUE, /^[\s\S]+/]];
  registerLangHandler(
      createSimpleLexer([], unquotedValueRules), ['uq.val']);
})();
// Handlers for the languages supported out of the box.  Each one is the
// generic source decorator configured with that language's keywords and the
// comment, string and regex conventions it uses.

// C family.
registerLangHandler(sourceDecorator({
        'keywords': CPP_KEYWORDS,
        'hashComments': true,
        'cStyleComments': true,
        'types': C_TYPES
      }), ['c', 'cc', 'cpp', 'cxx', 'cyc', 'm']);
registerLangHandler(sourceDecorator({
        'keywords': 'null,true,false'
      }), ['json']);
registerLangHandler(sourceDecorator({
        'keywords': CSHARP_KEYWORDS,
        'hashComments': true,
        'cStyleComments': true,
        'verbatimStrings': true,
        'types': C_TYPES
      }), ['cs']);
registerLangHandler(sourceDecorator({
        'keywords': JAVA_KEYWORDS,
        'cStyleComments': true
      }), ['java']);
// Scripting languages.
registerLangHandler(sourceDecorator({
        'keywords': SH_KEYWORDS,
        'hashComments': true,
        'multiLineStrings': true
      }), ['bash', 'bsh', 'csh', 'sh']);
registerLangHandler(sourceDecorator({
        'keywords': PYTHON_KEYWORDS,
        'hashComments': true,
        'multiLineStrings': true,
        'tripleQuotedStrings': true
      }), ['cv', 'py', 'python']);
registerLangHandler(sourceDecorator({
        'keywords': PERL_KEYWORDS,
        'hashComments': true,
        'multiLineStrings': true,
        'regexLiterals': 2  // multiline regex literals
      }), ['perl', 'pl', 'pm']);
registerLangHandler(sourceDecorator({
        'keywords': RUBY_KEYWORDS,
        'hashComments': true,
        'multiLineStrings': true,
        'regexLiterals': true
      }), ['rb', 'ruby']);
registerLangHandler(sourceDecorator({
        'keywords': JSCRIPT_KEYWORDS,
        'cStyleComments': true,
        'regexLiterals': true
      }), ['javascript', 'js', 'ts', 'typescript']);
registerLangHandler(sourceDecorator({
        'keywords': COFFEE_KEYWORDS,
        'hashComments': 3,  // ### style block comments
        'cStyleComments': true,
        // Spelled like the option sourceDecorator reads.  The triple-quoted
        // string pattern below takes precedence over it in any case.
        'multiLineStrings': true,
        'tripleQuotedStrings': true,
        'regexLiterals': true
      }), ['coffee']);
// The content of a regular expression literal is styled as a string.
registerLangHandler(
    createSimpleLexer([], [[PR_STRING, /^[\s\S]+/]]), ['regex']);
1679
/**
 * Runs the whole pipeline for one job: extracts plain text from
 * {@code job.sourceNode}, runs the language handler chosen from
 * {@code job.langExtension} over it, and folds the resulting decorations
 * back into the DOM.
 *
 * Any error thrown along the way is caught; it is written to the console
 * when one is available and is not rethrown.
 *
 * @param {JobT} job
 */
function applyDecorator(job) {
  var requestedLang = job.langExtension;

  try {
    // Extract tags, and convert the source code to plain text.
    var extracted = extractSourceSpans(job.sourceNode, job.pre);
    /** Plain text. @type {string} */
    var plainText = extracted.sourceCode;
    job.sourceCode = plainText;
    job.spans = extracted.spans;
    job.basePos = 0;

    // Apply the appropriate language handler
    var handler = langHandlerForExtension(requestedLang, plainText);
    handler(job);

    // Integrate the decorations and tags back into the source code,
    // modifying the sourceNode in place.
    recombineTagsAndDecorations(job);
  } catch (e) {
    if (win['console']) {
      console['log'](e && e['stack'] || e);
    }
  }
}
1705
/**
 * Pretty print a chunk of code.
 *
 * The HTML is parsed into a detached PRE element, optionally split into
 * numbered lines, decorated, and serialized back to HTML.
 *
 * @param sourceCodeHtml {string} The HTML to pretty print.
 * @param opt_langExtension {string} The language name to use.
 *     Typically, a filename extension like 'cpp' or 'java'.
 * @param opt_numberLines {number|boolean} True to number lines,
 *     or the 1-indexed number of the first line in sourceCodeHtml.
 * @return {string} the inner HTML of the decorated PRE element.
 */
function $prettyPrintOne(sourceCodeHtml, opt_langExtension, opt_numberLines) {
  /** @type{number|boolean} */
  var nl = opt_numberLines || false;
  /** @type{string|null} */
  var langExtension = opt_langExtension || null;

  // This could cause images to load and onload listeners to fire.
  // E.g. <img onerror="alert(1337)" src="nosuchimage.png">.
  // We assume that the inner HTML is from a trusted source.
  // The pre-tag is required for IE8 which strips newlines from innerHTML
  // when it is injected into a <pre> tag.
  // http://stackoverflow.com/questions/451486/pre-tag-loses-line-breaks-when-setting-innerhtml-in-ie
  // http://stackoverflow.com/questions/195363/inserting-a-newline-into-a-pre-tag-ie-javascript
  var scratch = document.createElement('div');
  scratch.innerHTML = '<pre>' + sourceCodeHtml + '</pre>';
  /** @type{!Element} */
  var container = /** @type{!Element} */(scratch.firstChild);

  if (nl) {
    numberLines(container, nl, true);
  }

  /** @type{JobT} */
  var job = {
    langExtension: langExtension,
    numberLines: nl,
    sourceNode: container,
    pre: 1,
    sourceCode: null,
    basePos: null,
    spans: null,
    decorations: null
  };
  applyDecorator(job);
  return container.innerHTML;
}
1748
/**
 * Find all the {@code <pre>}, {@code <code>} and {@code <xmp>} tags under
 * the root that either carry {@code class=prettyprint} or are directly
 * preceded by a {@code <?prettify ...?>} directive, and prettify them.
 *
 * Elements already marked {@code prettyprinted}, and elements nested inside
 * another {@code prettyprint} pre/code/xmp element, are left alone, so the
 * function can be called again after new content is added to the page.
 *
 * When {@code PR_SHOULD_USE_CONTINUATION} is set on the window object the
 * work is done in slices of roughly 250ms, separated by 250ms timeouts, so
 * that a large page does not make the browser unresponsive.
 *
 * @param {Function=} opt_whenDone called (with no arguments) when
 *     prettifying is done.  Ignored unless it is a function.
 * @param {(HTMLElement|HTMLDocument)=} opt_root an element or document
 *     containing all the elements to pretty print.
 *     Defaults to {@code document.body}.
 */
function $prettyPrint(opt_whenDone, opt_root) {
  var root = opt_root || document.body;
  // A Document node has a null ownerDocument, so when the caller passes a
  // document as the root use that document itself instead of silently
  // falling back to the global one (which may belong to another frame).
  var doc = root.ownerDocument || (root.nodeType === 9 ? root : document);

  // Take a snapshot of the nodes to rewrite: all <pre>, then all <code>,
  // then all <xmp> elements.
  var tagNames = ['pre', 'code', 'xmp'];
  var elements = [];
  for (var t = 0; t < tagNames.length; ++t) {
    var tagged = root.getElementsByTagName(tagNames[t]);
    for (var j = 0, n = tagged.length; j < n; ++j) {
      elements.push(tagged[j]);
    }
  }

  // Date.now is missing on old engines; fall back to coercing a new Date.
  var clock = Date;
  if (!clock['now']) {
    clock = { 'now': function () { return +(new Date); } };
  }

  // Index of the next element to process.  It lives outside doWork so that
  // a continuation resumes where the previous slice stopped.
  var k = 0;

  var langExtensionRe = /\blang(?:uage)?-([\w.]+)(?!\S)/;
  var lineNumsRe = /\blinenums\b(?::(\d+))?/;
  var directiveRe = /^\??prettify\b/;
  var directiveAttrRe = /\b(\w+)=([\w:.%+-]+)/g;
  var prettyPrintRe = /\bprettyprint\b/;
  var prettyPrintedRe = /\bprettyprinted\b/;
  var preformattedTagNameRe = /^(?:pre|xmp)$/i;
  var codeRe = /^code$/i;
  var preCodeXmpRe = /^(?:pre|code|xmp)$/i;
  // Sentinel meaning "no <?prettify?> directive precedes the element".
  var EMPTY = {};

  /**
   * Looks for a directive such as <?prettify lang=html linenums=true?>
   * immediately before the node, skipping white-space-only text nodes.
   * Attribute values must be unquoted: only name=value pairs whose value
   * consists of word characters and :.%+- are recognized.
   * Returns a fresh name->value map, or EMPTY when there is no directive.
   */
  function directiveAttrs(node) {
    for (var preceder = node; (preceder = preceder.previousSibling);) {
      var nt = preceder.nodeType;
      // <?foo?> is parsed by HTML 5 to a comment node (8)
      // like <!--?foo?-->, but in XML is a processing instruction (7).
      var value = (nt === 7 || nt === 8) && preceder.nodeValue;
      if (value) {
        if (!directiveRe.test(value)) { return EMPTY; }
        var attrs = {};
        value.replace(
            directiveAttrRe,
            function (_, name, attrValue) { attrs[name] = attrValue; });
        return attrs;
      }
      // Skip over white-space text nodes but not others.
      if (nt !== 3 || /\S/.test(preceder.nodeValue)) { return EMPTY; }
    }
    return EMPTY;
  }

  /** True if an ancestor is a pre/code/xmp element of class prettyprint. */
  function isNestedInPrettyPrint(node) {
    for (var p = node.parentNode; p; p = p.parentNode) {
      if (preCodeXmpRe.test(p.tagName)
          && p.className && prettyPrintRe.test(p.className)) {
        return true;
      }
    }
    return false;
  }

  /**
   * Picks the language extension: the directive's lang attribute wins,
   * then a lang-* / language-* class on the element, then the same class
   * on a <code> content wrapper, as in
   * <pre class="prettyprint"><code class="language-c">.
   * HTML5 recommends the "language-" prefix; both prefixes are supported.
   * Returns null when no language is specified.
   */
  function languageOf(node, className, attrs) {
    var explicit = attrs['lang'];
    if (explicit) { return explicit; }
    var match = className.match(langExtensionRe);
    if (!match) {
      var wrapper = childContentWrapper(node);
      if (wrapper && codeRe.test(wrapper.tagName)) {
        match = wrapper.className.match(langExtensionRe);
      }
    }
    return match ? match[1] : match;
  }

  /**
   * Truthy when white-space in the node is significant: always for
   * <pre>/<xmp>, otherwise when the computed white-space style starts
   * with "pre".
   */
  function isPreformatted(node) {
    if (preformattedTagNameRe.test(node.tagName)) { return 1; }
    var whitespace;
    var currentStyle = node['currentStyle'];
    var defaultView = doc.defaultView;
    if (currentStyle) {
      whitespace = currentStyle['whiteSpace'];
    } else if (defaultView && defaultView.getComputedStyle) {
      whitespace = defaultView.getComputedStyle(node, null)
          .getPropertyValue('white-space');
    } else {
      whitespace = 0;
    }
    return whitespace && 'pre' === whitespace.substring(0, 3);
  }

  /**
   * Resolves line numbering.  The directive's linenums attribute ("true"
   * or a non-zero number) wins; otherwise a class like linenums or
   * linenums:<n> is used, where <n> is the 1-indexed number of the first
   * line.  Returns true, a start line number, or a falsy value for none.
   */
  function lineNumbering(className, attrs) {
    var requested = attrs['linenums'];
    var fromDirective = requested === 'true' || +requested;
    if (fromDirective) { return fromDirective; }
    var match = className.match(lineNumsRe);
    if (!match) { return false; }
    return match[1] && match[1].length ? +match[1] : true;
  }

  /** Prettifies one candidate element if it qualifies. */
  function prettifyElement(cs) {
    var attrs = directiveAttrs(cs);
    var className = cs.className;
    if (attrs === EMPTY && !prettyPrintRe.test(className)) { return; }
    // Don't redo this if we've already done it.
    // This allows recalling pretty print to just prettyprint elements
    // that have been added to the page since last call.
    if (prettyPrintedRe.test(className)) { return; }
    // Make sure this is not nested in an already prettified element.
    if (isNestedInPrettyPrint(cs)) { return; }

    // Mark done.  If we fail to prettyprint for whatever reason,
    // we shouldn't try again.
    cs.className += ' prettyprinted';

    var langExtension = languageOf(cs, className, attrs);
    var preformatted = isPreformatted(cs);
    var lineNums = lineNumbering(className, attrs);
    if (lineNums) { numberLines(cs, lineNums, preformatted); }

    // do the pretty printing
    applyDecorator({
      langExtension: langExtension,
      sourceNode: cs,
      numberLines: lineNums,
      pre: preformatted,
      sourceCode: null,
      basePos: null,
      spans: null,
      decorations: null
    });
  }

  // The loop is broken into a series of continuations to make sure that we
  // don't make the browser unresponsive when rewriting a large page.
  function doWork() {
    var endTime = (win['PR_SHOULD_USE_CONTINUATION'] ?
                   clock['now']() + 250 /* ms */ :
                   Infinity);
    for (; k < elements.length && clock['now']() < endTime; k++) {
      prettifyElement(elements[k]);
    }
    if (k < elements.length) {
      // finish up in a continuation
      win.setTimeout(doWork, 250);
    } else if ('function' === typeof opt_whenDone) {
      opt_whenDone();
    }
  }

  doWork();
}
1920
// Publish the two entry points before building the PR namespace.  In global
// scope they are attached to the window object; otherwise prettyPrintOne is
// stored in the enclosing scope's variable.  prettyPrint is stored in the
// enclosing scope's variable in both cases.
if (IN_GLOBAL_SCOPE) {
  win['prettyPrintOne'] = $prettyPrintOne;
  win['prettyPrint'] = $prettyPrint;
} else {
  prettyPrintOne = $prettyPrintOne;
}
prettyPrint = $prettyPrint;

/**
 * Namespace exposing the lexer factory, the language handler registry, the
 * source decorator, the PR_* token class names and the two prettify entry
 * points.  Keys are quoted so that they keep their names.
 * @type {Object}
 */
var PR = {
  'createSimpleLexer': createSimpleLexer,
  'registerLangHandler': registerLangHandler,
  'sourceDecorator': sourceDecorator,
  'PR_ATTRIB_NAME': PR_ATTRIB_NAME,
  'PR_ATTRIB_VALUE': PR_ATTRIB_VALUE,
  'PR_COMMENT': PR_COMMENT,
  'PR_DECLARATION': PR_DECLARATION,
  'PR_KEYWORD': PR_KEYWORD,
  'PR_LITERAL': PR_LITERAL,
  'PR_NOCODE': PR_NOCODE,
  'PR_PLAIN': PR_PLAIN,
  'PR_PUNCTUATION': PR_PUNCTUATION,
  'PR_SOURCE': PR_SOURCE,
  'PR_STRING': PR_STRING,
  'PR_TAG': PR_TAG,
  'PR_TYPE': PR_TYPE,
  'prettyPrintOne': $prettyPrintOne,
  'prettyPrint': $prettyPrint
};
win['PR'] = PR;
1951
// Expose PR through the Asynchronous Module Definition (AMD) API when an
// AMD loader is present.
//
// https://github.com/amdjs/amdjs-api/wiki/AMD says that a global define
// function which conforms to the AMD API SHOULD carry a property called
// "amd" whose value is an object.  Checking for that property avoids
// calling some unrelated define() that other JavaScript code on the page
// may have installed.
var define = win['define'];
if ('function' === typeof define) {
  if (define['amd']) {
    // Register as a named module with no dependencies.
    define('google-code-prettify', [], function prettifyModule() {
      return PR;
    });
  }
}
1970 })();
1971 return prettyPrint;
1972 })();
1973
// If this script is deferred or async and the document is already
// loaded we need to wait for language handlers to load before performing
// any autorun.
/**
 * Runs the automatic pretty-print pass, if autorun is enabled.
 *
 * Hands contentLoaded a function that calls prettyPrint.  If any callback
 * names were requested, prettyPrint is given a completion function that
 * looks each name up on win['exports'] and invokes it from its own
 * zero-delay timeout with win as the receiver.
 */
function onLangsLoaded() {
  if (!autorun) { return; }
  contentLoaded(
      function () {
        var n = callbacks.length;
        var callback = n ? function () {
          // Capture the completion arguments here.  Inside the timeout
          // function below, `arguments` would refer to whatever the timer
          // passes to it rather than to what prettyPrint passed to us.
          var whenDoneArgs = arguments;
          for (var i = 0; i < n; ++i) {
            // The wrapper gives each timeout its own copy of the index.
            (function (i) {
              win.setTimeout(
                  function () {
                    win['exports'][callbacks[i]].apply(win, whenDoneArgs);
                  }, 0);
            })(i);
          }
        } : void 0;
        prettyPrint(callback);
      });
}
// NOTE(review): presumably this ends up calling onLangsLoaded() once no
// requested language handlers remain pending — confirm against the
// definition of checkPendingLanguages.
checkPendingLanguages();
1997
1998}());