版博士V2.0程序
Вы не можете выбрать более 25 тем. Темы должны начинаться с буквы или цифры, могут содержать дефисы (-) и должны содержать не более 35 символов.
 
 
 
 

244 строки
11 KiB

  1. "use strict";
  2. var __importDefault = (this && this.__importDefault) || function (mod) {
  3. return (mod && mod.__esModule) ? mod : { "default": mod };
  4. };
  5. Object.defineProperty(exports, "__esModule", { value: true });
  6. const VoidElements_1 = __importDefault(require("../config/VoidElements"));
  7. const UnnestableElements_1 = __importDefault(require("../config/UnnestableElements"));
  8. const ChildLessElements_1 = __importDefault(require("../config/ChildLessElements"));
  9. const he_1 = require("he");
  10. const NamespaceURI_1 = __importDefault(require("../config/NamespaceURI"));
  11. const PlainTextElements_1 = __importDefault(require("../config/PlainTextElements"));
  /**
   * Zero or more condition tokens that may appear inside "[if ...]", excluding
   * parenthesised groups. Every token is written so that a given string can be
   * tokenised in only one way (a number is matched one digit at a time as
   * "\d\.?" rather than "\d+\.?(\d+)?"), which keeps backtracking linear.
   */
  const CONDITION_COMMENT_TOKENS = '(?:!|le|lt|lte|gt|gte|&|\\|| |IE|WindowsEdition|Contoso|true|false|\\d\\.?)*';
  /**
   * Matches the opening part of a conditional comment, e.g. "<!--[if lte IE 8]>"
   * or "<![if !(IE 7)]>".
   *
   * It accepts the same strings as the previous single-literal pattern
   * "(token|\(.*\)|)*", but without nesting ambiguous alternatives inside "*":
   * any run of tokens containing parenthesised groups starts at the first "("
   * and ends at the last ")", so a single optional "\(.*\)" surrounded by plain
   * tokens describes the same language. The previous form needed exponential
   * time on inputs such as "<![if 1111111111111111111111111111 x".
   */
  const CONDITION_COMMENT_REGEXP = new RegExp(`<!(--)?\\[if ${CONDITION_COMMENT_TOKENS}(?:\\(.*\\)${CONDITION_COMMENT_TOKENS})?\\]>`, 'gi');
  /** Matches the closing part of a conditional comment: "<![endif]>" or "<![endif]-->". */
  const CONDITION_COMMENT_END_REGEXP = /<!\[endif\](--)?>/gi;
  /**
   * Matches a start or end tag.
   * Groups: 1 = "/" for an end tag, 2 = tag name, 3 = raw attribute string,
   * 4 = "/" for a self-closing tag.
   */
  const MARKUP_REGEXP = /<(\/?)([a-z][-.0-9_a-z]*)\s*([^<>]*?)(\/?)>/gi;
  /**
   * Matches "<!-- ... -->" (group 1 = comment text) or any other "<! ... >" /
   * "<? ... >" construct (group 2 = "!" or "?", group 3 = content).
   */
  const COMMENT_REGEXP = /<!--(.*?)-->|<([!?])([^>]*)>/gi;
  /** Matches a double-quoted identifier inside a document type declaration (group 1 = value). */
  const DOCUMENT_TYPE_ATTRIBUTE_REGEXP = /"([^"]+)"/gm;
  /**
   * Matches an attribute that has a value.
   * Groups: 1 = name, 2 = double-quoted value, 3 = single-quoted value,
   * 4 = unquoted value.
   */
  const ATTRIBUTE_REGEXP = /([^\s=]+)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S+)))/gms;
  18. /**
  19. * XML parser.
  20. */
  class XMLParser {
    /**
     * Parses XML/HTML and returns a root element.
     *
     * The markup is scanned tag by tag with a regular expression while a stack of
     * open elements is maintained. Text between tags is appended either verbatim
     * (inside plain text elements) or after being split into text, comment and
     * document type nodes.
     *
     * @param document Document used to create the nodes.
     * @param data HTML data. "null" and "undefined" produce an empty fragment; any
     * other value is converted with String().
     * @param [evaluateScripts = false] Set to "true" to enable script execution.
     * @returns Root element (a document fragment).
     */
    static parse(document, data, evaluateScripts = false) {
      const root = document.createDocumentFragment();
      if (data === null || data === undefined) {
        return root;
      }
      const source = String(data);
      const stack = [root];
      const markupRegexp = new RegExp(MARKUP_REGEXP, 'gi');
      let parent = root;
      let parentTagName = null;
      let parentUnnestableTagName = null;
      let lastTextIndex = 0;
      let match;
      while ((match = markupRegexp.exec(source))) {
        const tagName = match[2].toLowerCase();
        const isStartTag = !match[1];
        // Handle the text located between the previous tag and this one.
        if (parent && match.index !== lastTextIndex) {
          const text = source.substring(lastTextIndex, match.index);
          if (parentTagName && PlainTextElements_1.default.includes(parentTagName)) {
            parent.appendChild(document.createTextNode(text));
          }
          else {
            const condCommRegexp = new RegExp(CONDITION_COMMENT_REGEXP, 'gi');
            const condCommEndRegexp = new RegExp(CONDITION_COMMENT_END_REGEXP, 'gi');
            // @Refer: https://learn.microsoft.com/en-us/previous-versions/windows/internet-explorer/ie-developer/?redirectedfrom=MSDN
            // When a conditional comment opens in the text before this start tag and
            // is closed later on, scanning jumps past the closing part. "lastTextIndex"
            // is left untouched so the whole conditional block is part of the text
            // handled at the next tag.
            if (isStartTag &&
              condCommRegexp.exec(text) &&
              condCommEndRegexp.exec(source.substring(markupRegexp.lastIndex))) {
              markupRegexp.lastIndex += condCommEndRegexp.lastIndex;
              continue;
            }
            this.appendTextAndCommentNodes(document, parent, text);
          }
        }
        if (!isStartTag) {
          // End tag: step back to the previously opened element.
          stack.pop();
          parent = stack[stack.length - 1] || root;
          parentTagName = parent.tagName ? parent.tagName.toLowerCase() : null;
          parentUnnestableTagName = this.getUnnestableTagName(parent);
          lastTextIndex = markupRegexp.lastIndex;
          continue;
        }
        const namespaceURI = tagName === 'svg'
          ? NamespaceURI_1.default.svg
          : parent.namespaceURI || NamespaceURI_1.default.html;
        const newElement = document.createElementNS(namespaceURI, tagName);
        // Scripts are not allowed to be executed when they are parsed using innerHTML, outerHTML, replaceWith() etc.
        // However, they are allowed to be executed when document.write() is used.
        // See: https://developer.mozilla.org/en-US/docs/Web/API/HTMLScriptElement
        if (tagName === 'script') {
          newElement._evaluateScript = evaluateScripts;
        }
        // An assumption that the same rule should be applied for the HTMLLinkElement is made here.
        if (tagName === 'link') {
          newElement._evaluateCSS = evaluateScripts;
        }
        this.setAttributes(newElement, match[3]);
        // Only tags that are neither self-closing nor void become the new parent.
        const isOpened = !match[4] && !VoidElements_1.default.includes(tagName);
        if (isOpened) {
          // Some elements are not allowed to be nested (e.g. "<a><a></a></a>" is not allowed.).
          // Therefore we will auto-close the tag.
          if (parentUnnestableTagName === tagName) {
            stack.pop();
            parent = parent.parentNode || root;
          }
          parent = parent.appendChild(newElement);
          parentTagName = tagName;
          parentUnnestableTagName = this.getUnnestableTagName(parent);
          stack.push(parent);
        }
        else {
          parent.appendChild(newElement);
        }
        lastTextIndex = markupRegexp.lastIndex;
        // Tags which contain non-parsed content
        // For example: <script> JavaScript should not be parsed
        if (ChildLessElements_1.default.includes(tagName)) {
          let childLessMatch = null;
          let hasEndTag = false;
          while ((childLessMatch = markupRegexp.exec(source))) {
            if (childLessMatch[2].toLowerCase() === tagName && childLessMatch[1]) {
              // Rewind so that the main loop handles the end tag itself.
              markupRegexp.lastIndex -= childLessMatch[0].length;
              hasEndTag = true;
              break;
            }
          }
          if (!hasEndTag) {
            // A failed exec() resets "lastIndex" to 0. Without handling this, the main
            // loop would parse the markup from the beginning again and never terminate.
            if (isOpened) {
              // Unclosed element: the rest of the data is its raw text content.
              const rest = source.substring(lastTextIndex);
              if (rest) {
                parent.appendChild(document.createTextNode(rest));
              }
              lastTextIndex = source.length;
              break;
            }
            // Self-closing or void tag: continue parsing right after it.
            markupRegexp.lastIndex = lastTextIndex;
          }
        }
      }
      // Text after last element
      if (lastTextIndex < source.length) {
        this.appendTextAndCommentNodes(document, parent || root, source.substring(lastTextIndex));
      }
      return root;
    }
    /**
     * Returns a tag name if element is unnestable.
     *
     * @param element Element.
     * @returns Lower-cased tag name if the element is listed as unnestable, otherwise "null".
     */
    static getUnnestableTagName(element) {
      const tagName = element.tagName ? element.tagName.toLowerCase() : null;
      return tagName && UnnestableElements_1.default.includes(tagName) ? tagName : null;
    }
    /**
     * Appends text and comment nodes.
     *
     * @param document Document.
     * @param node Node to append the created nodes to.
     * @param text Text to search in.
     */
    static appendTextAndCommentNodes(document, node, text) {
      for (const innerNode of this.getTextAndCommentNodes(document, text)) {
        node.appendChild(innerNode);
      }
    }
    /**
     * Returns text and comment nodes from a text.
     *
     * Comments, processing-instruction-like constructs ("<? ... >") and document
     * type declarations are turned into nodes; everything in between becomes text
     * nodes. Once a document type node has been created, later text in the same
     * chunk is not emitted.
     *
     * @param document Document.
     * @param text Text to search in.
     * @returns Nodes.
     */
    static getTextAndCommentNodes(document, text) {
      const nodes = [];
      const commentRegExp = new RegExp(COMMENT_REGEXP, 'gms');
      let hasDocumentType = false;
      let lastIndex = 0;
      let match;
      while ((match = commentRegExp.exec(text))) {
        if (!hasDocumentType && lastIndex !== match.index) {
          nodes.push(document.createTextNode(text.substring(lastIndex, match.index)));
        }
        if (match[3] && match[3].toUpperCase().startsWith('DOCTYPE')) {
          const docTypeSplit = match[3].split(' ');
          // A declaration without a name (e.g. "<!DOCTYPE>") produces no node.
          if (docTypeSplit.length > 1) {
            const docTypeString = docTypeSplit.slice(1).join(' ');
            const attributes = [];
            const attributeRegExp = new RegExp(DOCUMENT_TYPE_ATTRIBUTE_REGEXP, 'gm');
            const isPublic = docTypeString.includes('PUBLIC');
            let attributeMatch;
            while ((attributeMatch = attributeRegExp.exec(docTypeString))) {
              attributes.push(attributeMatch[1]);
            }
            // With "PUBLIC" the quoted values are the public ID followed by the
            // system ID; otherwise the first quoted value is the system ID.
            const publicId = isPublic ? attributes[0] || '' : '';
            const systemId = isPublic ? attributes[1] || '' : attributes[0] || '';
            nodes.push(document.implementation.createDocumentType(docTypeSplit[1], publicId, systemId));
            hasDocumentType = true;
          }
        }
        else {
          // "match[1]" is an empty string for "<!---->", so it has to be compared
          // with "undefined" instead of being tested for truthiness.
          const comment = match[1] !== undefined
            ? match[1]
            : match[2] === '?'
              ? '?' + match[3]
              : match[3];
          nodes.push(document.createComment(comment));
        }
        // Always move past the matched markup, also for document types. Otherwise the
        // declaration would be emitted once more as text in front of the next comment.
        lastIndex = match.index + match[0].length;
      }
      if (!hasDocumentType && lastIndex < text.length) {
        nodes.push(document.createTextNode(text.substring(lastIndex)));
      }
      return nodes;
    }
    /**
     * Sets raw attributes.
     *
     * @param element Element.
     * @param attributesString Raw attributes (the text between the tag name and ">").
     */
    static setAttributes(element, attributesString) {
      const attributes = attributesString.trim();
      if (!attributes) {
        return;
      }
      const regExp = new RegExp(ATTRIBUTE_REGEXP, 'gi');
      let match;
      // Attributes with value
      while ((match = regExp.exec(attributes))) {
        if (match[1]) {
          const value = (0, he_1.decode)(match[2] || match[3] || match[4] || '');
          const name = this._getAttributeName(element.namespaceURI, match[1]);
          const namespaceURI = element.tagName === 'SVG' && name === 'xmlns' ? value : null;
          element.setAttributeNS(namespaceURI, name, value);
        }
      }
      // Attributes with no value
      // What is left after removing the valued attributes is split on any run of
      // whitespace, as names may be separated by tabs or line breaks as well.
      for (const name of attributes.replace(ATTRIBUTE_REGEXP, '').trim().split(/\s+/)) {
        if (name) {
          element.setAttributeNS(null, this._getAttributeName(element.namespaceURI, name), '');
        }
      }
    }
    /**
     * Returns attribute name.
     *
     * @param namespaceURI Namespace URI.
     * @param name Name.
     * @returns Attribute name based on namespace: unchanged for the SVG namespace, lower-cased otherwise.
     */
    static _getAttributeName(namespaceURI, name) {
      if (namespaceURI === NamespaceURI_1.default.svg) {
        return name;
      }
      return name.toLowerCase();
    }
  }
  243. exports.default = XMLParser;
  244. //# sourceMappingURL=XMLParser.js.map