版博士V2.0程序
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

пре 1 година
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133
  1. const dataContext = require('./tokenizer-context-handlers/data')
  2. const openTagStartContext = require('./tokenizer-context-handlers/open-tag-start')
  3. const closeTagContext = require('./tokenizer-context-handlers/close-tag')
  4. const openTagEndContext = require('./tokenizer-context-handlers/open-tag-end')
  5. const attributesContext = require('./tokenizer-context-handlers/attributes')
  6. const attributeKeyContext = require('./tokenizer-context-handlers/attribute-key')
  7. const attributeValueContext = require('./tokenizer-context-handlers/attribute-value')
  8. const attributeValueBareContext = require('./tokenizer-context-handlers/attribute-value-bare')
  9. const attributeValueWrappedContext = require('./tokenizer-context-handlers/attribute-value-wrapped')
  10. const scriptContentContext = require('./tokenizer-context-handlers/script-tag-content')
  11. const styleContentContext = require('./tokenizer-context-handlers/style-tag-content')
  12. const doctypeStartContext = require('./tokenizer-context-handlers/doctype-start')
  13. const doctypeEndContextFactory = require('./tokenizer-context-handlers/doctype-end')
  14. const doctypeAttributesContext = require('./tokenizer-context-handlers/doctype-attributes')
  15. const doctypeAttributeWrappedContext = require('./tokenizer-context-handlers/doctype-attribute-wrapped')
  16. const doctypeAttributeBareEndContext = require('./tokenizer-context-handlers/doctype-attribute-bare')
  17. const commentContentContext = require('./tokenizer-context-handlers/comment-content')
  18. const { isWhitespace } = require('./helpers')
  19. const {
  20. DATA_CONTEXT,
  21. OPEN_TAG_START_CONTEXT,
  22. CLOSE_TAG_CONTEXT,
  23. ATTRIBUTES_CONTEXT,
  24. OPEN_TAG_END_CONTEXT,
  25. ATTRIBUTE_KEY_CONTEXT,
  26. ATTRIBUTE_VALUE_CONTEXT,
  27. ATTRIBUTE_VALUE_BARE_CONTEXT,
  28. ATTRIBUTE_VALUE_WRAPPED_CONTEXT,
  29. SCRIPT_CONTENT_CONTEXT,
  30. STYLE_CONTENT_CONTEXT,
  31. DOCTYPE_START_CONTEXT,
  32. DOCTYPE_END_CONTEXT,
  33. DOCTYPE_ATTRIBUTES_CONTEXT,
  34. DOCTYPE_ATTRIBUTE_WRAPPED_CONTEXT,
  35. DOCTYPE_ATTRIBUTE_BARE_CONTEXT,
  36. COMMENT_CONTENT_CONTEXT,
  37. } = require('./constants/tokenizer-contexts')
  38. const contextHandlersMap = {
  39. [DATA_CONTEXT]: dataContext,
  40. [OPEN_TAG_START_CONTEXT]: openTagStartContext,
  41. [CLOSE_TAG_CONTEXT]: closeTagContext,
  42. [ATTRIBUTES_CONTEXT]: attributesContext,
  43. [OPEN_TAG_END_CONTEXT]: openTagEndContext,
  44. [ATTRIBUTE_KEY_CONTEXT]: attributeKeyContext,
  45. [ATTRIBUTE_VALUE_CONTEXT]: attributeValueContext,
  46. [ATTRIBUTE_VALUE_BARE_CONTEXT]: attributeValueBareContext,
  47. [ATTRIBUTE_VALUE_WRAPPED_CONTEXT]: attributeValueWrappedContext,
  48. [SCRIPT_CONTENT_CONTEXT]: scriptContentContext,
  49. [STYLE_CONTENT_CONTEXT]: styleContentContext,
  50. [DOCTYPE_START_CONTEXT]: doctypeStartContext,
  51. [DOCTYPE_END_CONTEXT]: doctypeEndContextFactory,
  52. [DOCTYPE_ATTRIBUTES_CONTEXT]: doctypeAttributesContext,
  53. [DOCTYPE_ATTRIBUTE_WRAPPED_CONTEXT]: doctypeAttributeWrappedContext,
  54. [DOCTYPE_ATTRIBUTE_BARE_CONTEXT]: doctypeAttributeBareEndContext,
  55. [COMMENT_CONTENT_CONTEXT]: commentContentContext
  56. }
  57. function tokenizeChars (
  58. chars,
  59. state,
  60. tokens,
  61. { isFinalChunk, positionOffset }
  62. ) {
  63. let charIndex = state.caretPosition - positionOffset
  64. while (charIndex < chars.length) {
  65. const context = contextHandlersMap[state.currentContext]
  66. state.decisionBuffer += chars[charIndex]
  67. const nextChar = chars[charIndex + 1]
  68. let nextNoWhiteChar = nextChar
  69. let nextNoWhiteIndex = charIndex + 1
  70. while (isWhitespace(nextNoWhiteChar)) {
  71. nextNoWhiteIndex += 1
  72. nextNoWhiteChar = chars[nextNoWhiteIndex]
  73. }
  74. context.parseSyntax(state.decisionBuffer, state, tokens, nextChar, nextNoWhiteChar, chars, charIndex)
  75. charIndex = state.caretPosition - positionOffset
  76. }
  77. if (isFinalChunk) {
  78. const context = contextHandlersMap[state.currentContext]
  79. // Move the caret back, as at this point
  80. // it in the position outside of chars array,
  81. // and it should not be taken into account
  82. // when calculating characters range
  83. state.caretPosition--
  84. if (context.handleContentEnd !== undefined) {
  85. context.handleContentEnd(state, tokens)
  86. }
  87. }
  88. }
  89. function tokenize (
  90. content = '',
  91. existingState,
  92. { isFinalChunk } = {}
  93. ) {
  94. isFinalChunk = isFinalChunk === undefined ? true : isFinalChunk
  95. let state
  96. if (existingState !== undefined) {
  97. state = Object.assign({}, existingState)
  98. } else {
  99. state = {
  100. currentContext: DATA_CONTEXT,
  101. contextParams: {},
  102. decisionBuffer: '',
  103. accumulatedContent: '',
  104. caretPosition: 0
  105. }
  106. }
  107. const chars = state.decisionBuffer + content
  108. const tokens = []
  109. const positionOffset = state.caretPosition - state.decisionBuffer.length
  110. tokenizeChars(chars, state, tokens, {
  111. isFinalChunk,
  112. positionOffset
  113. })
  114. return { state, tokens }
  115. }
  116. module.exports = tokenize