版博士V2.0程序
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 

299 rivejä
9.1 KiB

  1. import { Transform } from 'stream'
  /**
   * Ambient declaration of the `tokenize` entry point.
   *
   * @param html - Source text to tokenize.
   * @param existingState - Optional tokenizer state. Presumably the `state` of
   *   an earlier {@link Tokenizer.Result}, so tokenizing can continue across
   *   chunks — confirm against the implementation.
   * @param options - Optional settings; see {@link Tokenizer.Options}.
   * @returns A {@link Tokenizer.Result} holding the tokenizer state and the
   *   list of tokens.
   */
  declare function tokenize(
    html: string,
    existingState?: Tokenizer.State,
    options?: Tokenizer.Options,
  ): Tokenizer.Result;
  /**
   * Ambient declaration of the `constructTree` entry point.
   *
   * @param tokens - Tokens to build the tree from (any token type).
   * @param existingState - Optional tree-constructor state. Presumably the
   *   `state` of an earlier {@link TreeConstructor.Result} — confirm against
   *   the implementation.
   * @returns A {@link TreeConstructor.Result} holding the constructor state
   *   and the AST.
   */
  declare function constructTree(
    tokens: Tokenizer.AnyToken[],
    existingState?: TreeConstructor.State,
  ): TreeConstructor.Result;
  /**
   * Ambient class declaration: a subclass of Node's `Transform` stream that
   * declares no members of its own at the type level.
   *
   * NOTE(review): presumably the streaming counterpart of `tokenize`; the
   * chunk types it reads and writes are not expressed here — confirm against
   * the implementation.
   */
  declare class StreamTokenizer extends Transform {
  }
  /**
   * Ambient class declaration: a subclass of Node's `Transform` stream that
   * declares no members of its own at the type level.
   *
   * NOTE(review): presumably the streaming counterpart of `constructTree`;
   * the chunk types it reads and writes are not expressed here — confirm
   * against the implementation.
   */
  declare class StreamTreeConstructor extends Transform {
  }
  /**
   * Types for the tokenizer's public surface: context identifiers, token
   * identifiers, options, state, result and token shapes.
   *
   * Every member is exported explicitly. In a declaration file this is
   * equivalent to the implicit export of namespace members.
   */
  export namespace Tokenizer {
    /** String-literal identifiers of the tokenizer contexts. */
    export namespace ContextTypes {
      export type Data = 'tokenizer-context:data';
      export type OpenTagStart = 'tokenizer-context:open-tag-start';
      export type CloseTag = 'tokenizer-context:close-tag';
      export type Attributes = 'tokenizer-context:attributes';
      export type OpenTagEnd = 'tokenizer-context:open-tag-end';
      export type AttributeKey = 'tokenizer-context:attribute-key';
      export type AttributeValue = 'tokenizer-context:attribute-value';
      export type AttributeValueBare = 'tokenizer-context:attribute-value-bare';
      export type AttributeValueWrapped = 'tokenizer-context:attribute-value-wrapped';
      export type ScriptContent = 'tokenizer-context:script-content';
      export type StyleContent = 'tokenizer-context:style-content';
      export type DoctypeStart = 'tokenizer-context:doctype-start';
      export type DoctypeEnd = 'tokenizer-context:doctype-end';
      export type DoctypeAttributes = 'tokenizer-context:doctype-attributes';
      export type DoctypeAttributeWrapped = 'tokenizer-context:doctype-attribute-wrapped';
      export type DoctypeAttributeBare = 'tokenizer-context:doctype-attribute-bare';
      export type CommentStart = 'tokenizer-context:comment-start';
      export type CommentContent = 'tokenizer-context:comment-content';
      export type CommentEnd = 'tokenizer-context:comment-end';

      /** Union of every tokenizer context identifier declared above. */
      export type AnyContextType =
        | Data
        | OpenTagStart
        | CloseTag
        | Attributes
        | OpenTagEnd
        | AttributeKey
        | AttributeValue
        | AttributeValueBare
        | AttributeValueWrapped
        | ScriptContent
        | StyleContent
        | DoctypeStart
        | DoctypeEnd
        | DoctypeAttributes
        | DoctypeAttributeWrapped
        | DoctypeAttributeBare
        | CommentStart
        | CommentContent
        | CommentEnd;
    }

    /** String-literal identifiers of the token types. */
    export namespace TokenTypes {
      export type Text = 'token:text';
      export type OpenTagStart = 'token:open-tag-start';
      export type AttributeKey = 'token:attribute-key';
      /** Correctly spelled name for the attribute-assignment token type. */
      export type AttributeAssignment = 'token:attribute-assignment';
      /**
       * Original, misspelled name; kept so existing references still compile.
       *
       * @deprecated Use {@link AttributeAssignment}.
       */
      export type AttributeAssigment = AttributeAssignment;
      export type AttributeValueWrapperStart = 'token:attribute-value-wrapper-start';
      export type AttributeValue = 'token:attribute-value';
      export type AttributeValueWrapperEnd = 'token:attribute-value-wrapper-end';
      export type OpenTagEnd = 'token:open-tag-end';
      export type CloseTag = 'token:close-tag';
      export type OpenTagStartScript = 'token:open-tag-start-script';
      export type ScriptTagContent = 'token:script-tag-content';
      export type OpenTagEndScript = 'token:open-tag-end-script';
      export type CloseTagScript = 'token:close-tag-script';
      export type OpenTagStartStyle = 'token:open-tag-start-style';
      export type StyleTagContent = 'token:style-tag-content';
      export type OpenTagEndStyle = 'token:open-tag-end-style';
      export type CloseTagStyle = 'token:close-tag-style';
      export type DoctypeStart = 'token:doctype-start';
      export type DoctypeEnd = 'token:doctype-end';
      export type DoctypeAttributeWrapperStart = 'token:doctype-attribute-wrapper-start';
      export type DoctypeAttribute = 'token:doctype-attribute';
      export type DoctypeAttributeWrapperEnd = 'token:doctype-attribute-wrapper-end';
      export type CommentStart = 'token:comment-start';
      export type CommentContent = 'token:comment-content';
      export type CommentEnd = 'token:comment-end';

      /** Union of every token type identifier declared above. */
      export type AnyTokenType =
        | Text
        | OpenTagStart
        | AttributeKey
        | AttributeAssignment
        | AttributeValueWrapperStart
        | AttributeValue
        | AttributeValueWrapperEnd
        | OpenTagEnd
        | CloseTag
        | OpenTagStartScript
        | ScriptTagContent
        | OpenTagEndScript
        | CloseTagScript
        | OpenTagStartStyle
        | StyleTagContent
        | OpenTagEndStyle
        | CloseTagStyle
        | DoctypeStart
        | DoctypeEnd
        | DoctypeAttributeWrapperStart
        | DoctypeAttribute
        | DoctypeAttributeWrapperEnd
        | CommentStart
        | CommentContent
        | CommentEnd;
    }

    /** Options object accepted by `tokenize`. */
    export interface Options {
      /**
       * Required flag. Presumably marks the last chunk of a chunked input —
       * confirm against the implementation.
       */
      isFinalChunk: boolean;
    }

    /** Tokenizer state, as accepted by `tokenize` and returned in {@link Result}. */
    export interface State {
      /**
       * NOTE(review): typed as a plain `string` although
       * `ContextTypes.AnyContextType` exists. Left unchanged so existing
       * callers keep compiling; confirm whether it can be narrowed.
       */
      currentContext: string;
      /** Per-context parameters; see {@link ContextParams}. */
      contextParams: ContextParams;
      decisionBuffer: string;
      accumulatedContent: string;
      caretPosition: number;
    }

    /** Return shape of `tokenize`: the tokenizer state plus the tokens. */
    export interface Result {
      state: State;
      tokens: AnyToken[];
    }

    /** A token of any declared token type. */
    export type AnyToken = Token<TokenTypes.AnyTokenType>;

    /**
     * A single token.
     *
     * @typeParam T - The token type identifier carried in `type`.
     */
    export interface Token<T extends TokenTypes.AnyTokenType> {
      type: T;
      content: string;
      /**
       * Presumably an offset into the input; whether the range is inclusive
       * is not visible here — confirm against the implementation.
       */
      startPosition: number;
      endPosition: number;
    }

    /**
     * Optional parameter bag for each context identifier. Every key of
     * `ContextTypes.AnyContextType` may be present; each value may carry a
     * quote `wrapper` character and/or a `tagName`.
     */
    export type ContextParams = Partial<
      Record<
        ContextTypes.AnyContextType,
        {
          wrapper?: '"' | "'";
          tagName?: string;
        }
      >
    >;
  }
  /**
   * Types for the tree constructor's public surface: node type identifiers,
   * context identifiers, per-node content shapes, state, result and node
   * aliases.
   *
   * Every member is exported explicitly. In a declaration file this is
   * equivalent to the implicit export of namespace members.
   */
  export namespace TreeConstructor {
    /** String-literal identifiers of the AST node types. */
    export namespace NodeTypes {
      export type Document = 'document';
      export type Doctype = 'doctype';
      export type Tag = 'tag';
      export type Text = 'text';
      export type Comment = 'comment';
      export type Script = 'script';
      export type Style = 'style';

      /** Union of every node type identifier declared above. */
      export type AnyNodeType =
        | Document
        | Doctype
        | Tag
        | Text
        | Comment
        | Script
        | Style;
    }

    /** String-literal identifiers of the tree-constructor contexts. */
    export namespace ContextTypes {
      export type TagContent = 'tree-constructor-context:tag-content';
      export type Tag = 'tree-constructor-context:tag';
      export type TagName = 'tree-constructor-context:tag-name';
      export type Attributes = 'tree-constructor-context:attributes';
      export type Attribute = 'tree-constructor-context:attribute';
      export type AttributeValue = 'tree-constructor-context:attribute-value';
      export type Comment = 'tree-constructor-context:comment';
      export type Doctype = 'tree-constructor-context:doctype';
      export type DoctypeAttributes = 'tree-constructor-context:doctype-attributes';
      export type DoctypeAttribute = 'tree-constructor-context:doctype-attribute';
      export type ScriptTag = 'tree-constructor-context:script-tag';
      export type StyleTag = 'tree-constructor-context:style-tag';

      /** Union of every tree-constructor context identifier declared above. */
      export type AnyContextType =
        | TagContent
        | Tag
        | TagName
        | Attributes
        | Attribute
        | AttributeValue
        | Comment
        | Doctype
        | DoctypeAttributes
        | DoctypeAttribute
        | ScriptTag
        | StyleTag;
    }

    /** Shapes of the `content` payload for each kind of node. */
    export namespace NodeContents {
      /** Content of a document node: its child nodes. */
      export interface Document {
        children: AnyNode[];
      }

      /** Content of a doctype node: start and end tokens, optional attributes. */
      export interface Doctype {
        start: Tokenizer.Token<Tokenizer.TokenTypes.DoctypeStart>;
        attributes?: DoctypeAttribute[];
        end: Tokenizer.Token<Tokenizer.TokenTypes.DoctypeEnd>;
      }

      /** Content of a text node: the text token. */
      export interface Text {
        value: Tokenizer.Token<Tokenizer.TokenTypes.Text>;
      }

      /** Content of a tag node. */
      export interface Tag {
        name: string;
        selfClosing: boolean;
        openStart: Tokenizer.Token<Tokenizer.TokenTypes.OpenTagStart>;
        attributes?: TagAttribute[];
        openEnd: Tokenizer.Token<Tokenizer.TokenTypes.OpenTagEnd>;
        children?: AnyNode[];
        /**
         * NOTE(review): declared as required even though `selfClosing` exists;
         * whether a close token is always present is not visible here —
         * confirm against the implementation.
         */
        close: Tokenizer.Token<Tokenizer.TokenTypes.CloseTag>;
      }

      /** Content of a comment node: start, content and end tokens. */
      export interface Comment {
        start: Tokenizer.Token<Tokenizer.TokenTypes.CommentStart>;
        value: Tokenizer.Token<Tokenizer.TokenTypes.CommentContent>;
        end: Tokenizer.Token<Tokenizer.TokenTypes.CommentEnd>;
      }

      /** Content of a script node, built from the script-specific tokens. */
      export interface Script {
        openStart: Tokenizer.Token<Tokenizer.TokenTypes.OpenTagStartScript>;
        attributes?: TagAttribute[];
        openEnd: Tokenizer.Token<Tokenizer.TokenTypes.OpenTagEndScript>;
        value: Tokenizer.Token<Tokenizer.TokenTypes.ScriptTagContent>;
        close: Tokenizer.Token<Tokenizer.TokenTypes.CloseTagScript>;
      }

      /** Content of a style node, built from the style-specific tokens. */
      export interface Style {
        openStart: Tokenizer.Token<Tokenizer.TokenTypes.OpenTagStartStyle>;
        attributes?: TagAttribute[];
        openEnd: Tokenizer.Token<Tokenizer.TokenTypes.OpenTagEndStyle>;
        value: Tokenizer.Token<Tokenizer.TokenTypes.StyleTagContent>;
        close: Tokenizer.Token<Tokenizer.TokenTypes.CloseTagStyle>;
      }

      /** Union of every node content shape declared above. */
      export type AnyNodeContent =
        | Document
        | Doctype
        | Text
        | Tag
        | Comment
        | Script
        | Style;
    }

    /** Tree-constructor state, as accepted by `constructTree` and returned in {@link Result}. */
    export interface State {
      caretPosition: number;
      currentContext: ContextTypes.AnyContextType;
      /**
       * NOTE(review): typed as a node-type string literal, not a node
       * object — confirm against the implementation.
       */
      currentNode: NodeTypes.AnyNodeType;
      /**
       * NOTE(review): typed as the literal `'document'`, while `Result.ast`
       * is a `DocumentNode` — confirm which one the implementation stores.
       */
      rootNode: NodeTypes.Document;
    }

    /** Return shape of `constructTree`: the constructor state plus the AST. */
    export interface Result {
      state: State;
      ast: AST;
    }

    /** The AST is represented by its document node. */
    export type AST = DocumentNode;

    /**
     * Generic AST node pairing a node type identifier with its content.
     *
     * @typeParam T - Node type identifier stored in `nodeType`.
     * @typeParam C - Content shape stored in `content`.
     */
    export interface Node<
      T extends NodeTypes.AnyNodeType,
      C extends NodeContents.AnyNodeContent,
    > {
      nodeType: T;
      content: C;
    }

    /** A node of any type with any content shape (the two are not correlated). */
    export type AnyNode = Node<NodeTypes.AnyNodeType, NodeContents.AnyNodeContent>;

    export type DocumentNode = Node<NodeTypes.Document, NodeContents.Document>;
    export type DoctypeNode = Node<NodeTypes.Doctype, NodeContents.Doctype>;
    export type TextNode = Node<NodeTypes.Text, NodeContents.Text>;
    export type TagNode = Node<NodeTypes.Tag, NodeContents.Tag>;
    export type CommentNode = Node<NodeTypes.Comment, NodeContents.Comment>;
    export type ScriptNode = Node<NodeTypes.Script, NodeContents.Script>;
    export type StyleNode = Node<NodeTypes.Style, NodeContents.Style>;

    /** One doctype attribute: a required value token with optional wrapper tokens. */
    export interface DoctypeAttribute {
      startWrapper?: Tokenizer.Token<Tokenizer.TokenTypes.DoctypeAttributeWrapperStart>;
      value: Tokenizer.Token<Tokenizer.TokenTypes.DoctypeAttribute>;
      endWrapper?: Tokenizer.Token<Tokenizer.TokenTypes.DoctypeAttributeWrapperEnd>;
    }

    /** One tag attribute; the key, value and both wrapper tokens are all optional. */
    export interface TagAttribute {
      key?: Tokenizer.Token<Tokenizer.TokenTypes.AttributeKey>;
      startWrapper?: Tokenizer.Token<Tokenizer.TokenTypes.AttributeValueWrapperStart>;
      value?: Tokenizer.Token<Tokenizer.TokenTypes.AttributeValue>;
      endWrapper?: Tokenizer.Token<Tokenizer.TokenTypes.AttributeValueWrapperEnd>;
    }
  }