postiats.js 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615
  1. /*---------------------------------------------------------------------------------------------
  2. * Copyright (c) Artyom Shalkhakov. All rights reserved.
  3. * Licensed under the MIT License. See License.txt in the project root for license information.
  4. *
  5. * Based on the ATS/Postiats lexer by Hongwei Xi.
  6. *--------------------------------------------------------------------------------------------*/
  7. define(["require", "exports"], function (require, exports) {
  8. 'use strict';
  9. Object.defineProperty(exports, "__esModule", { value: true });
  10. exports.conf = {
  11. comments: {
  12. lineComment: '//',
  13. blockComment: ['(*', '*)'],
  14. },
  15. brackets: [['{', '}'], ['[', ']'], ['(', ')'], ['<', '>']],
  16. autoClosingPairs: [
  17. { open: '"', close: '"', notIn: ['string', 'comment'] },
  18. { open: '{', close: '}', notIn: ['string', 'comment'] },
  19. { open: '[', close: ']', notIn: ['string', 'comment'] },
  20. { open: '(', close: ')', notIn: ['string', 'comment'] },
  21. ]
  22. };
  23. exports.language = {
  24. tokenPostfix: '.pats',
  25. // TODO: staload and dynload are followed by a special kind of string literals
  26. // with {$IDENTIFER} variables, and it also may make sense to highlight
  27. // the punctuation (. and / and \) differently.
  28. // Set defaultToken to invalid to see what you do not tokenize yet
  29. defaultToken: 'invalid',
  30. // keyword reference: https://github.com/githwxi/ATS-Postiats/blob/master/src/pats_lexing_token.dats
  31. keywords: [
  32. //
  33. "abstype",
  34. "abst0ype",
  35. "absprop",
  36. "absview",
  37. "absvtype",
  38. "absviewtype",
  39. "absvt0ype",
  40. "absviewt0ype",
  41. //
  42. "as",
  43. //
  44. "and",
  45. //
  46. "assume",
  47. //
  48. "begin",
  49. //
  50. /*
  51. "case", // CASE
  52. */
  53. //
  54. "classdec",
  55. //
  56. "datasort",
  57. //
  58. "datatype",
  59. "dataprop",
  60. "dataview",
  61. "datavtype",
  62. "dataviewtype",
  63. //
  64. "do",
  65. //
  66. "end",
  67. //
  68. "extern",
  69. "extype",
  70. "extvar",
  71. //
  72. "exception",
  73. //
  74. "fn",
  75. "fnx",
  76. "fun",
  77. //
  78. "prfn",
  79. "prfun",
  80. //
  81. "praxi",
  82. "castfn",
  83. //
  84. "if",
  85. "then",
  86. "else",
  87. //
  88. "ifcase",
  89. //
  90. "in",
  91. //
  92. "infix",
  93. "infixl",
  94. "infixr",
  95. "prefix",
  96. "postfix",
  97. //
  98. "implmnt",
  99. "implement",
  100. //
  101. "primplmnt",
  102. "primplement",
  103. //
  104. "import",
  105. //
  106. /*
  107. "lam", // LAM
  108. "llam", // LLAM
  109. "fix", // FIX
  110. */
  111. //
  112. "let",
  113. //
  114. "local",
  115. //
  116. "macdef",
  117. "macrodef",
  118. //
  119. "nonfix",
  120. //
  121. "symelim",
  122. "symintr",
  123. "overload",
  124. //
  125. "of",
  126. "op",
  127. //
  128. "rec",
  129. //
  130. "sif",
  131. "scase",
  132. //
  133. "sortdef",
  134. /*
  135. // HX: [sta] is now deprecated
  136. */
  137. "sta",
  138. "stacst",
  139. "stadef",
  140. "static",
  141. /*
  142. "stavar", // T_STAVAR
  143. */
  144. //
  145. "staload",
  146. "dynload",
  147. //
  148. "try",
  149. //
  150. "tkindef",
  151. //
  152. /*
  153. "type", // TYPE
  154. */
  155. "typedef",
  156. "propdef",
  157. "viewdef",
  158. "vtypedef",
  159. "viewtypedef",
  160. //
  161. /*
  162. "val", // VAL
  163. */
  164. "prval",
  165. //
  166. "var",
  167. "prvar",
  168. //
  169. "when",
  170. "where",
  171. //
  172. /*
  173. "for", // T_FOR
  174. "while", // T_WHILE
  175. */
  176. //
  177. "with",
  178. //
  179. "withtype",
  180. "withprop",
  181. "withview",
  182. "withvtype",
  183. "withviewtype",
  184. ],
  185. keywords_dlr: [
  186. "$delay",
  187. "$ldelay",
  188. //
  189. "$arrpsz",
  190. "$arrptrsize",
  191. //
  192. "$d2ctype",
  193. //
  194. "$effmask",
  195. "$effmask_ntm",
  196. "$effmask_exn",
  197. "$effmask_ref",
  198. "$effmask_wrt",
  199. "$effmask_all",
  200. //
  201. "$extern",
  202. "$extkind",
  203. "$extype",
  204. "$extype_struct",
  205. //
  206. "$extval",
  207. "$extfcall",
  208. "$extmcall",
  209. //
  210. "$literal",
  211. //
  212. "$myfilename",
  213. "$mylocation",
  214. "$myfunction",
  215. //
  216. "$lst",
  217. "$lst_t",
  218. "$lst_vt",
  219. "$list",
  220. "$list_t",
  221. "$list_vt",
  222. //
  223. "$rec",
  224. "$rec_t",
  225. "$rec_vt",
  226. "$record",
  227. "$record_t",
  228. "$record_vt",
  229. //
  230. "$tup",
  231. "$tup_t",
  232. "$tup_vt",
  233. "$tuple",
  234. "$tuple_t",
  235. "$tuple_vt",
  236. //
  237. "$break",
  238. "$continue",
  239. //
  240. "$raise",
  241. //
  242. "$showtype",
  243. //
  244. "$vcopyenv_v",
  245. "$vcopyenv_vt",
  246. //
  247. "$tempenver",
  248. //
  249. "$solver_assert",
  250. "$solver_verify",
  251. ],
  252. keywords_srp: [
  253. //
  254. "#if",
  255. "#ifdef",
  256. "#ifndef",
  257. //
  258. "#then",
  259. //
  260. "#elif",
  261. "#elifdef",
  262. "#elifndef",
  263. //
  264. "#else",
  265. "#endif",
  266. //
  267. "#error",
  268. //
  269. "#prerr",
  270. "#print",
  271. //
  272. "#assert",
  273. //
  274. "#undef",
  275. "#define",
  276. //
  277. "#include",
  278. "#require",
  279. //
  280. "#pragma",
  281. "#codegen2",
  282. "#codegen3",
  283. ],
  284. irregular_keyword_list: [
  285. "val+",
  286. "val-",
  287. "val",
  288. "case+",
  289. "case-",
  290. "case",
  291. "addr@",
  292. "addr",
  293. "fold@",
  294. "free@",
  295. "fix@",
  296. "fix",
  297. "lam@",
  298. "lam",
  299. "llam@",
  300. "llam",
  301. "viewt@ype+",
  302. "viewt@ype-",
  303. "viewt@ype",
  304. "viewtype+",
  305. "viewtype-",
  306. "viewtype",
  307. "view+",
  308. "view-",
  309. "view@",
  310. "view",
  311. "type+",
  312. "type-",
  313. "type",
  314. "vtype+",
  315. "vtype-",
  316. "vtype",
  317. "vt@ype+",
  318. "vt@ype-",
  319. "vt@ype",
  320. "viewt@ype+",
  321. "viewt@ype-",
  322. "viewt@ype",
  323. "viewtype+",
  324. "viewtype-",
  325. "viewtype",
  326. "prop+",
  327. "prop-",
  328. "prop",
  329. "type+",
  330. "type-",
  331. "type",
  332. "t@ype",
  333. "t@ype+",
  334. "t@ype-",
  335. "abst@ype",
  336. "abstype",
  337. "absviewt@ype",
  338. "absvt@ype",
  339. "for*",
  340. "for",
  341. "while*",
  342. "while"
  343. ],
  344. keywords_types: [
  345. 'bool',
  346. 'double',
  347. 'byte',
  348. 'int',
  349. 'short',
  350. 'char',
  351. 'void',
  352. 'unit',
  353. 'long',
  354. 'float',
  355. 'string',
  356. 'strptr'
  357. ],
  358. // TODO: reference for this?
  359. keywords_effects: [
  360. "0",
  361. "fun",
  362. "clo",
  363. "prf",
  364. "funclo",
  365. "cloptr",
  366. "cloref",
  367. "ref",
  368. "ntm",
  369. "1" // all effects
  370. ],
  371. operators: [
  372. "@",
  373. "!",
  374. "|",
  375. "`",
  376. ":",
  377. "$",
  378. ".",
  379. "=",
  380. "#",
  381. "~",
  382. //
  383. "..",
  384. "...",
  385. //
  386. "=>",
  387. // "=<", // T_EQLT
  388. "=<>",
  389. "=/=>",
  390. "=>>",
  391. "=/=>>",
  392. //
  393. "<",
  394. ">",
  395. //
  396. "><",
  397. //
  398. ".<",
  399. ">.",
  400. //
  401. ".<>.",
  402. //
  403. "->",
  404. //"-<", // T_MINUSLT
  405. "-<>",
  406. ],
  407. brackets: [
  408. { open: ',(', close: ')', token: 'delimiter.parenthesis' },
  409. { open: '`(', close: ')', token: 'delimiter.parenthesis' },
  410. { open: '%(', close: ')', token: 'delimiter.parenthesis' },
  411. { open: '\'(', close: ')', token: 'delimiter.parenthesis' },
  412. { open: '\'{', close: '}', token: 'delimiter.parenthesis' },
  413. { open: '@(', close: ')', token: 'delimiter.parenthesis' },
  414. { open: '@{', close: '}', token: 'delimiter.brace' },
  415. { open: '@[', close: ']', token: 'delimiter.square' },
  416. { open: '#[', close: ']', token: 'delimiter.square' },
  417. { open: '{', close: '}', token: 'delimiter.curly' },
  418. { open: '[', close: ']', token: 'delimiter.square' },
  419. { open: '(', close: ')', token: 'delimiter.parenthesis' },
  420. { open: '<', close: '>', token: 'delimiter.angle' }
  421. ],
  422. // we include these common regular expressions
  423. symbols: /[=><!~?:&|+\-*\/\^%]+/,
  424. IDENTFST: /[a-zA-Z_]/,
  425. IDENTRST: /[a-zA-Z0-9_'$]/,
  426. symbolic: /[%&+-./:=@~`^|*!$#?<>]/,
  427. digit: /[0-9]/,
  428. digitseq0: /@digit*/,
  429. xdigit: /[0-9A-Za-z]/,
  430. xdigitseq0: /@xdigit*/,
  431. INTSP: /[lLuU]/,
  432. FLOATSP: /[fFlL]/,
  433. fexponent: /[eE][+-]?[0-9]+/,
  434. fexponent_bin: /[pP][+-]?[0-9]+/,
  435. deciexp: /\.[0-9]*@fexponent?/,
  436. hexiexp: /\.[0-9a-zA-Z]*@fexponent_bin?/,
  437. irregular_keywords: /val[+-]?|case[+-]?|addr\@?|fold\@|free\@|fix\@?|lam\@?|llam\@?|prop[+-]?|type[+-]?|view[+-@]?|viewt@?ype[+-]?|t@?ype[+-]?|v(iew)?t@?ype[+-]?|abst@?ype|absv(iew)?t@?ype|for\*?|while\*?/,
  438. ESCHAR: /[ntvbrfa\\\?'"\(\[\{]/,
  439. start: 'root',
  440. // The main tokenizer for ATS/Postiats
  441. // reference: https://github.com/githwxi/ATS-Postiats/blob/master/src/pats_lexing.dats
  442. tokenizer: {
  443. root: [
  444. // lexing_blankseq0
  445. { regex: /[ \t\r\n]+/, action: { token: '' } },
  446. // NOTE: (*) is an invalid ML-like comment!
  447. { regex: /\(\*\)/, action: { token: 'invalid' } },
  448. { regex: /\(\*/, action: { token: 'comment', next: 'lexing_COMMENT_block_ml' } },
  449. { regex: /\(/, action: '@brackets' /*{ token: 'delimiter.parenthesis' }*/ },
  450. { regex: /\)/, action: '@brackets' /*{ token: 'delimiter.parenthesis' }*/ },
  451. { regex: /\[/, action: '@brackets' /*{ token: 'delimiter.bracket' }*/ },
  452. { regex: /\]/, action: '@brackets' /*{ token: 'delimiter.bracket' }*/ },
  453. { regex: /\{/, action: '@brackets' /*{ token: 'delimiter.brace' }*/ },
  454. { regex: /\}/, action: '@brackets' /*{ token: 'delimiter.brace' }*/ },
  455. // lexing_COMMA
  456. { regex: /,\(/, action: '@brackets' /*{ token: 'delimiter.parenthesis' }*/ },
  457. { regex: /,/, action: { token: 'delimiter.comma' } },
  458. { regex: /;/, action: { token: 'delimiter.semicolon' } },
  459. // lexing_AT
  460. { regex: /@\(/, action: '@brackets' /* { token: 'delimiter.parenthesis' }*/ },
  461. { regex: /@\[/, action: '@brackets' /* { token: 'delimiter.bracket' }*/ },
  462. { regex: /@\{/, action: '@brackets' /*{ token: 'delimiter.brace' }*/ },
  463. // lexing_COLON
  464. { regex: /:</, action: { token: 'keyword', next: '@lexing_EFFECT_commaseq0' } },
  465. /*
  466. lexing_DOT:
  467. . // SYMBOLIC => lexing_IDENT_sym
  468. . FLOATDOT => lexing_FLOAT_deciexp
  469. . DIGIT => T_DOTINT
  470. */
  471. { regex: /\.@symbolic+/, action: { token: 'identifier.sym' } },
  472. // FLOATDOT case
  473. { regex: /\.@digit*@fexponent@FLOATSP*/, action: { token: 'number.float' } },
  474. { regex: /\.@digit+/, action: { token: 'number.float' } },
  475. // lexing_DOLLAR:
  476. // '$' IDENTFST IDENTRST* => lexing_IDENT_dlr, _ => lexing_IDENT_sym
  477. {
  478. regex: /\$@IDENTFST@IDENTRST*/,
  479. action: {
  480. cases: {
  481. '@keywords_dlr': { token: 'keyword.dlr' },
  482. '@default': { token: 'namespace' },
  483. }
  484. }
  485. },
  486. // lexing_SHARP:
  487. // '#' IDENTFST IDENTRST* => lexing_ident_srp, _ => lexing_IDENT_sym
  488. {
  489. regex: /\#@IDENTFST@IDENTRST*/,
  490. action: {
  491. cases: {
  492. '@keywords_srp': { token: 'keyword.srp' },
  493. '@default': { token: 'identifier' },
  494. }
  495. }
  496. },
  497. // lexing_PERCENT:
  498. { regex: /%\(/, action: { token: 'delimiter.parenthesis' } },
  499. { regex: /^%{(#|\^|\$)?/, action: { token: 'keyword', next: '@lexing_EXTCODE', nextEmbedded: 'text/javascript' } },
  500. { regex: /^%}/, action: { token: 'keyword' } },
  501. // lexing_QUOTE
  502. { regex: /'\(/, action: { token: 'delimiter.parenthesis' } },
  503. { regex: /'\[/, action: { token: 'delimiter.bracket' } },
  504. { regex: /'\{/, action: { token: 'delimiter.brace' } },
  505. [/(')(\\@ESCHAR|\\[xX]@xdigit+|\\@digit+)(')/, ['string', 'string.escape', 'string']],
  506. [/'[^\\']'/, 'string'],
  507. // lexing_DQUOTE
  508. [/"/, 'string.quote', '@lexing_DQUOTE'],
  509. // lexing_BQUOTE
  510. { regex: /`\(/, action: '@brackets' /* { token: 'delimiter.parenthesis' }*/ },
  511. // TODO: otherwise, try lexing_IDENT_sym
  512. { regex: /\\/, action: { token: 'punctuation' } },
  513. // lexing_IDENT_alp:
  514. // NOTE: (?!regex) is syntax for "not-followed-by" regex
  515. // to resolve ambiguity such as foreach$fwork being incorrectly lexed as [for] [each$fwork]!
  516. { regex: /@irregular_keywords(?!@IDENTRST)/, action: { token: 'keyword' } },
  517. {
  518. regex: /@IDENTFST@IDENTRST*[<!\[]?/,
  519. action: {
  520. cases: {
  521. // TODO: dynload and staload should be specially parsed
  522. // dynload whitespace+ "special_string"
  523. // this special string is really:
  524. // '/' '\\' '.' => punctuation
  525. // ({\$)([a-zA-Z_][a-zA-Z_0-9]*)(}) => punctuation,keyword,punctuation
  526. // [^"] => identifier/literal
  527. '@keywords': { token: 'keyword' },
  528. '@keywords_types': { token: 'type' },
  529. '@default': { token: 'identifier' }
  530. }
  531. }
  532. },
  533. // lexing_IDENT_sym:
  534. { regex: /\/\/\/\//, action: { token: 'comment', next: '@lexing_COMMENT_rest' } },
  535. { regex: /\/\/.*$/, action: { token: 'comment' } },
  536. { regex: /\/\*/, action: { token: 'comment', next: '@lexing_COMMENT_block_c' } },
  537. // AS-20160627: specifically for effect annotations
  538. { regex: /-<|=</, action: { token: 'keyword', next: '@lexing_EFFECT_commaseq0' } },
  539. {
  540. regex: /@symbolic+/,
  541. action: {
  542. cases: {
  543. '@operators': 'keyword',
  544. '@default': 'operator'
  545. }
  546. }
  547. },
  548. // lexing_ZERO:
  549. // FIXME: this one is quite messy/unfinished yet
  550. // TODO: lexing_INT_hex
  551. // - testing_hexiexp => lexing_FLOAT_hexiexp
  552. // - testing_fexponent_bin => lexing_FLOAT_hexiexp
  553. // - testing_intspseq0 => T_INT_hex
  554. // lexing_INT_hex:
  555. { regex: /0[xX]@xdigit+(@hexiexp|@fexponent_bin)@FLOATSP*/, action: { token: 'number.float' } },
  556. { regex: /0[xX]@xdigit+@INTSP*/, action: { token: 'number.hex' } },
  557. { regex: /0[0-7]+(?![0-9])@INTSP*/, action: { token: 'number.octal' } },
  558. //{regex: /0/, action: { token: 'number' } }, // INTZERO
  559. // lexing_INT_dec:
  560. // - testing_deciexp => lexing_FLOAT_deciexp
  561. // - testing_fexponent => lexing_FLOAT_deciexp
  562. // - otherwise => intspseq0 ([0-9]*[lLuU]?)
  563. { regex: /@digit+(@fexponent|@deciexp)@FLOATSP*/, action: { token: 'number.float' } },
  564. { regex: /@digit@digitseq0@INTSP*/, action: { token: 'number.decimal' } },
  565. // DIGIT, if followed by digitseq0, is lexing_INT_dec
  566. { regex: /@digit+@INTSP*/, action: { token: 'number' } },
  567. ],
  568. lexing_COMMENT_block_ml: [
  569. [/[^\(\*]+/, 'comment'],
  570. [/\(\*/, 'comment', '@push'],
  571. [/\(\*/, 'comment.invalid'],
  572. [/\*\)/, 'comment', '@pop'],
  573. [/\*/, 'comment']
  574. ],
  575. lexing_COMMENT_block_c: [
  576. [/[^\/*]+/, 'comment'],
  577. // [/\/\*/, 'comment', '@push' ], // nested C-style block comments not allowed
  578. // [/\/\*/, 'comment.invalid' ], // NOTE: this breaks block comments in the shape of /* //*/
  579. [/\*\//, 'comment', '@pop'],
  580. [/[\/*]/, 'comment']
  581. ],
  582. lexing_COMMENT_rest: [
  583. [/$/, 'comment', '@pop'],
  584. [/.*/, 'comment']
  585. ],
  586. // NOTE: added by AS, specifically for highlighting
  587. lexing_EFFECT_commaseq0: [
  588. {
  589. regex: /@IDENTFST@IDENTRST+|@digit+/,
  590. action: {
  591. cases: {
  592. '@keywords_effects': { token: 'type.effect' },
  593. '@default': { token: 'identifier' }
  594. }
  595. }
  596. },
  597. { regex: /,/, action: { token: 'punctuation' } },
  598. { regex: />/, action: { token: '@rematch', next: '@pop' } },
  599. ],
  600. lexing_EXTCODE: [
  601. { regex: /^%}/, action: { token: '@rematch', next: '@pop', nextEmbedded: '@pop' } },
  602. { regex: /[^%]+/, action: '' },
  603. ],
  604. lexing_DQUOTE: [
  605. { regex: /"/, action: { token: 'string.quote', next: '@pop' } },
  606. // AS-20160628: additional hi-lighting for variables in staload/dynload strings
  607. { regex: /(\{\$)(@IDENTFST@IDENTRST*)(\})/, action: [{ token: 'string.escape' }, { token: 'identifier' }, { token: 'string.escape' }] },
  608. { regex: /\\$/, action: { token: 'string.escape' } },
  609. { regex: /\\(@ESCHAR|[xX]@xdigit+|@digit+)/, action: { token: 'string.escape' } },
  610. { regex: /[^\\"]+/, action: { token: 'string' } }
  611. ],
  612. },
  613. };
  614. });