/*---------------------------------------------------------------------------------------------
 * Copyright (c) Artyom Shalkhakov. All rights reserved.
 * Licensed under the MIT License. See License.txt in the project root for license information.
 *
 * Based on the ATS/Postiats lexer by Hongwei Xi.
 *--------------------------------------------------------------------------------------------*/
- define(["require", "exports"], function (require, exports) {
- 'use strict';
- Object.defineProperty(exports, "__esModule", { value: true });
- exports.conf = {
- comments: {
- lineComment: '//',
- blockComment: ['(*', '*)'],
- },
- brackets: [['{', '}'], ['[', ']'], ['(', ')'], ['<', '>']],
- autoClosingPairs: [
- { open: '"', close: '"', notIn: ['string', 'comment'] },
- { open: '{', close: '}', notIn: ['string', 'comment'] },
- { open: '[', close: ']', notIn: ['string', 'comment'] },
- { open: '(', close: ')', notIn: ['string', 'comment'] },
- ]
- };
- exports.language = {
- tokenPostfix: '.pats',
- // TODO: staload and dynload are followed by a special kind of string literals
- // with {$IDENTIFER} variables, and it also may make sense to highlight
- // the punctuation (. and / and \) differently.
- // Set defaultToken to invalid to see what you do not tokenize yet
- defaultToken: 'invalid',
- // keyword reference: https://github.com/githwxi/ATS-Postiats/blob/master/src/pats_lexing_token.dats
- keywords: [
- //
- "abstype",
- "abst0ype",
- "absprop",
- "absview",
- "absvtype",
- "absviewtype",
- "absvt0ype",
- "absviewt0ype",
- //
- "as",
- //
- "and",
- //
- "assume",
- //
- "begin",
- //
- /*
- "case", // CASE
- */
- //
- "classdec",
- //
- "datasort",
- //
- "datatype",
- "dataprop",
- "dataview",
- "datavtype",
- "dataviewtype",
- //
- "do",
- //
- "end",
- //
- "extern",
- "extype",
- "extvar",
- //
- "exception",
- //
- "fn",
- "fnx",
- "fun",
- //
- "prfn",
- "prfun",
- //
- "praxi",
- "castfn",
- //
- "if",
- "then",
- "else",
- //
- "ifcase",
- //
- "in",
- //
- "infix",
- "infixl",
- "infixr",
- "prefix",
- "postfix",
- //
- "implmnt",
- "implement",
- //
- "primplmnt",
- "primplement",
- //
- "import",
- //
- /*
- "lam", // LAM
- "llam", // LLAM
- "fix", // FIX
- */
- //
- "let",
- //
- "local",
- //
- "macdef",
- "macrodef",
- //
- "nonfix",
- //
- "symelim",
- "symintr",
- "overload",
- //
- "of",
- "op",
- //
- "rec",
- //
- "sif",
- "scase",
- //
- "sortdef",
- /*
- // HX: [sta] is now deprecated
- */
- "sta",
- "stacst",
- "stadef",
- "static",
- /*
- "stavar", // T_STAVAR
- */
- //
- "staload",
- "dynload",
- //
- "try",
- //
- "tkindef",
- //
- /*
- "type", // TYPE
- */
- "typedef",
- "propdef",
- "viewdef",
- "vtypedef",
- "viewtypedef",
- //
- /*
- "val", // VAL
- */
- "prval",
- //
- "var",
- "prvar",
- //
- "when",
- "where",
- //
- /*
- "for", // T_FOR
- "while", // T_WHILE
- */
- //
- "with",
- //
- "withtype",
- "withprop",
- "withview",
- "withvtype",
- "withviewtype",
- ],
- keywords_dlr: [
- "$delay",
- "$ldelay",
- //
- "$arrpsz",
- "$arrptrsize",
- //
- "$d2ctype",
- //
- "$effmask",
- "$effmask_ntm",
- "$effmask_exn",
- "$effmask_ref",
- "$effmask_wrt",
- "$effmask_all",
- //
- "$extern",
- "$extkind",
- "$extype",
- "$extype_struct",
- //
- "$extval",
- "$extfcall",
- "$extmcall",
- //
- "$literal",
- //
- "$myfilename",
- "$mylocation",
- "$myfunction",
- //
- "$lst",
- "$lst_t",
- "$lst_vt",
- "$list",
- "$list_t",
- "$list_vt",
- //
- "$rec",
- "$rec_t",
- "$rec_vt",
- "$record",
- "$record_t",
- "$record_vt",
- //
- "$tup",
- "$tup_t",
- "$tup_vt",
- "$tuple",
- "$tuple_t",
- "$tuple_vt",
- //
- "$break",
- "$continue",
- //
- "$raise",
- //
- "$showtype",
- //
- "$vcopyenv_v",
- "$vcopyenv_vt",
- //
- "$tempenver",
- //
- "$solver_assert",
- "$solver_verify",
- ],
- keywords_srp: [
- //
- "#if",
- "#ifdef",
- "#ifndef",
- //
- "#then",
- //
- "#elif",
- "#elifdef",
- "#elifndef",
- //
- "#else",
- "#endif",
- //
- "#error",
- //
- "#prerr",
- "#print",
- //
- "#assert",
- //
- "#undef",
- "#define",
- //
- "#include",
- "#require",
- //
- "#pragma",
- "#codegen2",
- "#codegen3",
- ],
- irregular_keyword_list: [
- "val+",
- "val-",
- "val",
- "case+",
- "case-",
- "case",
- "addr@",
- "addr",
- "fold@",
- "free@",
- "fix@",
- "fix",
- "lam@",
- "lam",
- "llam@",
- "llam",
- "viewt@ype+",
- "viewt@ype-",
- "viewt@ype",
- "viewtype+",
- "viewtype-",
- "viewtype",
- "view+",
- "view-",
- "view@",
- "view",
- "type+",
- "type-",
- "type",
- "vtype+",
- "vtype-",
- "vtype",
- "vt@ype+",
- "vt@ype-",
- "vt@ype",
- "viewt@ype+",
- "viewt@ype-",
- "viewt@ype",
- "viewtype+",
- "viewtype-",
- "viewtype",
- "prop+",
- "prop-",
- "prop",
- "type+",
- "type-",
- "type",
- "t@ype",
- "t@ype+",
- "t@ype-",
- "abst@ype",
- "abstype",
- "absviewt@ype",
- "absvt@ype",
- "for*",
- "for",
- "while*",
- "while"
- ],
- keywords_types: [
- 'bool',
- 'double',
- 'byte',
- 'int',
- 'short',
- 'char',
- 'void',
- 'unit',
- 'long',
- 'float',
- 'string',
- 'strptr'
- ],
- // TODO: reference for this?
- keywords_effects: [
- "0",
- "fun",
- "clo",
- "prf",
- "funclo",
- "cloptr",
- "cloref",
- "ref",
- "ntm",
- "1" // all effects
- ],
- operators: [
- "@",
- "!",
- "|",
- "`",
- ":",
- "$",
- ".",
- "=",
- "#",
- "~",
- //
- "..",
- "...",
- //
- "=>",
- // "=<", // T_EQLT
- "=<>",
- "=/=>",
- "=>>",
- "=/=>>",
- //
- "<",
- ">",
- //
- "><",
- //
- ".<",
- ">.",
- //
- ".<>.",
- //
- "->",
- //"-<", // T_MINUSLT
- "-<>",
- ],
- brackets: [
- { open: ',(', close: ')', token: 'delimiter.parenthesis' },
- { open: '`(', close: ')', token: 'delimiter.parenthesis' },
- { open: '%(', close: ')', token: 'delimiter.parenthesis' },
- { open: '\'(', close: ')', token: 'delimiter.parenthesis' },
- { open: '\'{', close: '}', token: 'delimiter.parenthesis' },
- { open: '@(', close: ')', token: 'delimiter.parenthesis' },
- { open: '@{', close: '}', token: 'delimiter.brace' },
- { open: '@[', close: ']', token: 'delimiter.square' },
- { open: '#[', close: ']', token: 'delimiter.square' },
- { open: '{', close: '}', token: 'delimiter.curly' },
- { open: '[', close: ']', token: 'delimiter.square' },
- { open: '(', close: ')', token: 'delimiter.parenthesis' },
- { open: '<', close: '>', token: 'delimiter.angle' }
- ],
- // we include these common regular expressions
- symbols: /[=><!~?:&|+\-*\/\^%]+/,
- IDENTFST: /[a-zA-Z_]/,
- IDENTRST: /[a-zA-Z0-9_'$]/,
- symbolic: /[%&+-./:=@~`^|*!$#?<>]/,
- digit: /[0-9]/,
- digitseq0: /@digit*/,
- xdigit: /[0-9A-Za-z]/,
- xdigitseq0: /@xdigit*/,
- INTSP: /[lLuU]/,
- FLOATSP: /[fFlL]/,
- fexponent: /[eE][+-]?[0-9]+/,
- fexponent_bin: /[pP][+-]?[0-9]+/,
- deciexp: /\.[0-9]*@fexponent?/,
- hexiexp: /\.[0-9a-zA-Z]*@fexponent_bin?/,
- irregular_keywords: /val[+-]?|case[+-]?|addr\@?|fold\@|free\@|fix\@?|lam\@?|llam\@?|prop[+-]?|type[+-]?|view[+-@]?|viewt@?ype[+-]?|t@?ype[+-]?|v(iew)?t@?ype[+-]?|abst@?ype|absv(iew)?t@?ype|for\*?|while\*?/,
- ESCHAR: /[ntvbrfa\\\?'"\(\[\{]/,
- start: 'root',
- // The main tokenizer for ATS/Postiats
- // reference: https://github.com/githwxi/ATS-Postiats/blob/master/src/pats_lexing.dats
- tokenizer: {
- root: [
- // lexing_blankseq0
- { regex: /[ \t\r\n]+/, action: { token: '' } },
- // NOTE: (*) is an invalid ML-like comment!
- { regex: /\(\*\)/, action: { token: 'invalid' } },
- { regex: /\(\*/, action: { token: 'comment', next: 'lexing_COMMENT_block_ml' } },
- { regex: /\(/, action: '@brackets' /*{ token: 'delimiter.parenthesis' }*/ },
- { regex: /\)/, action: '@brackets' /*{ token: 'delimiter.parenthesis' }*/ },
- { regex: /\[/, action: '@brackets' /*{ token: 'delimiter.bracket' }*/ },
- { regex: /\]/, action: '@brackets' /*{ token: 'delimiter.bracket' }*/ },
- { regex: /\{/, action: '@brackets' /*{ token: 'delimiter.brace' }*/ },
- { regex: /\}/, action: '@brackets' /*{ token: 'delimiter.brace' }*/ },
- // lexing_COMMA
- { regex: /,\(/, action: '@brackets' /*{ token: 'delimiter.parenthesis' }*/ },
- { regex: /,/, action: { token: 'delimiter.comma' } },
- { regex: /;/, action: { token: 'delimiter.semicolon' } },
- // lexing_AT
- { regex: /@\(/, action: '@brackets' /* { token: 'delimiter.parenthesis' }*/ },
- { regex: /@\[/, action: '@brackets' /* { token: 'delimiter.bracket' }*/ },
- { regex: /@\{/, action: '@brackets' /*{ token: 'delimiter.brace' }*/ },
- // lexing_COLON
- { regex: /:</, action: { token: 'keyword', next: '@lexing_EFFECT_commaseq0' } },
- /*
- lexing_DOT:
-
- . // SYMBOLIC => lexing_IDENT_sym
- . FLOATDOT => lexing_FLOAT_deciexp
- . DIGIT => T_DOTINT
- */
- { regex: /\.@symbolic+/, action: { token: 'identifier.sym' } },
- // FLOATDOT case
- { regex: /\.@digit*@fexponent@FLOATSP*/, action: { token: 'number.float' } },
- { regex: /\.@digit+/, action: { token: 'number.float' } },
- // lexing_DOLLAR:
- // '$' IDENTFST IDENTRST* => lexing_IDENT_dlr, _ => lexing_IDENT_sym
- {
- regex: /\$@IDENTFST@IDENTRST*/,
- action: {
- cases: {
- '@keywords_dlr': { token: 'keyword.dlr' },
- '@default': { token: 'namespace' },
- }
- }
- },
- // lexing_SHARP:
- // '#' IDENTFST IDENTRST* => lexing_ident_srp, _ => lexing_IDENT_sym
- {
- regex: /\#@IDENTFST@IDENTRST*/,
- action: {
- cases: {
- '@keywords_srp': { token: 'keyword.srp' },
- '@default': { token: 'identifier' },
- }
- }
- },
- // lexing_PERCENT:
- { regex: /%\(/, action: { token: 'delimiter.parenthesis' } },
- { regex: /^%{(#|\^|\$)?/, action: { token: 'keyword', next: '@lexing_EXTCODE', nextEmbedded: 'text/javascript' } },
- { regex: /^%}/, action: { token: 'keyword' } },
- // lexing_QUOTE
- { regex: /'\(/, action: { token: 'delimiter.parenthesis' } },
- { regex: /'\[/, action: { token: 'delimiter.bracket' } },
- { regex: /'\{/, action: { token: 'delimiter.brace' } },
- [/(')(\\@ESCHAR|\\[xX]@xdigit+|\\@digit+)(')/, ['string', 'string.escape', 'string']],
- [/'[^\\']'/, 'string'],
- // lexing_DQUOTE
- [/"/, 'string.quote', '@lexing_DQUOTE'],
- // lexing_BQUOTE
- { regex: /`\(/, action: '@brackets' /* { token: 'delimiter.parenthesis' }*/ },
- // TODO: otherwise, try lexing_IDENT_sym
- { regex: /\\/, action: { token: 'punctuation' } },
- // lexing_IDENT_alp:
- // NOTE: (?!regex) is syntax for "not-followed-by" regex
- // to resolve ambiguity such as foreach$fwork being incorrectly lexed as [for] [each$fwork]!
- { regex: /@irregular_keywords(?!@IDENTRST)/, action: { token: 'keyword' } },
- {
- regex: /@IDENTFST@IDENTRST*[<!\[]?/,
- action: {
- cases: {
- // TODO: dynload and staload should be specially parsed
- // dynload whitespace+ "special_string"
- // this special string is really:
- // '/' '\\' '.' => punctuation
- // ({\$)([a-zA-Z_][a-zA-Z_0-9]*)(}) => punctuation,keyword,punctuation
- // [^"] => identifier/literal
- '@keywords': { token: 'keyword' },
- '@keywords_types': { token: 'type' },
- '@default': { token: 'identifier' }
- }
- }
- },
- // lexing_IDENT_sym:
- { regex: /\/\/\/\//, action: { token: 'comment', next: '@lexing_COMMENT_rest' } },
- { regex: /\/\/.*$/, action: { token: 'comment' } },
- { regex: /\/\*/, action: { token: 'comment', next: '@lexing_COMMENT_block_c' } },
- // AS-20160627: specifically for effect annotations
- { regex: /-<|=</, action: { token: 'keyword', next: '@lexing_EFFECT_commaseq0' } },
- {
- regex: /@symbolic+/,
- action: {
- cases: {
- '@operators': 'keyword',
- '@default': 'operator'
- }
- }
- },
- // lexing_ZERO:
- // FIXME: this one is quite messy/unfinished yet
- // TODO: lexing_INT_hex
- // - testing_hexiexp => lexing_FLOAT_hexiexp
- // - testing_fexponent_bin => lexing_FLOAT_hexiexp
- // - testing_intspseq0 => T_INT_hex
- // lexing_INT_hex:
- { regex: /0[xX]@xdigit+(@hexiexp|@fexponent_bin)@FLOATSP*/, action: { token: 'number.float' } },
- { regex: /0[xX]@xdigit+@INTSP*/, action: { token: 'number.hex' } },
- { regex: /0[0-7]+(?![0-9])@INTSP*/, action: { token: 'number.octal' } },
- //{regex: /0/, action: { token: 'number' } }, // INTZERO
- // lexing_INT_dec:
- // - testing_deciexp => lexing_FLOAT_deciexp
- // - testing_fexponent => lexing_FLOAT_deciexp
- // - otherwise => intspseq0 ([0-9]*[lLuU]?)
- { regex: /@digit+(@fexponent|@deciexp)@FLOATSP*/, action: { token: 'number.float' } },
- { regex: /@digit@digitseq0@INTSP*/, action: { token: 'number.decimal' } },
- // DIGIT, if followed by digitseq0, is lexing_INT_dec
- { regex: /@digit+@INTSP*/, action: { token: 'number' } },
- ],
- lexing_COMMENT_block_ml: [
- [/[^\(\*]+/, 'comment'],
- [/\(\*/, 'comment', '@push'],
- [/\(\*/, 'comment.invalid'],
- [/\*\)/, 'comment', '@pop'],
- [/\*/, 'comment']
- ],
- lexing_COMMENT_block_c: [
- [/[^\/*]+/, 'comment'],
- // [/\/\*/, 'comment', '@push' ], // nested C-style block comments not allowed
- // [/\/\*/, 'comment.invalid' ], // NOTE: this breaks block comments in the shape of /* //*/
- [/\*\//, 'comment', '@pop'],
- [/[\/*]/, 'comment']
- ],
- lexing_COMMENT_rest: [
- [/$/, 'comment', '@pop'],
- [/.*/, 'comment']
- ],
- // NOTE: added by AS, specifically for highlighting
- lexing_EFFECT_commaseq0: [
- {
- regex: /@IDENTFST@IDENTRST+|@digit+/,
- action: {
- cases: {
- '@keywords_effects': { token: 'type.effect' },
- '@default': { token: 'identifier' }
- }
- }
- },
- { regex: /,/, action: { token: 'punctuation' } },
- { regex: />/, action: { token: '@rematch', next: '@pop' } },
- ],
- lexing_EXTCODE: [
- { regex: /^%}/, action: { token: '@rematch', next: '@pop', nextEmbedded: '@pop' } },
- { regex: /[^%]+/, action: '' },
- ],
- lexing_DQUOTE: [
- { regex: /"/, action: { token: 'string.quote', next: '@pop' } },
- // AS-20160628: additional hi-lighting for variables in staload/dynload strings
- { regex: /(\{\$)(@IDENTFST@IDENTRST*)(\})/, action: [{ token: 'string.escape' }, { token: 'identifier' }, { token: 'string.escape' }] },
- { regex: /\\$/, action: { token: 'string.escape' } },
- { regex: /\\(@ESCHAR|[xX]@xdigit+|@digit+)/, action: { token: 'string.escape' } },
- { regex: /[^\\"]+/, action: { token: 'string' } }
- ],
- },
- };
- });
|