/*---------------------------------------------------------------------------------------------
 * Copyright (c) Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/
  5. define(["require", "exports"], function (require, exports) {
  6. 'use strict';
  7. Object.defineProperty(exports, "__esModule", { value: true });
  8. exports.conf = {
  9. comments: {
  10. lineComment: '#',
  11. blockComment: ['=begin', '=end'],
  12. },
  13. brackets: [
  14. ['(', ')'],
  15. ['{', '}'],
  16. ['[', ']']
  17. ],
  18. autoClosingPairs: [
  19. { open: '{', close: '}' },
  20. { open: '[', close: ']' },
  21. { open: '(', close: ')' },
  22. { open: '"', close: '"' },
  23. { open: '\'', close: '\'' },
  24. ],
  25. surroundingPairs: [
  26. { open: '{', close: '}' },
  27. { open: '[', close: ']' },
  28. { open: '(', close: ')' },
  29. { open: '"', close: '"' },
  30. { open: '\'', close: '\'' },
  31. ],
  32. indentationRules: {
  33. increaseIndentPattern: new RegExp('^\\s*((begin|class|(private|protected)\\s+def|def|else|elsif|ensure|for|if|module|rescue|unless|until|when|while|case)|([^#]*\\sdo\\b)|([^#]*=\\s*(case|if|unless)))\\b([^#\\{;]|("|\'|\/).*\\4)*(#.*)?$'),
  34. decreaseIndentPattern: new RegExp('^\\s*([}\\]]([,)]?\\s*(#|$)|\\.[a-zA-Z_]\\w*\\b)|(end|rescue|ensure|else|elsif|when)\\b)'),
  35. }
  36. };
  37. /*
  38. * Ruby language definition
  39. *
  40. * Quite a complex language due to elaborate escape sequences
  41. * and quoting of literate strings/regular expressions, and
  42. * an 'end' keyword that does not always apply to modifiers like until and while,
  43. * and a 'do' keyword that sometimes starts a block, but sometimes is part of
  44. * another statement (like 'while').
  45. *
  46. * (1) end blocks:
  47. * 'end' may end declarations like if or until, but sometimes 'if' or 'until'
  48. * are modifiers where there is no 'end'. Also, 'do' sometimes starts a block
  49. * that is ended by 'end', but sometimes it is part of a 'while', 'for', or 'until'
  50. * To do proper brace matching we do some elaborate state manipulation.
  51. * some examples:
  52. *
  53. * until bla do
  54. * work until tired
  55. * list.each do
  56. * something if test
  57. * end
  58. * end
  59. *
  60. * or
  61. *
  62. * if test
  63. * something (if test then x end)
  64. * bar if bla
  65. * end
  66. *
  67. * or, how about using class as a property..
  68. *
  69. * class Test
  70. * def endpoint
  71. * self.class.endpoint || routes
  72. * end
  73. * end
  74. *
  75. * (2) quoting:
  76. * there are many kinds of strings and escape sequences. But also, one can
  77. * start many string-like things as '%qx' where q specifies the kind of string
  78. * (like a command, escape expanded, regular expression, symbol etc.), and x is
  79. * some character and only another 'x' ends the sequence. Except for brackets
  80. * where the closing bracket ends the sequence.. and except for a nested bracket
  81. * inside the string like entity. Also, such strings can contain interpolated
  82. * ruby expressions again (and span multiple lines). Moreover, expanded
  83. * regular expression can also contain comments.
  84. */
  85. exports.language = {
  86. tokenPostfix: '.ruby',
  87. keywords: [
  88. '__LINE__', '__ENCODING__', '__FILE__', 'BEGIN', 'END', 'alias', 'and', 'begin',
  89. 'break', 'case', 'class', 'def', 'defined?', 'do', 'else', 'elsif', 'end',
  90. 'ensure', 'for', 'false', 'if', 'in', 'module', 'next', 'nil', 'not', 'or', 'redo',
  91. 'rescue', 'retry', 'return', 'self', 'super', 'then', 'true', 'undef', 'unless',
  92. 'until', 'when', 'while', 'yield',
  93. ],
  94. keywordops: [
  95. '::', '..', '...', '?', ':', '=>'
  96. ],
  97. builtins: [
  98. 'require', 'public', 'private', 'include', 'extend', 'attr_reader',
  99. 'protected', 'private_class_method', 'protected_class_method', 'new'
  100. ],
  101. // these are closed by 'end' (if, while and until are handled separately)
  102. declarations: [
  103. 'module', 'class', 'def', 'case', 'do', 'begin', 'for', 'if', 'while', 'until', 'unless'
  104. ],
  105. linedecls: [
  106. 'def', 'case', 'do', 'begin', 'for', 'if', 'while', 'until', 'unless'
  107. ],
  108. operators: [
  109. '^', '&', '|', '<=>', '==', '===', '!~', '=~', '>', '>=', '<', '<=', '<<', '>>', '+',
  110. '-', '*', '/', '%', '**', '~', '+@', '-@', '[]', '[]=', '`',
  111. '+=', '-=', '*=', '**=', '/=', '^=', '%=', '<<=', '>>=', '&=', '&&=', '||=', '|='
  112. ],
  113. brackets: [
  114. { open: '(', close: ')', token: 'delimiter.parenthesis' },
  115. { open: '{', close: '}', token: 'delimiter.curly' },
  116. { open: '[', close: ']', token: 'delimiter.square' }
  117. ],
  118. // we include these common regular expressions
  119. symbols: /[=><!~?:&|+\-*\/\^%\.]+/,
  120. // escape sequences
  121. escape: /(?:[abefnrstv\\"'\n\r]|[0-7]{1,3}|x[0-9A-Fa-f]{1,2}|u[0-9A-Fa-f]{4})/,
  122. escapes: /\\(?:C\-(@escape|.)|c(@escape|.)|@escape)/,
  123. decpart: /\d(_?\d)*/,
  124. decimal: /0|@decpart/,
  125. delim: /[^a-zA-Z0-9\s\n\r]/,
  126. heredelim: /(?:\w+|'[^']*'|"[^"]*"|`[^`]*`)/,
  127. regexpctl: /[(){}\[\]\$\^|\-*+?\.]/,
  128. regexpesc: /\\(?:[AzZbBdDfnrstvwWn0\\\/]|@regexpctl|c[A-Z]|x[0-9a-fA-F]{2}|u[0-9a-fA-F]{4})?/,
  129. // The main tokenizer for our languages
  130. tokenizer: {
  131. // Main entry.
  132. // root.<decl> where decl is the current opening declaration (like 'class')
  133. root: [
  134. // identifiers and keywords
  135. // most complexity here is due to matching 'end' correctly with declarations.
  136. // We distinguish a declaration that comes first on a line, versus declarations further on a line (which are most likey modifiers)
  137. [/^(\s*)([a-z_]\w*[!?=]?)/, ['white',
  138. {
  139. cases: {
  140. 'for|until|while': { token: 'keyword.$2', next: '@dodecl.$2' },
  141. '@declarations': { token: 'keyword.$2', next: '@root.$2' },
  142. 'end': { token: 'keyword.$S2', next: '@pop' },
  143. '@keywords': 'keyword',
  144. '@builtins': 'predefined',
  145. '@default': 'identifier'
  146. }
  147. }]],
  148. [/[a-z_]\w*[!?=]?/,
  149. {
  150. cases: {
  151. 'if|unless|while|until': { token: 'keyword.$0x', next: '@modifier.$0x' },
  152. 'for': { token: 'keyword.$2', next: '@dodecl.$2' },
  153. '@linedecls': { token: 'keyword.$0', next: '@root.$0' },
  154. 'end': { token: 'keyword.$S2', next: '@pop' },
  155. '@keywords': 'keyword',
  156. '@builtins': 'predefined',
  157. '@default': 'identifier'
  158. }
  159. }],
  160. [/[A-Z][\w]*[!?=]?/, 'constructor.identifier'],
  161. [/\$[\w]*/, 'global.constant'],
  162. [/@[\w]*/, 'namespace.instance.identifier'],
  163. [/@@[\w]*/, 'namespace.class.identifier'],
  164. // here document
  165. [/<<[-~](@heredelim).*/, { token: 'string.heredoc.delimiter', next: '@heredoc.$1' }],
  166. [/[ \t\r\n]+<<(@heredelim).*/, { token: 'string.heredoc.delimiter', next: '@heredoc.$1' }],
  167. [/^<<(@heredelim).*/, { token: 'string.heredoc.delimiter', next: '@heredoc.$1' }],
  168. // whitespace
  169. { include: '@whitespace' },
  170. // strings
  171. [/"/, { token: 'string.d.delim', next: '@dstring.d."' }],
  172. [/'/, { token: 'string.sq.delim', next: '@sstring.sq' }],
  173. // % literals. For efficiency, rematch in the 'pstring' state
  174. [/%([rsqxwW]|Q?)/, { token: '@rematch', next: 'pstring' }],
  175. // commands and symbols
  176. [/`/, { token: 'string.x.delim', next: '@dstring.x.`' }],
  177. [/:(\w|[$@])\w*[!?=]?/, 'string.s'],
  178. [/:"/, { token: 'string.s.delim', next: '@dstring.s."' }],
  179. [/:'/, { token: 'string.s.delim', next: '@sstring.s' }],
  180. // regular expressions. Lookahead for a (not escaped) closing forwardslash on the same line
  181. [/\/(?=(\\\/|[^\/\n])+\/)/, { token: 'regexp.delim', next: '@regexp' }],
  182. // delimiters and operators
  183. [/[{}()\[\]]/, '@brackets'],
  184. [/@symbols/, {
  185. cases: {
  186. '@keywordops': 'keyword',
  187. '@operators': 'operator',
  188. '@default': ''
  189. }
  190. }],
  191. [/[;,]/, 'delimiter'],
  192. // numbers
  193. [/0[xX][0-9a-fA-F](_?[0-9a-fA-F])*/, 'number.hex'],
  194. [/0[_oO][0-7](_?[0-7])*/, 'number.octal'],
  195. [/0[bB][01](_?[01])*/, 'number.binary'],
  196. [/0[dD]@decpart/, 'number'],
  197. [/@decimal((\.@decpart)?([eE][\-+]?@decpart)?)/, {
  198. cases: {
  199. '$1': 'number.float',
  200. '@default': 'number'
  201. }
  202. }],
  203. ],
  204. // used to not treat a 'do' as a block opener if it occurs on the same
  205. // line as a 'do' statement: 'while|until|for'
  206. // dodecl.<decl> where decl is the declarations started, like 'while'
  207. dodecl: [
  208. [/^/, { token: '', switchTo: '@root.$S2' }],
  209. [/[a-z_]\w*[!?=]?/, {
  210. cases: {
  211. 'end': { token: 'keyword.$S2', next: '@pop' },
  212. 'do': { token: 'keyword', switchTo: '@root.$S2' },
  213. '@linedecls': { token: '@rematch', switchTo: '@root.$S2' },
  214. '@keywords': 'keyword',
  215. '@builtins': 'predefined',
  216. '@default': 'identifier'
  217. }
  218. }],
  219. { include: '@root' }
  220. ],
  221. // used to prevent potential modifiers ('if|until|while|unless') to match
  222. // with 'end' keywords.
  223. // modifier.<decl>x where decl is the declaration starter, like 'if'
  224. modifier: [
  225. [/^/, '', '@pop'],
  226. [/[a-z_]\w*[!?=]?/, {
  227. cases: {
  228. 'end': { token: 'keyword.$S2', next: '@pop' },
  229. 'then|else|elsif|do': { token: 'keyword', switchTo: '@root.$S2' },
  230. '@linedecls': { token: '@rematch', switchTo: '@root.$S2' },
  231. '@keywords': 'keyword',
  232. '@builtins': 'predefined',
  233. '@default': 'identifier'
  234. }
  235. }],
  236. { include: '@root' }
  237. ],
  238. // single quote strings (also used for symbols)
  239. // sstring.<kind> where kind is 'sq' (single quote) or 's' (symbol)
  240. sstring: [
  241. [/[^\\']+/, 'string.$S2'],
  242. [/\\\\|\\'|\\$/, 'string.$S2.escape'],
  243. [/\\./, 'string.$S2.invalid'],
  244. [/'/, { token: 'string.$S2.delim', next: '@pop' }]
  245. ],
  246. // double quoted "string".
  247. // dstring.<kind>.<delim> where kind is 'd' (double quoted), 'x' (command), or 's' (symbol)
  248. // and delim is the ending delimiter (" or `)
  249. dstring: [
  250. [/[^\\`"#]+/, 'string.$S2'],
  251. [/#/, 'string.$S2.escape', '@interpolated'],
  252. [/\\$/, 'string.$S2.escape'],
  253. [/@escapes/, 'string.$S2.escape'],
  254. [/\\./, 'string.$S2.escape.invalid'],
  255. [/[`"]/, {
  256. cases: {
  257. '$#==$S3': { token: 'string.$S2.delim', next: '@pop' },
  258. '@default': 'string.$S2'
  259. }
  260. }]
  261. ],
  262. // literal documents
  263. // heredoc.<close> where close is the closing delimiter
  264. heredoc: [
  265. [/^(\s*)(@heredelim)$/, {
  266. cases: {
  267. '$2==$S2': ['string.heredoc', { token: 'string.heredoc.delimiter', next: '@pop' }],
  268. '@default': ['string.heredoc', 'string.heredoc']
  269. }
  270. }],
  271. [/.*/, 'string.heredoc'],
  272. ],
  273. // interpolated sequence
  274. interpolated: [
  275. [/\$\w*/, 'global.constant', '@pop'],
  276. [/@\w*/, 'namespace.class.identifier', '@pop'],
  277. [/@@\w*/, 'namespace.instance.identifier', '@pop'],
  278. [/[{]/, { token: 'string.escape.curly', switchTo: '@interpolated_compound' }],
  279. ['', '', '@pop'],
  280. ],
  281. // any code
  282. interpolated_compound: [
  283. [/[}]/, { token: 'string.escape.curly', next: '@pop' }],
  284. { include: '@root' },
  285. ],
  286. // %r quoted regexp
  287. // pregexp.<open>.<close> where open/close are the open/close delimiter
  288. pregexp: [
  289. { include: '@whitespace' },
  290. // turns out that you can quote using regex control characters, aargh!
  291. // for example; %r|kgjgaj| is ok (even though | is used for alternation)
  292. // so, we need to match those first
  293. [/[^\(\{\[\\]/, {
  294. cases: {
  295. '$#==$S3': { token: 'regexp.delim', next: '@pop' },
  296. '$#==$S2': { token: 'regexp.delim', next: '@push' },
  297. '~[)}\\]]': '@brackets.regexp.escape.control',
  298. '~@regexpctl': 'regexp.escape.control',
  299. '@default': 'regexp'
  300. }
  301. }],
  302. { include: '@regexcontrol' },
  303. ],
  304. // We match regular expression quite precisely
  305. regexp: [
  306. { include: '@regexcontrol' },
  307. [/[^\\\/]/, 'regexp'],
  308. ['/[ixmp]*', { token: 'regexp.delim' }, '@pop'],
  309. ],
  310. regexcontrol: [
  311. [/(\{)(\d+(?:,\d*)?)(\})/, ['@brackets.regexp.escape.control', 'regexp.escape.control', '@brackets.regexp.escape.control']],
  312. [/(\[)(\^?)/, ['@brackets.regexp.escape.control', { token: 'regexp.escape.control', next: '@regexrange' }]],
  313. [/(\()(\?[:=!])/, ['@brackets.regexp.escape.control', 'regexp.escape.control']],
  314. [/\(\?#/, { token: 'regexp.escape.control', next: '@regexpcomment' }],
  315. [/[()]/, '@brackets.regexp.escape.control'],
  316. [/@regexpctl/, 'regexp.escape.control'],
  317. [/\\$/, 'regexp.escape'],
  318. [/@regexpesc/, 'regexp.escape'],
  319. [/\\\./, 'regexp.invalid'],
  320. [/#/, 'regexp.escape', '@interpolated'],
  321. ],
  322. regexrange: [
  323. [/-/, 'regexp.escape.control'],
  324. [/\^/, 'regexp.invalid'],
  325. [/\\$/, 'regexp.escape'],
  326. [/@regexpesc/, 'regexp.escape'],
  327. [/[^\]]/, 'regexp'],
  328. [/\]/, '@brackets.regexp.escape.control', '@pop'],
  329. ],
  330. regexpcomment: [
  331. [/[^)]+/, 'comment'],
  332. [/\)/, { token: 'regexp.escape.control', next: '@pop' }]
  333. ],
  334. // % quoted strings
  335. // A bit repetitive since we need to often special case the kind of ending delimiter
  336. pstring: [
  337. [/%([qws])\(/, { token: 'string.$1.delim', switchTo: '@qstring.$1.(.)' }],
  338. [/%([qws])\[/, { token: 'string.$1.delim', switchTo: '@qstring.$1.[.]' }],
  339. [/%([qws])\{/, { token: 'string.$1.delim', switchTo: '@qstring.$1.{.}' }],
  340. [/%([qws])</, { token: 'string.$1.delim', switchTo: '@qstring.$1.<.>' }],
  341. [/%([qws])(@delim)/, { token: 'string.$1.delim', switchTo: '@qstring.$1.$2.$2' }],
  342. [/%r\(/, { token: 'regexp.delim', switchTo: '@pregexp.(.)' }],
  343. [/%r\[/, { token: 'regexp.delim', switchTo: '@pregexp.[.]' }],
  344. [/%r\{/, { token: 'regexp.delim', switchTo: '@pregexp.{.}' }],
  345. [/%r</, { token: 'regexp.delim', switchTo: '@pregexp.<.>' }],
  346. [/%r(@delim)/, { token: 'regexp.delim', switchTo: '@pregexp.$1.$1' }],
  347. [/%(x|W|Q?)\(/, { token: 'string.$1.delim', switchTo: '@qqstring.$1.(.)' }],
  348. [/%(x|W|Q?)\[/, { token: 'string.$1.delim', switchTo: '@qqstring.$1.[.]' }],
  349. [/%(x|W|Q?)\{/, { token: 'string.$1.delim', switchTo: '@qqstring.$1.{.}' }],
  350. [/%(x|W|Q?)</, { token: 'string.$1.delim', switchTo: '@qqstring.$1.<.>' }],
  351. [/%(x|W|Q?)(@delim)/, { token: 'string.$1.delim', switchTo: '@qqstring.$1.$2.$2' }],
  352. [/%([rqwsxW]|Q?)./, { token: 'invalid', next: '@pop' }],
  353. [/./, { token: 'invalid', next: '@pop' }],
  354. ],
  355. // non-expanded quoted string.
  356. // qstring.<kind>.<open>.<close>
  357. // kind = q|w|s (single quote, array, symbol)
  358. // open = open delimiter
  359. // close = close delimiter
  360. qstring: [
  361. [/\\$/, 'string.$S2.escape'],
  362. [/\\./, 'string.$S2.escape'],
  363. [/./, {
  364. cases: {
  365. '$#==$S4': { token: 'string.$S2.delim', next: '@pop' },
  366. '$#==$S3': { token: 'string.$S2.delim', next: '@push' },
  367. '@default': 'string.$S2'
  368. }
  369. }],
  370. ],
  371. // expanded quoted string.
  372. // qqstring.<kind>.<open>.<close>
  373. // kind = Q|W|x (double quote, array, command)
  374. // open = open delimiter
  375. // close = close delimiter
  376. qqstring: [
  377. [/#/, 'string.$S2.escape', '@interpolated'],
  378. { include: '@qstring' }
  379. ],
  380. // whitespace & comments
  381. whitespace: [
  382. [/[ \t\r\n]+/, ''],
  383. [/^\s*=begin\b/, 'comment', '@comment'],
  384. [/#.*$/, 'comment'],
  385. ],
  386. comment: [
  387. [/[^=]+/, 'comment'],
  388. [/^\s*=begin\b/, 'comment.invalid'],
  389. [/^\s*=end\b.*/, 'comment', '@pop'],
  390. [/[=]/, 'comment']
  391. ],
  392. }
  393. };
  394. });