package lexers import ( "regexp" "strings" "unicode/utf8" "github.com/dlclark/regexp2" . "github.com/alecthomas/chroma/v2" // nolint ) // Raku lexer. var Raku Lexer = Register(MustNewLexer( &Config{ Name: "Raku", Aliases: []string{"perl6", "pl6", "raku"}, Filenames: []string{ "*.pl", "*.pm", "*.nqp", "*.p6", "*.6pl", "*.p6l", "*.pl6", "*.6pm", "*.p6m", "*.pm6", "*.t", "*.raku", "*.rakumod", "*.rakutest", "*.rakudoc", }, MimeTypes: []string{ "text/x-perl6", "application/x-perl6", "text/x-raku", "application/x-raku", }, DotAll: true, }, rakuRules, )) func rakuRules() Rules { type RakuToken int const ( rakuQuote RakuToken = iota rakuNameAttribute rakuPod rakuPodFormatter rakuPodDeclaration rakuMultilineComment rakuMatchRegex rakuSubstitutionRegex ) const ( colonPairOpeningBrackets = `(?:<<|<|«|\(|\[|\{)` colonPairClosingBrackets = `(?:>>|>|»|\)|\]|\})` colonPairPattern = `(?:)(?\w[\w'-]*)(?` + colonPairOpeningBrackets + `)` colonPairLookahead = `(?=(:['\w-]+` + colonPairOpeningBrackets + `.+?` + colonPairClosingBrackets + `)?` namePattern = `(?:(?!` + colonPairPattern + `)(?:::|[\w':-]))+` variablePattern = `[$@%&]+[.^:?=!~]?` + namePattern globalVariablePattern = `[$@%&]+\*` + namePattern ) keywords := []string{ `BEGIN`, `CATCH`, `CHECK`, `CLOSE`, `CONTROL`, `DOC`, `END`, `ENTER`, `FIRST`, `INIT`, `KEEP`, `LAST`, `LEAVE`, `NEXT`, `POST`, `PRE`, `QUIT`, `UNDO`, `anon`, `augment`, `but`, `class`, `constant`, `default`, `does`, `else`, `elsif`, `enum`, `for`, `gather`, `given`, `grammar`, `has`, `if`, `import`, `is`, `of`, `let`, `loop`, `made`, `make`, `method`, `module`, `multi`, `my`, `need`, `orwith`, `our`, `proceed`, `proto`, `repeat`, `require`, `where`, `return`, `return-rw`, `returns`, `->`, `-->`, `role`, `state`, `sub`, `no`, `submethod`, `subset`, `succeed`, `supersede`, `try`, `unit`, `unless`, `until`, `use`, `when`, `while`, `with`, `without`, `export`, `native`, `repr`, `required`, `rw`, `symbol`, `default`, `cached`, `DEPRECATED`, `dynamic`, 
`hidden-from-backtrace`, `nodal`, `pure`, `raw`, `start`, `react`, `supply`, `whenever`, `also`, `rule`, `token`, `regex`, `dynamic-scope`, `built`, `temp`, } keywordsPattern := Words(`(?)`, `(>=)`, `minmax`, `notandthen`, `S`, } wordOperatorsPattern := Words(`(?<=^|\b|\s)`, `(?=$|\b|\s)`, wordOperators...) operators := []string{ `++`, `--`, `-`, `**`, `!`, `+`, `~`, `?`, `+^`, `~^`, `?^`, `^`, `*`, `/`, `%`, `%%`, `+&`, `+<`, `+>`, `~&`, `~<`, `~>`, `?&`, `+|`, `+^`, `~|`, `~^`, `?`, `?|`, `?^`, `&`, `^`, `<=>`, `^…^`, `^…`, `…^`, `…`, `...`, `...^`, `^...`, `^...^`, `..`, `..^`, `^..`, `^..^`, `::=`, `:=`, `!=`, `==`, `<=`, `<`, `>=`, `>`, `~~`, `===`, `&&`, `||`, `|`, `^^`, `//`, `??`, `!!`, `^fff^`, `^ff^`, `<==`, `==>`, `<<==`, `==>>`, `=>`, `=`, `<<`, `«`, `>>`, `»`, `,`, `>>.`, `».`, `.&`, `.=`, `.^`, `.?`, `.+`, `.*`, `.`, `∘`, `∩`, `⊍`, `∪`, `⊎`, `∖`, `⊖`, `≠`, `≤`, `≥`, `=:=`, `=~=`, `≅`, `∈`, `∉`, `≡`, `≢`, `∋`, `∌`, `⊂`, `⊄`, `⊆`, `⊈`, `⊃`, `⊅`, `⊇`, `⊉`, `:`, `!!!`, `???`, `¯`, `×`, `÷`, `−`, `⁺`, `⁻`, } operatorsPattern := Words(``, ``, operators...) 
builtinTypes := []string{ `False`, `True`, `Order`, `More`, `Less`, `Same`, `Any`, `Array`, `Associative`, `AST`, `atomicint`, `Attribute`, `Backtrace`, `Backtrace::Frame`, `Bag`, `Baggy`, `BagHash`, `Blob`, `Block`, `Bool`, `Buf`, `Callable`, `CallFrame`, `Cancellation`, `Capture`, `CArray`, `Channel`, `Code`, `compiler`, `Complex`, `ComplexStr`, `CompUnit`, `CompUnit::PrecompilationRepository`, `CompUnit::Repository`, `Empty`, `CompUnit::Repository::FileSystem`, `CompUnit::Repository::Installation`, `Cool`, `CurrentThreadScheduler`, `CX::Warn`, `CX::Take`, `CX::Succeed`, `CX::Return`, `CX::Redo`, `CX::Proceed`, `CX::Next`, `CX::Last`, `CX::Emit`, `CX::Done`, `Cursor`, `Date`, `Dateish`, `DateTime`, `Distribution`, `Distribution::Hash`, `Distribution::Locally`, `Distribution::Path`, `Distribution::Resource`, `Distro`, `Duration`, `Encoding`, `Encoding::GlobalLexerRegistry`, `Endian`, `Enumeration`, `Exception`, `Failure`, `FatRat`, `Grammar`, `Hash`, `HyperWhatever`, `Instant`, `Int`, `int`, `int16`, `int32`, `int64`, `int8`, `str`, `IntStr`, `IO`, `IO::ArgFiles`, `IO::CatHandle`, `IO::Handle`, `IO::Notification`, `IO::Notification::Change`, `IO::Path`, `IO::Path::Cygwin`, `IO::Path::Parts`, `IO::Path::QNX`, `IO::Path::Unix`, `IO::Path::Win32`, `IO::Pipe`, `IO::Socket`, `IO::Socket::Async`, `IO::Socket::Async::ListenSocket`, `IO::Socket::INET`, `IO::Spec`, `IO::Spec::Cygwin`, `IO::Spec::QNX`, `IO::Spec::Unix`, `IO::Spec::Win32`, `IO::Special`, `Iterable`, `Iterator`, `Junction`, `Kernel`, `Label`, `List`, `Lock`, `Lock::Async`, `Lock::ConditionVariable`, `long`, `longlong`, `Macro`, `Map`, `Match`, `Metamodel::AttributeContainer`, `Metamodel::C3MRO`, `Metamodel::ClassHOW`, `Metamodel::ConcreteRoleHOW`, `Metamodel::CurriedRoleHOW`, `Metamodel::DefiniteHOW`, `Metamodel::Documenting`, `Metamodel::EnumHOW`, `Metamodel::Finalization`, `Metamodel::MethodContainer`, `Metamodel::Mixins`, `Metamodel::MROBasedMethodDispatch`, `Metamodel::MultipleInheritance`, 
`Metamodel::Naming`, `Metamodel::Primitives`, `Metamodel::PrivateMethodContainer`, `Metamodel::RoleContainer`, `Metamodel::RolePunning`, `Metamodel::Stashing`, `Metamodel::Trusting`, `Metamodel::Versioning`, `Method`, `Mix`, `MixHash`, `Mixy`, `Mu`, `NFC`, `NFD`, `NFKC`, `NFKD`, `Nil`, `Num`, `num32`, `num64`, `Numeric`, `NumStr`, `ObjAt`, `Order`, `Pair`, `Parameter`, `Perl`, `Pod::Block`, `Pod::Block::Code`, `Pod::Block::Comment`, `Pod::Block::Declarator`, `Pod::Block::Named`, `Pod::Block::Para`, `Pod::Block::Table`, `Pod::Heading`, `Pod::Item`, `Pointer`, `Positional`, `PositionalBindFailover`, `Proc`, `Proc::Async`, `Promise`, `Proxy`, `PseudoStash`, `QuantHash`, `RaceSeq`, `Raku`, `Range`, `Rat`, `Rational`, `RatStr`, `Real`, `Regex`, `Routine`, `Routine::WrapHandle`, `Scalar`, `Scheduler`, `Semaphore`, `Seq`, `Sequence`, `Set`, `SetHash`, `Setty`, `Signature`, `size_t`, `Slip`, `Stash`, `Str`, `StrDistance`, `Stringy`, `Sub`, `Submethod`, `Supplier`, `Supplier::Preserving`, `Supply`, `Systemic`, `Tap`, `Telemetry`, `Telemetry::Instrument::Thread`, `Telemetry::Instrument::ThreadPool`, `Telemetry::Instrument::Usage`, `Telemetry::Period`, `Telemetry::Sampler`, `Thread`, `Test`, `ThreadPoolScheduler`, `UInt`, `uint16`, `uint32`, `uint64`, `uint8`, `Uni`, `utf8`, `ValueObjAt`, `Variable`, `Version`, `VM`, `Whatever`, `WhateverCode`, `WrapHandle`, `NativeCall`, // Pragmas `precompilation`, `experimental`, `worries`, `MONKEY-TYPING`, `MONKEY-SEE-NO-EVAL`, `MONKEY-GUTS`, `fatal`, `lib`, `isms`, `newline`, `nqp`, `soft`, `strict`, `trace`, `variables`, } builtinTypesPattern := Words(`(? 
0 { if tokenClass == rakuPod { match, err := podRegex.FindRunesMatchStartingAt(text, searchPos+nChars) if err == nil { closingChars = match.Runes() nextClosePos = match.Index } else { nextClosePos = -1 } } else { nextClosePos = indexAt(text, closingChars, searchPos+nChars) } nextOpenPos := indexAt(text, openingChars, searchPos+nChars) switch { case nextClosePos == -1: nextClosePos = len(text) nestingLevel = 0 case nextOpenPos != -1 && nextOpenPos < nextClosePos: nestingLevel++ nChars = len(openingChars) searchPos = nextOpenPos default: // next_close_pos < next_open_pos nestingLevel-- nChars = len(closingChars) searchPos = nextClosePos } } endPos = nextClosePos } if endPos < 0 { // if we didn't find a closer, just highlight the // rest of the text in this class endPos = len(text) } adverbre := regexp.MustCompile(`:to\b|:heredoc\b`) var heredocTerminator []rune var endHeredocPos int if adverbre.MatchString(string(adverbs)) { if endPos != len(text) { heredocTerminator = text[state.Pos:endPos] nChars = len(heredocTerminator) } else { endPos = state.Pos + 1 heredocTerminator = []rune{} nChars = 0 } if nChars > 0 { endHeredocPos = indexAt(text[endPos:], heredocTerminator, 0) if endHeredocPos > -1 { endPos += endHeredocPos } else { endPos = len(text) } } } textBetweenBrackets := string(text[state.Pos:endPos]) switch tokenClass { case rakuPod, rakuPodDeclaration, rakuNameAttribute: state.NamedGroups[`value`] = textBetweenBrackets state.NamedGroups[`closing_delimiters`] = string(closingChars) case rakuQuote: if len(heredocTerminator) > 0 { // Length of heredoc terminator + closing chars + `;` heredocFristPunctuationLen := nChars + len(openingChars) + 1 state.NamedGroups[`opening_delimiters`] = string(openingChars) + string(text[state.Pos:state.Pos+heredocFristPunctuationLen]) state.NamedGroups[`value`] = string(text[state.Pos+heredocFristPunctuationLen : endPos]) if endHeredocPos > -1 { state.NamedGroups[`closing_delimiters`] = string(heredocTerminator) } } else { 
state.NamedGroups[`value`] = textBetweenBrackets if nChars > 0 { state.NamedGroups[`closing_delimiters`] = string(closingChars) } } default: state.Groups = []string{state.Groups[0] + string(text[state.Pos:endPos+nChars])} } state.Pos = endPos + nChars return nil } } // Raku rules // Empty capture groups are placeholders and will be replaced by mutators // DO NOT REMOVE THEM! return Rules{ "root": { // Placeholder, will be overwritten by mutators, DO NOT REMOVE! {`\A\z`, nil, nil}, Include("common"), {`{`, Punctuation, Push(`root`)}, {`\(`, Punctuation, Push(`root`)}, {`[)}]`, Punctuation, Pop(1)}, {`;`, Punctuation, nil}, {`\[|\]`, Operator, nil}, {`.+?`, Text, nil}, }, "common": { {`^#![^\n]*$`, CommentHashbang, nil}, Include("pod"), // Multi-line, Embedded comment { "#`(?(?" + bracketsPattern + `)\k*)`, CommentMultiline, findBrackets(rakuMultilineComment), }, {`#[^\n]*$`, CommentSingle, nil}, // /regex/ { `(?<=(?:^|\(|=|:|~~|\[|{|,|=>)\s*)(/)(?!\]|\))((?:\\\\|\\/|.)*?)((?>)(\S+?)(<<)`, ByGroups(Operator, UsingSelf("root"), Operator), nil}, {`(»)(\S+?)(«)`, ByGroups(Operator, UsingSelf("root"), Operator), nil}, // Hyperoperator | «*« {`(<<)(\S+?)(<<)`, ByGroups(Operator, UsingSelf("root"), Operator), nil}, {`(«)(\S+?)(«)`, ByGroups(Operator, UsingSelf("root"), Operator), nil}, // Hyperoperator | »*» {`(>>)(\S+?)(>>)`, ByGroups(Operator, UsingSelf("root"), Operator), nil}, {`(»)(\S+?)(»)`, ByGroups(Operator, UsingSelf("root"), Operator), nil}, // <> {`(?>)[^\n])+?[},;] *\n)(?!(?:(?!>>).)+?>>\S+?>>)`, Punctuation, Push("<<")}, // «quoted words» {`(? operators | something < onething > something { `(?<=[$@%&]?\w[\w':-]* +)(<=?)( *[^ ]+? 
*)(>=?)(?= *[$@%&]?\w[\w':-]*)`, ByGroups(Operator, UsingSelf("root"), Operator), nil, }, // { `(?])+?)(>)(?!\s*(?:\d+|\.(?:Int|Numeric)|[$@%]\*?\w[\w':-]*[^(]|\s+\[))`, ByGroups(Punctuation, String, Punctuation), nil, }, {`C?X::['\w:-]+`, NameException, nil}, Include("metaoperator"), // Pair | key => value { `(\w[\w'-]*)(\s*)(=>)`, ByGroups(String, Text, Operator), nil, }, Include("colon-pair"), // Token { `(?<=(?:^|\s)(?:regex|token|rule)(\s+))` + namePattern + colonPairLookahead + `\s*[({])`, NameFunction, Push("token", "name-adverb"), }, // Substitution {`(?<=^|\b|\s)(?(?:qq|q|Q))(?(?::?(?:heredoc|to|qq|ww|q|w|s|a|h|f|c|b|to|v|x))*)(?\s*)(?(?[^0-9a-zA-Z:\s])\k*)`, EmitterFunc(quote), findBrackets(rakuQuote), }, // Function { `\b` + namePattern + colonPairLookahead + `\()`, NameFunction, Push("name-adverb"), }, // Method { `(?(?[^\w:\s])\k*)`, ByGroupNames( map[string]Emitter{ `opening_delimiters`: Punctuation, `delimiter`: nil, }, ), findBrackets(rakuMatchRegex), }, }, "substitution": { Include("colon-pair-attribute"), // Substitution | s{regex} = value { `(?(?` + bracketsPattern + `)\k*)`, ByGroupNames(map[string]Emitter{ `opening_delimiters`: Punctuation, `delimiter`: nil, }), findBrackets(rakuMatchRegex), }, // Substitution | s/regex/string/ { `(?[^\w:\s])`, Punctuation, findBrackets(rakuSubstitutionRegex), }, }, "number": { {`0_?[0-7]+(_[0-7]+)*`, LiteralNumberOct, nil}, {`0x[0-9A-Fa-f]+(_[0-9A-Fa-f]+)*`, LiteralNumberHex, nil}, {`0b[01]+(_[01]+)*`, LiteralNumberBin, nil}, { `(?i)(\d*(_\d*)*\.\d+(_\d*)*|\d+(_\d*)*\.\d+(_\d*)*)(e[+-]?\d+)?`, LiteralNumberFloat, nil, }, {`(?i)\d+(_\d*)*e[+-]?\d+(_\d*)*`, LiteralNumberFloat, nil}, {`(?<=\d+)i`, NameConstant, nil}, {`\d+(_\d+)*`, LiteralNumberInteger, nil}, }, "name-adverb": { Include("colon-pair-attribute-keyvalue"), Default(Pop(1)), }, "colon-pair": { // :key(value) {colonPairPattern, colonPair(String), findBrackets(rakuNameAttribute)}, // :123abc { `(:)(\d+)(\w[\w'-]*)`, ByGroups(Punctuation, 
UsingSelf("number"), String), nil, }, // :key {`(:)(!?)(\w[\w'-]*)`, ByGroups(Punctuation, Operator, String), nil}, {`\s+`, Text, nil}, }, "colon-pair-attribute": { // :key(value) {colonPairPattern, colonPair(NameAttribute), findBrackets(rakuNameAttribute)}, // :123abc { `(:)(\d+)(\w[\w'-]*)`, ByGroups(Punctuation, UsingSelf("number"), NameAttribute), nil, }, // :key {`(:)(!?)(\w[\w'-]*)`, ByGroups(Punctuation, Operator, NameAttribute), nil}, {`\s+`, Text, nil}, }, "colon-pair-attribute-keyvalue": { // :key(value) {colonPairPattern, colonPair(NameAttribute), findBrackets(rakuNameAttribute)}, }, "escape-qq": { { `(? { `(?`), tokenType: Punctuation, stateName: `root`, pushState: true, }), }, // {code} Include(`closure`), // Properties {`(:)(\w+)`, ByGroups(Punctuation, NameAttribute), nil}, // Operator {`\|\||\||&&|&|\.\.|\*\*|%%|%|:|!|<<|«|>>|»|\+|\*\*|\*|\?|=|~|<~~>`, Operator, nil}, // Anchors {`\^\^|\^|\$\$|\$`, NameEntity, nil}, {`\.`, NameEntity, nil}, {`#[^\n]*\n`, CommentSingle, nil}, // Lookaround { `(?`), tokenType: Punctuation, stateName: `regex`, pushState: true, }), }, { `(?)`, ByGroups(Punctuation, Operator, OperatorWord, Punctuation), nil, }, // <$variable> { `(?)`, ByGroups(Punctuation, Operator, NameVariable, Punctuation), nil, }, // Capture markers {`(?`, Operator, nil}, { `(? 
{`(?`, Punctuation, Pop(1)}, // { `\(`, Punctuation, replaceRule(ruleReplacingConfig{ delimiter: []rune(`)>`), tokenType: Punctuation, stateName: `root`, popState: true, pushState: true, }), }, // { `\s+`, StringRegex, replaceRule(ruleReplacingConfig{ delimiter: []rune(`>`), tokenType: Punctuation, stateName: `regex`, popState: true, pushState: true, }), }, // { `:`, Punctuation, replaceRule(ruleReplacingConfig{ delimiter: []rune(`>`), tokenType: Punctuation, stateName: `root`, popState: true, pushState: true, }), }, }, "regex-variable": { Include(`regex-starting-operators`), // {`(&)?(\w[\w':-]*)(>)`, ByGroups(Operator, NameFunction, Punctuation), Pop(1)}, // `, Punctuation, Pop(1)}, Include("regex-class-builtin"), Include("variable"), Include(`regex-starting-operators`), Include("colon-pair-attribute"), {`(?] { `\b([RZX]+)\b(\[)([^\s\]]+?)(\])`, ByGroups(OperatorWord, Punctuation, UsingSelf("root"), Punctuation), nil, }, // Z=> {`\b([RZX]+)\b([^\s\]]+)`, ByGroups(OperatorWord, UsingSelf("operator")), nil}, }, "operator": { // Word Operator {wordOperatorsPattern, OperatorWord, nil}, // Operator {operatorsPattern, Operator, nil}, }, "pod": { // Single-line pod declaration {`(#[|=])\s`, Keyword, Push("pod-single")}, // Multi-line pod declaration { "(?#[|=])(?(?" + bracketsPattern + `)\k*)(?)(?)`, ByGroupNames( map[string]Emitter{ `keyword`: Keyword, `opening_delimiters`: Punctuation, `delimiter`: nil, `value`: UsingSelf("pod-declaration"), `closing_delimiters`: Punctuation, }), findBrackets(rakuPodDeclaration), }, Include("pod-blocks"), }, "pod-blocks": { // =begin code { `(?<=^ *)(? *)(?=begin)(? +)(?code)(?[^\n]*)(?.*?)(?^\k)(?=end)(? +)\k`, EmitterFunc(podCode), nil, }, // =begin { `(?<=^ *)(? *)(?=begin)(? 
+)(?!code)(?\w[\w'-]*)(?[^\n]*)(?)(?)`, ByGroupNames( map[string]Emitter{ `ws`: Comment, `keyword`: Keyword, `ws2`: StringDoc, `name`: Keyword, `config`: EmitterFunc(podConfig), `value`: UsingSelf("pod-begin"), `closing_delimiters`: Keyword, }), findBrackets(rakuPod), }, // =for ... { `(?<=^ *)(? *)(?=(?:for|defn))(? +)(?\w[\w'-]*)(?[^\n]*\n)`, ByGroups(Comment, Keyword, StringDoc, Keyword, EmitterFunc(podConfig)), Push("pod-paragraph"), }, // =config { `(?<=^ *)(? *)(?=config)(? +)(?\w[\w'-]*)(?[^\n]*\n)`, ByGroups(Comment, Keyword, StringDoc, Keyword, EmitterFunc(podConfig)), nil, }, // =alias { `(?<=^ *)(? *)(?=alias)(? +)(?\w[\w'-]*)(?[^\n]*\n)`, ByGroups(Comment, Keyword, StringDoc, Keyword, StringDoc), nil, }, // =encoding { `(?<=^ *)(? *)(?=encoding)(? +)(?[^\n]+)`, ByGroups(Comment, Keyword, StringDoc, Name), nil, }, // =para ... { `(?<=^ *)(? *)(?=(?:para|table|pod))(?(? *)(?=head\d+)(? *)(?#?)`, ByGroups(Comment, Keyword, GenericHeading, Keyword), Push("pod-heading"), }, // =item ... { `(?<=^ *)(? *)(?=(?:item\d*|comment|data|[A-Z]+))(? *)(?#?)`, ByGroups(Comment, Keyword, StringDoc, Keyword), Push("pod-paragraph"), }, { `(?<=^ *)(? *)(?=finish)(?[^\n]*)`, ByGroups(Comment, Keyword, EmitterFunc(podConfig)), Push("pod-finish"), }, // ={custom} ... { `(?<=^ *)(? *)(?=\w[\w'-]*)(? *)(?#?)`, ByGroups(Comment, Name, StringDoc, Keyword), Push("pod-paragraph"), }, // = podconfig { `(?<=^ *)(? *=)(? 
*)(?(?::\w[\w'-]*(?:` + colonPairOpeningBrackets + `.+?` + colonPairClosingBrackets + `) *)*\n)`, ByGroups(Keyword, StringDoc, EmitterFunc(podConfig)), nil, }, }, "pod-begin": { Include("pod-blocks"), Include("pre-pod-formatter"), {`.+?`, StringDoc, nil}, }, "pod-declaration": { Include("pre-pod-formatter"), {`.+?`, StringDoc, nil}, }, "pod-paragraph": { {`\n *\n|\n(?=^ *=)`, StringDoc, Pop(1)}, Include("pre-pod-formatter"), {`.+?`, StringDoc, nil}, }, "pod-single": { {`\n`, StringDoc, Pop(1)}, Include("pre-pod-formatter"), {`.+?`, StringDoc, nil}, }, "pod-heading": { {`\n *\n|\n(?=^ *=)`, GenericHeading, Pop(1)}, Include("pre-pod-formatter"), {`.+?`, GenericHeading, nil}, }, "pod-finish": { {`\z`, nil, Pop(1)}, Include("pre-pod-formatter"), {`.+?`, StringDoc, nil}, }, "pre-pod-formatter": { // C, B, ... { `(?[CBIUDTKRPAELZVMSXN])(?<+|«)`, ByGroups(Keyword, Punctuation), findBrackets(rakuPodFormatter), }, }, "pod-formatter": { // Placeholder rule, will be replaced by mutators. DO NOT REMOVE! {`>`, Punctuation, Pop(1)}, Include("pre-pod-formatter"), // Placeholder rule, will be replaced by mutators. DO NOT REMOVE! 
{`.+?`, StringOther, nil}, }, "variable": { {variablePattern, NameVariable, Push("name-adverb")}, {globalVariablePattern, NameVariableGlobal, Push("name-adverb")}, {`[$@]<[^>]+>`, NameVariable, nil}, {`\$[/!¢]`, NameVariable, nil}, {`[$@%]`, NameVariable, nil}, }, "single-quote": { {`(?>(?!\s*(?:\d+|\.(?:Int|Numeric)|[$@%]\*?[\w':-]+|\s+\[))`, Punctuation, Pop(1)}, Include("ww"), }, "«": { {`»(?!\s*(?:\d+|\.(?:Int|Numeric)|[$@%]\*?[\w':-]+|\s+\[))`, Punctuation, Pop(1)}, Include("ww"), }, "ww": { Include("single-quote"), Include("qq"), }, "qq": { Include("qq-variable"), Include("closure"), Include(`escape-char`), Include("escape-hexadecimal"), Include("escape-c-name"), Include("escape-qq"), {`.+?`, StringDouble, nil}, }, "qq-variable": { { `(?\.)(?` + namePattern + `)` + colonPairLookahead + `\()`, ByGroupNames(map[string]Emitter{ `operator`: Operator, `method_name`: NameFunction, }), Push(`name-adverb`), }, // Function/Signature { `\(`, Punctuation, replaceRule( ruleReplacingConfig{ delimiter: []rune(`)`), tokenType: Punctuation, stateName: `root`, pushState: true, }), }, Default(Pop(1)), }, "Q": { Include("escape-qq"), {`.+?`, String, nil}, }, "Q-closure": { Include("escape-qq"), Include("closure"), {`.+?`, String, nil}, }, "Q-variable": { Include("escape-qq"), Include("qq-variable"), {`.+?`, String, nil}, }, "closure": { {`(? 
-1 { idx = utf8.RuneCountInString(text[:idx]) // Search again if the substr is escaped with backslash if (idx > 1 && strFromPos[idx-1] == '\\' && strFromPos[idx-2] != '\\') || (idx == 1 && strFromPos[idx-1] == '\\') { idx = indexAt(str[pos:], substr, idx+1) idx = utf8.RuneCountInString(text[:idx]) if idx < 0 { return idx } } idx += pos } return idx }

// contains reports whether e is an element of s.
func contains(s []string, e string) bool {
	for _, value := range s {
		if value == e {
			return true
		}
	}
	return false
}

// rulePosition selects which rule slot of a state replaceRule overwrites.
type rulePosition int

const (
	// topRule addresses the first rule of a state (index 0).
	topRule rulePosition = 0
	// bottomRule addresses the last rule of a state. It is typed explicitly:
	// without the type it would be an untyped constant, because a constant
	// with its own expression does not inherit the type of the line above.
	bottomRule rulePosition = -1
)

// ruleMakingConfig carries the inputs makeRule uses to build a compiled rule.
type ruleMakingConfig struct {
	delimiter              []rune
	pattern                string
	tokenType              Emitter
	mutator                Mutator
	numberOfDelimiterChars int
}

// ruleReplacingConfig describes one rule replacement performed by
// replaceRule: what the new rule looks like (delimiter/pattern/tokenType),
// which slot of which state it goes into (stateName/rulePosition), which
// mutators it carries, and whether the state stack is popped and/or pushed
// while installing it.
type ruleReplacingConfig struct {
	delimiter              []rune
	pattern                string
	tokenType              Emitter
	numberOfDelimiterChars int
	mutator                Mutator
	appendMutator          Mutator
	rulePosition           rulePosition
	stateName              string
	// pop marks a replacement triggered by popRule; such a replacement is
	// not recorded on the replacement stack again.
	pop       bool
	popState  bool
	pushState bool
}

// popRule returns a mutator that drops the most recent entry from the
// replacement stack of rule's state/position and re-installs the entry
// below it via replaceRule.
//
// Nothing happens when the stack is missing or holds fewer than two entries,
// since there is then no previous rule to restore. Any error reported while
// re-installing the previous rule is returned to the caller.
func popRule(rule ruleReplacingConfig) MutatorFunc {
	return func(state *LexerState) error {
		stackName := genStackName(rule.stateName, rule.rulePosition)

		stack, ok := state.Get(stackName).([]ruleReplacingConfig)
		// At least two entries are required: the one being popped and the
		// one to restore. With a single entry, indexing the popped stack
		// would be out of range.
		if !ok || len(stack) < 2 {
			return nil
		}

		// Pop from stack
		stack = stack[:len(stack)-1]
		lastRule := stack[len(stack)-1]

		// Restore the rule only: do not touch the state stack, and flag the
		// call as a pop so replaceRule does not push the entry again.
		lastRule.pushState = false
		lastRule.popState = false
		lastRule.pop = true
		state.Set(stackName, stack)

		// Call replaceRule to use the last rule
		return replaceRule(lastRule)(state)
	}
}

// replaceRule returns a mutator that overwrites the top or bottom rule of
// rule.stateName with a rule built from the given config.
//
// The replacement is recorded on a per-state, per-position stack kept in the
// lexer state (see genStackName) unless rule.pop is set. The first time a
// stack is used it is seeded with the `\A\z` placeholder rule. When
// rule.popState and/or rule.pushState are set, the state stack is popped
// first and rule.stateName is pushed afterwards; errors from either step are
// returned.
func replaceRule(rule ruleReplacingConfig) MutatorFunc {
	return func(state *LexerState) error {
		stateName := rule.stateName
		stackName := genStackName(rule.stateName, rule.rulePosition)

		// A failed assertion leaves stack nil, which is handled like an
		// empty stack below.
		stack, _ := state.Get(stackName).([]ruleReplacingConfig)

		// If state-stack is empty fill it with the placeholder rule
		if len(stack) == 0 {
			stack = []ruleReplacingConfig{
				{
					// Placeholder, will be overwritten by mutators, DO NOT REMOVE!
					pattern:      `\A\z`,
					tokenType:    nil,
					mutator:      nil,
					stateName:    stateName,
					rulePosition: rule.rulePosition,
				},
			}
			state.Set(stackName, stack)
		}

		var mutators []Mutator
		switch {
		case rule.rulePosition == topRule && rule.mutator == nil:
			// Default mutator for top rule
			mutators = []Mutator{Pop(1), popRule(rule)}
		case rule.rulePosition == topRule && rule.mutator != nil:
			// Default mutator for top rule, when rule.mutator is set
			mutators = []Mutator{rule.mutator, popRule(rule)}
		case rule.mutator != nil:
			mutators = []Mutator{rule.mutator}
		}

		if rule.appendMutator != nil {
			mutators = append(mutators, rule.appendMutator)
		}

		// The compiled rule gets no mutator at all when none was collected.
		var mutator Mutator
		if len(mutators) > 0 {
			mutator = Mutators(mutators...)
		}

		cRule := makeRule(ruleMakingConfig{
			pattern:                rule.pattern,
			delimiter:              rule.delimiter,
			numberOfDelimiterChars: rule.numberOfDelimiterChars,
			tokenType:              rule.tokenType,
			mutator:                mutator,
		})

		switch rule.rulePosition {
		case topRule:
			state.Rules[stateName][0] = cRule
		case bottomRule:
			state.Rules[stateName][len(state.Rules[stateName])-1] = cRule
		}

		// Pop state name from stack if asked. State should be popped first
		// before pushing.
		if rule.popState {
			if err := Pop(1).Mutate(state); err != nil {
				return err
			}
		}

		// Push state name to stack if asked
		if rule.pushState {
			if err := Push(stateName).Mutate(state); err != nil {
				return err
			}
		}

		// Replacements triggered by popRule restore an entry that is already
		// on the stack, so they are not recorded again.
		if !rule.pop {
			state.Set(stackName, append(stack, rule))
		}

		return nil
	}
}

// genStackName builds the lexer-state key under which the replacement stack
// for the given state name and rule position is stored. It returns the empty
// string for a position other than topRule or bottomRule.
func genStackName(stateName string, rulePosition rulePosition) (stackName string) {
	switch rulePosition {
	case topRule:
		return stateName + `-top-stack`
	case bottomRule:
		return stateName + `-bottom-stack`
	}
	return ""
}

// Makes a compiled rule and returns it func makeRule(config ruleMakingConfig) *CompiledRule { var rePattern string if len(config.delimiter) > 0 { delimiter := string(config.delimiter) if config.numberOfDelimiterChars > 1 { delimiter = strings.Repeat(delimiter, config.numberOfDelimiterChars) } rePattern = `(? 1 { lang = langMatch[1] } // Tokenise code based on lang property sublexer := Get(lang) if sublexer != nil { iterator, err := sublexer.Tokenise(nil, state.NamedGroups[`value`]) if err != nil { panic(err) } else { iterators = append(iterators, iterator) } } else { iterators = append(iterators, Literator(tokens[4])) } // Append the rest of the tokens iterators = append(iterators, Literator(tokens[5:]...)) return Concaterator(iterators...) }