site-tpe/EnlighterJS/Source/Tokenizer/Standard.js
2025-05-16 18:49:08 +02:00

114 lines
3.3 KiB
JavaScript

/*
---
description: Enlighter's Standard Tokenizer Engine
license: MIT-style
authors:
- Andi Dittrich
requires:
- Core/1.4.5
provides: [Tokenizer.Standard]
...
*/
EJS.Tokenizer.Standard = new Class({

    /**
     * Constructor. The standard tokenizer needs no setup.
     */
    initialize : function(){
    },

    /**
     * Splits `code` into an ordered, gap-free, non-overlapping list of tokens.
     *
     * Every rule returned by `language.getRules()` is applied to the complete
     * source string. All resulting candidates (plus the ones supplied by
     * `getPreprocessedTokens()`) are sorted by start position; whenever two
     * candidates overlap, the one that starts first is kept and the other is
     * dropped. Text that is not covered by any candidate is emitted as a token
     * with an empty alias.
     *
     * @param {Object} language - object exposing `getRules()`; each rule is
     *        read as `{pattern: RegExp, alias: String}`
     * @param {String} code - the source code to tokenize
     * @returns {Array} token objects `{text, alias, index, length, end}`
     *          ordered by `index`
     */
    getTokens : function(language, code){
        // token factory - also handed to getPreprocessedTokens() so that
        // extending classes create tokens of the very same shape
        var token = function(text, alias, index){
            return {
                text: text,
                alias: alias,
                index: index,
                length: text.length,
                end: text.length + index
            };
        };

        // candidate list, seeded with the pre-processed tokens
        var rawTokens = this.getPreprocessedTokens(token);

        // apply each rule to the given sourcecode string
        Array.each(language.getRules(), function(rule){
            var pattern = rule.pattern;
            var match;

            // exec() resumes at lastIndex; a stale value (pattern object used
            // elsewhere or a previously interrupted run) would make the scan
            // skip the beginning of the code
            pattern.lastIndex = 0;

            // find ALL possible matches (also overlapping ones!)
            while ((match = pattern.exec(code)) !== null){
                // overrides the usual regex behaviour of not matching results that overlap
                pattern.lastIndex = match.index + 1;

                if (match.length === 1){
                    // no capture groups - use the full pattern match
                    rawTokens.push(token(match[0], rule.alias, match.index));
                }else{
                    // capture groups used - every non-empty group becomes a token.
                    // NOTE: the group offset is located via indexOf(), therefore the
                    // FIRST occurrence of the group text within the full match is used
                    for (var g = 1; g < match.length; g++){
                        if (match[g] && match[g].length > 0){
                            rawTokens.push(token(match[g], rule.alias, match.index + match[0].indexOf(match[g])));
                        }
                    }
                }

                // without the global (or sticky) flag exec() ignores lastIndex and
                // returns the same first match again and again - stop after that
                // single match instead of looping forever
                if (!pattern.global && !pattern.sticky){
                    break;
                }
            }
        });

        // sort tokens by index (first occurrence); with a stable sort, candidates
        // sharing the same index keep their insertion order
        rawTokens.sort(function(token1, token2){
            return token1.index - token2.index;
        });

        // cleaned token list to render
        var tokens = [];

        // end position of the last retained token
        var lastTokenEnd = 0;

        // single pass over the sorted candidates: retain the first match - drop overlaps
        for (var i = 0; i < rawTokens.length; i++){
            var current = rawTokens[i];

            // starts inside the previously retained token ? -> overlap, drop it
            if (current.index < lastTokenEnd){
                continue;
            }

            // unmatched text between tokens ? -> emit it as plain text token
            if (lastTokenEnd < current.index){
                tokens.push(token(code.substring(lastTokenEnd, current.index), '', lastTokenEnd));
            }

            tokens.push(current);
            lastTokenEnd = current.end;
        }

        // trailing text behind the last retained token ?
        if (lastTokenEnd < code.length){
            tokens.push(token(code.substring(lastTokenEnd), '', lastTokenEnd));
        }

        return tokens;
    },

    /**
     * Token pre-processing hook; can be overloaded by an extending class to
     * supply candidate tokens ahead of the rule based matching.
     *
     * @param {Function} token - factory `token(text, alias, index)` creating
     *        a token object
     * @returns {Array} initial candidate tokens - empty in this implementation
     */
    getPreprocessedTokens: function(token){
        return [];
    }
});