Thursday, July 23, 2009

Text index to Html Index conversion

This week I was working on an enhancement to add highlighting, patterned underline, and strikethrough functionality to the TextArea control of Flex. I ran into a problem keeping track of the position of the highlighted text when the user edits the content, so I decided to wrap the highlighted text in HTML tags. My problem was that I needed an algorithm to convert a text index into the corresponding HTML index.

After rigorous searching I found Andrei Ionescu's post on this. I liked his method, but reading through the comments I saw someone mention regular expressions, and I thought to myself: why not use regular expressions to preprocess the HTML instead of parsing it character by character?

Eventually I came up with this method, and I am posting it here for anyone interested. Feel free to use it.


/**
 * Returns the index into htmlStr that corresponds to the given index into
 * the plain text that htmlStr represents.
 *
 * The HTML is split into tokens (tags, the five basic entities, and runs of
 * plain text). Tokens are consumed left to right while two counters are kept:
 * one for HTML characters and one for the text characters they stand for.
 *
 * Zero-width tags that sit directly in front of the requested character are
 * skipped, so the returned index points at the character itself (or at the
 * start of the entity / one-character tag that represents it).
 *
 * @param htmlStr   The HTML markup to scan. null is treated as empty.
 * @param textIndex Zero-based index into the plain text. Negative values
 *                  yield 0; values past the end of the text yield
 *                  htmlStr.length.
 * @return The matching zero-based index into htmlStr.
 */
private function calculateHtmlIndex(htmlStr:String, textIndex:int):int
{
    if (htmlStr == null || textIndex < 0)
    {
        return 0;
    }

    // HTML tokens that stand for exactly ONE text character; every other tag
    // is treated as zero-width. (<BR> and </P> presumably map to a line
    // break in the rendered text -- confirm against the control in use.)
    var sizeOneEntities:String = "|&lt;|&gt;|&amp;|&quot;|&apos;|<BR>|</P>|";

    // Characters that can start a tag or an entity.
    var openTags:String = "<&";

    // Splits the markup into tags, entities and text runs. The last two
    // alternatives never match an empty string: a text run needs at least one
    // character, and a stray '<', '>' or '&' that is not part of a tag or
    // entity becomes its own one-character token instead of being skipped.
    var tagPattern:RegExp = /<[^<>\s\/]*\s*[^<>]*[^P]?>|&(?:lt|gt|amp|quot|apos);|[^<>&]+|[<>&]/g;

    var tokensArray:Array = htmlStr.match(tagPattern);
    if (tokensArray == null)
    {
        return 0;
    }

    var htmlCounter:int = 0;
    var textCounter:int = 0;
    var tokenCount:int = tokensArray.length;

    for (var i:int = 0; i < tokenCount; i++)
    {
        var currentToken:String = tokensArray[i];

        // A real tag/entity is at least two characters long; a lone '<' or
        // '&' is a stray literal and is counted as ordinary text.
        var isHtmlToken:Boolean = currentToken.length > 1
            && openTags.indexOf(currentToken.charAt(0)) != -1;

        if (isHtmlToken)
        {
            if (sizeOneEntities.indexOf("|" + currentToken + "|") != -1)
            {
                if (textCounter >= textIndex)
                {
                    // The requested character IS this entity/tag: stop in
                    // front of it rather than after it.
                    break;
                }
                textCounter++; // this HTML corresponds to one text character
            }
            htmlCounter += currentToken.length;
        }
        else if ((textCounter + currentToken.length) >= textIndex)
        {
            // The text index falls inside (or at the end of) this text run.
            htmlCounter += textIndex - textCounter;
            break;
        }
        else
        {
            // Whole run lies before the target: advance both counters.
            htmlCounter += currentToken.length;
            textCounter += currentToken.length;
        }
    }

    // If the tokens ran out first, htmlCounter equals htmlStr.length, which
    // clamps an out-of-range textIndex to the end of the markup.
    return htmlCounter;
}

No comments:

Post a Comment