// org.gjt.sp.jedit.TextUtilities Maven / Gradle / Ivy
/*
* TextUtilities.java - Various text functions
* Copyright (C) 1998, 2005 Slava Pestov
* :tabSize=8:indentSize=8:noTabs=false:
* :folding=explicit:collapseFolds=1:
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
package org.gjt.sp.jedit;
//{{{ Imports
import java.util.*;
import javax.swing.text.Segment;
import org.gjt.sp.jedit.buffer.JEditBuffer;
import org.gjt.sp.jedit.syntax.*;
import org.gjt.sp.util.StandardUtilities;
//}}}
/**
 * Contains several text manipulation methods.
 *
 * <ul>
 * <li>Bracket matching
 * <li>Word start and end offset calculation
 * <li>String comparison
 * <li>Converting tabs to spaces and vice versa
 * <li>Wrapping text
 * <li>String case conversion
 * </ul>
 *
 * @author Slava Pestov
 * @version $Id: TextUtilities.java 12529 2008-05-01 04:47:58Z vanza $
 */
public class TextUtilities
{
/**
 * Bounds how many lines away from the starting line
 * {@link #findMatchingBracket} keeps searching, in either direction,
 * before giving up. This avoids slowdown with large files.
 */
public static final int BRACKET_MATCH_LIMIT = 10000;
//{{{ getTokenAtOffset() method
/**
 * Walks a token list and returns the first token whose end
 * (<code>offset + length</code>) lies beyond the requested offset.
 * As a special case, a list that consists of just the END token is
 * returned as-is when the requested offset is 0.
 * @param tokens The first token of the list
 * @param offset The offset to look up
 * @return The token covering the offset
 * @throws ArrayIndexOutOfBoundsException if the END token is reached
 * before a covering token is found
 * @since jEdit 4.0pre3
 */
public static Token getTokenAtOffset(Token tokens, int offset)
{
	// an empty line: the only sensible answer is the END token itself
	if(offset == 0 && tokens.id == Token.END)
		return tokens;

	Token current = tokens;
	while(current.id != Token.END)
	{
		if(current.offset + current.length > offset)
			return current;
		current = current.next;
	}

	// ran off the end of the list without covering the offset
	throw new ArrayIndexOutOfBoundsException("offset > line length");
} //}}}
//{{{ getComplementaryBracket() method
/**
 * Given an opening bracket, return the corresponding closing bracket
 * and store <code>true</code> in <code>direction[0]</code>. Given a
 * closing bracket, return the corresponding opening bracket and store
 * <code>false</code> in <code>direction[0]</code>. Otherwise, return
 * <code>\0</code> and leave <code>direction</code> untouched.
 * @param ch The character to inspect
 * @param direction Receives the search direction in element 0; may be
 * <code>null</code> if the caller is not interested in it
 * @return The complementary bracket, or <code>\0</code> if
 * <code>ch</code> is not one of <code>( ) [ ] { }</code>
 * @since jEdit 4.3pre2
 */
public static char getComplementaryBracket(char ch, boolean[] direction)
{
	// position n in one string pairs with position n in the other
	final String openers = "([{";
	final String closers = ")]}";

	int idx = openers.indexOf(ch);
	if(idx != -1)
	{
		if(direction != null)
			direction[0] = true;
		return closers.charAt(idx);
	}

	idx = closers.indexOf(ch);
	if(idx != -1)
	{
		if(direction != null)
			direction[0] = false;
		return openers.charAt(idx);
	}

	return '\0';
} //}}}
//{{{ findMatchingBracket() method
/**
 * Returns the offset of the bracket matching the one at the
 * specified offset of the buffer, or -1 if the bracket is
 * unmatched (or if the character is not a bracket).
 * <p>
 * Only bracket characters that lie in a syntax token with the same id
 * as the token containing the starting bracket are counted. The search
 * gives up once it is more than {@link #BRACKET_MATCH_LIMIT} lines away
 * from the starting line.
 * @param buffer The buffer
 * @param line The line
 * @param offset The offset within that line
 * @return The matching bracket's position, computed as the start offset
 * of its line plus its index within that line, or -1 if none was found
 * @throws ArrayIndexOutOfBoundsException if <code>offset</code> is
 * negative or not less than the length of the line
 * @since jEdit 2.6pre1
 */
public static int findMatchingBracket(JEditBuffer buffer, int line, int offset)
{
if(offset < 0 || offset >= buffer.getLineLength(line))
{
throw new ArrayIndexOutOfBoundsException(offset + ":"
+ buffer.getLineLength(line));
}
Segment lineText = new Segment();
buffer.getLineText(line,lineText);
char c = lineText.array[lineText.offset + offset];
// false - backwards, true - forwards
boolean[] direction = new boolean[1];
// corresponding character
char cprime = getComplementaryBracket(c,direction);
if( cprime == '\0' )
{ // c is no bracket
return -1;
}
// 1 because we've already 'seen' the first bracket
int count = 1;
DefaultTokenHandler tokenHandler = new DefaultTokenHandler();
buffer.markTokens(line,tokenHandler);
// Get the syntax token at 'offset'
// only tokens with the same type will be checked for
// the corresponding bracket
byte idOfBracket = getTokenAtOffset(tokenHandler.getTokens(),offset).id;
// true while tokenHandler holds the tokens of the line currently being
// scanned; cleared on every line change so that a line is only
// re-tokenized if a candidate bracket character is actually found on it
boolean haveTokens = true;
int startLine = line;
//{{{ Forward search
if(direction[0])
{
// start just after the opening bracket
offset++;
for(;;)
{
for(int i = offset; i < lineText.count; i++)
{
char ch = lineText.array[lineText.offset + i];
if(ch == c)
{
// another bracket of the same kind: one more level of nesting,
// provided it sits in a token of the same type
if(!haveTokens)
{
tokenHandler.init();
buffer.markTokens(line,tokenHandler);
haveTokens = true;
}
if(getTokenAtOffset(tokenHandler.getTokens(),i).id == idOfBracket)
count++;
}
else if(ch == cprime)
{
// complementary bracket: closes one level; when the nesting
// count reaches zero this is the match
if(!haveTokens)
{
tokenHandler.init();
buffer.markTokens(line,tokenHandler);
haveTokens = true;
}
if(getTokenAtOffset(tokenHandler.getTokens(),i).id == idOfBracket)
{
count--;
if(count == 0)
return buffer.getLineStartOffset(line) + i;
}
}
}
//{{{ Go on to next line
line++;
if(line >= buffer.getLineCount() || (line - startLine) > BRACKET_MATCH_LIMIT)
break;
buffer.getLineText(line,lineText);
offset = 0;
haveTokens = false;
//}}}
}
} //}}}
//{{{ Backward search
else
{
// start just before the closing bracket
offset--;
for(;;)
{
for(int i = offset; i >= 0; i--)
{
char ch = lineText.array[lineText.offset + i];
if(ch == c)
{
// another bracket of the same kind: one more level of nesting,
// provided it sits in a token of the same type
if(!haveTokens)
{
tokenHandler.init();
buffer.markTokens(line,tokenHandler);
haveTokens = true;
}
if(getTokenAtOffset(tokenHandler.getTokens(),i).id == idOfBracket)
count++;
}
else if(ch == cprime)
{
// complementary bracket: closes one level; when the nesting
// count reaches zero this is the match
if(!haveTokens)
{
tokenHandler.init();
buffer.markTokens(line,tokenHandler);
haveTokens = true;
}
if(getTokenAtOffset(tokenHandler.getTokens(),i).id == idOfBracket)
{
count--;
if(count == 0)
return buffer.getLineStartOffset(line) + i;
}
}
}
//{{{ Go on to previous line
line--;
if(line < 0 || (startLine - line) > BRACKET_MATCH_LIMIT)
break;
buffer.getLineText(line,lineText);
// resume from the last character of the previous line
offset = lineText.count - 1;
haveTokens = false;
//}}}
}
} //}}}
// Nothing found
return -1;
} //}}}
//{{{ findWordStart() method
/**
 * Locates the start of the word at the specified position.
 * Consecutive non-alphanumeric characters are joined into one word and
 * whitespace before the word is not included.
 * @param line The text
 * @param pos The position
 * @param noWordSep Characters that are non-alphanumeric, but
 * should be treated as word characters anyway
 * @return The offset of the word start
 */
public static int findWordStart(String line, int pos, String noWordSep)
{
	final boolean joinNonWordChars = true;
	final boolean eatWhitespace = false;
	return findWordStart(line, pos, noWordSep,
		joinNonWordChars, eatWhitespace);
} //}}}
//{{{ findWordStart() method
/**
 * Locates the start of the word at the specified position.
 * Consecutive non-alphanumeric characters are joined into one word;
 * camel-case splitting and whitespace eating are both disabled.
 * @param line The text
 * @param pos The position
 * @param noWordSep Characters that are non-alphanumeric, but
 * should be treated as word characters anyway
 * @return The offset of the word start
 * @since jEdit 4.3pre15
 */
public static int findWordStart(CharSequence line,
	int pos,
	String noWordSep)
{
	final boolean joinNonWordChars = true;
	final boolean camelCasedWords = false;
	final boolean eatWhitespace = false;
	return findWordStart(line, pos, noWordSep,
		joinNonWordChars, camelCasedWords, eatWhitespace);
} //}}}
//{{{ join() method
/**
 * Similar to perl's join() method on lists,
 * but works with all collections.
 * <p>
 * Each element is converted with <code>String.valueOf()</code>, so a
 * <code>null</code> element contributes the text <code>"null"</code>
 * instead of causing a <code>NullPointerException</code>.
 *
 * @param c An iterable collection of Objects
 * @param delim a string to put between each object
 * @return a joined toString() representation of the collection;
 * the empty string if the collection is empty
 *
 * @since jedit 4.3pre3
 */
public static String join(Collection<?> c, String delim)
{
	StringBuilder retval = new StringBuilder();
	Iterator<?> itr = c.iterator();
	while(itr.hasNext())
	{
		// append(Object) goes through String.valueOf(), which is null-safe
		retval.append(itr.next());
		// the delimiter goes between elements only, never after the last
		if(itr.hasNext())
			retval.append(delim);
	}
	return retval.toString();
} //}}}
//{{{ findWordStart() method
/**
 * Locates the start of the word at the specified position.
 * Whitespace before the word is not included.
 * @param line The text
 * @param pos The position
 * @param noWordSep Characters that are non-alphanumeric, but
 * should be treated as word characters anyway
 * @param joinNonWordChars Treat consecutive non-alphanumeric
 * characters as one word
 * @return The offset of the word start
 * @since jEdit 4.2pre5
 */
public static int findWordStart(String line, int pos, String noWordSep,
	boolean joinNonWordChars)
{
	final boolean eatWhitespace = false;
	return findWordStart(line, pos, noWordSep,
		joinNonWordChars, eatWhitespace);
} //}}}
//{{{ findWordStart() method
/**
 * Locates the start of the word at the specified position.
 * Camel-case splitting is disabled.
 * @param line The text
 * @param pos The position
 * @param noWordSep Characters that are non-alphanumeric, but
 * should be treated as word characters anyway
 * @param joinNonWordChars Treat consecutive non-alphanumeric
 * characters as one word
 * @param eatWhitespace Include whitespace at start of word
 * @return The offset of the word start
 * @since jEdit 4.1pre2
 */
public static int findWordStart(String line, int pos, String noWordSep,
	boolean joinNonWordChars, boolean eatWhitespace)
{
	final boolean camelCasedWords = false;
	return findWordStart(line, pos, noWordSep,
		joinNonWordChars, camelCasedWords, eatWhitespace);
} //}}}
//{{{ findWordStart() method
/**
 * Locates the start of the word at the specified position.
 * This <code>String</code> overload simply forwards to the
 * <code>CharSequence</code> variant with the same arguments.
 * @param line The text
 * @param pos The position
 * @param noWordSep Characters that are non-alphanumeric, but
 * should be treated as word characters anyway
 * @param joinNonWordChars Treat consecutive non-alphanumeric
 * characters as one word
 * @param camelCasedWords Treat "camelCased" parts as words
 * @param eatWhitespace Include whitespace at start of word
 * @return The offset of the word start
 * @since jEdit 4.3pre10
 */
public static int findWordStart(String line, int pos, String noWordSep,
	boolean joinNonWordChars, boolean camelCasedWords,
	boolean eatWhitespace)
{
	// widen the static type so the CharSequence overload is selected
	final CharSequence text = line;
	return findWordStart(text, pos, noWordSep,
		joinNonWordChars, camelCasedWords, eatWhitespace);
} //}}}
//{{{ findWordStart() method
/**
 * Locates the start of the word at the specified position by scanning
 * backwards from <code>pos</code>. The kind of character found at
 * <code>pos</code> (whitespace, word character or symbol) decides which
 * characters are considered part of the same word.
 * @param line The text
 * @param pos The position; must be a valid index into <code>line</code>
 * @param noWordSep Characters that are non-alphanumeric, but
 * should be treated as word characters anyway; <code>null</code> is
 * treated as the empty string
 * @param joinNonWordChars Treat consecutive non-alphanumeric
 * characters as one word
 * @param camelCasedWords Treat "camelCased" parts as words
 * @param eatWhitespace Include whitespace at start of word
 * @return The offset of the word start, or 0 if the scan reaches the
 * beginning of the text
 * @since jEdit 4.3pre15
 */
public static int findWordStart(CharSequence line,
	int pos,
	String noWordSep,
	boolean joinNonWordChars,
	boolean camelCasedWords,
	boolean eatWhitespace)
{
	char ch = line.charAt(pos);

	if(noWordSep == null)
		noWordSep = "";

	// the character under the cursor changes how we behave
	int type = getCharType(ch, noWordSep);

	for(int i = pos; i >= 0; i--)
	{
		// character seen in the previous iteration, i.e. the one
		// immediately to the right of ch (ch itself on the first pass)
		final char following = ch;
		ch = line.charAt(i);

		final boolean blank = Character.isWhitespace(ch);
		final boolean wordLike = Character.isLetterOrDigit(ch)
			|| noWordSep.indexOf(ch) != -1;

		if(type == WHITESPACE)
		{
			// only other whitespace belongs to a whitespace run
			if(!blank)
				return i + 1;
		}
		else if(type == WORD_CHAR)
		{
			if(camelCasedWords)
			{
				final boolean upper = Character.isUpperCase(ch);
				final boolean followingUpper = Character.isUpperCase(following);

				// upper case char followed by a non-upper letter/digit:
				// this upper case char starts the camel-case part
				if(upper && !followingUpper
					&& Character.isLetterOrDigit(following))
				{
					return i;
				}
				// non-upper char followed by an upper case char:
				// the part starts at the upper case char
				if(!upper && followingUpper)
					return i + 1;
			}

			// word char; keep going
			if(wordLike)
				continue;

			// whitespace; include in word if eating, otherwise stop
			if(blank && eatWhitespace)
				type = WHITESPACE;
			else
				return i + 1;
		}
		else if(type == SYMBOL)
		{
			// without joining, a symbol is a word on its own
			if(!joinNonWordChars && pos != i)
				return i + 1;

			if(blank)
			{
				// whitespace; include in word if eating
				if(!eatWhitespace)
					return i + 1;
				type = WHITESPACE;
			}
			else if(wordLike)
			{
				return i + 1;
			}
			// another symbol; keep going
		}
	}

	return 0;
} //}}}
//{{{ findWordEnd() method
/**
 * Locates the end of the word at the specified position.
 * Consecutive non-alphanumeric characters are joined into one word.
 * @param line The text
 * @param pos The position
 * @param noWordSep Characters that are non-alphanumeric, but
 * should be treated as word characters anyway
 * @return The offset of the word end
 */
public static int findWordEnd(String line, int pos, String noWordSep)
{
	final boolean joinNonWordChars = true;
	return findWordEnd(line, pos, noWordSep, joinNonWordChars);
} //}}}
//{{{ findWordEnd() method
/**
 * Locates the end of the word at the specified position.
 * Consecutive non-alphanumeric characters are joined into one word;
 * camel-case splitting and whitespace eating are both disabled.
 * @param line The text
 * @param pos The position
 * @param noWordSep Characters that are non-alphanumeric, but
 * should be treated as word characters anyway
 * @return The offset of the word end
 * @since jEdit 4.3pre15
 */
public static int findWordEnd(CharSequence line,
	int pos,
	String noWordSep)
{
	final boolean joinNonWordChars = true;
	final boolean camelCasedWords = false;
	final boolean eatWhitespace = false;
	return findWordEnd(line, pos, noWordSep,
		joinNonWordChars, camelCasedWords, eatWhitespace);
} //}}}
//{{{ findWordEnd() method
/**
 * Locates the end of the word at the specified position.
 * Whitespace after the word is not included.
 * @param line The text
 * @param pos The position
 * @param noWordSep Characters that are non-alphanumeric, but
 * should be treated as word characters anyway
 * @param joinNonWordChars Treat consecutive non-alphanumeric
 * characters as one word
 * @return The offset of the word end
 * @since jEdit 4.1pre2
 */
public static int findWordEnd(String line, int pos, String noWordSep,
	boolean joinNonWordChars)
{
	final boolean eatWhitespace = false;
	return findWordEnd(line, pos, noWordSep,
		joinNonWordChars, eatWhitespace);
} //}}}
//{{{ findWordEnd() method
/**
 * Locates the end of the word at the specified position.
 * Camel-case splitting is disabled.
 * @param line The text
 * @param pos The position
 * @param noWordSep Characters that are non-alphanumeric, but
 * should be treated as word characters anyway
 * @param joinNonWordChars Treat consecutive non-alphanumeric
 * characters as one word
 * @param eatWhitespace Include whitespace at end of word
 * @return The offset of the word end
 * @since jEdit 4.2pre5
 */
public static int findWordEnd(String line, int pos, String noWordSep,
	boolean joinNonWordChars, boolean eatWhitespace)
{
	final boolean camelCasedWords = false;
	return findWordEnd(line, pos, noWordSep,
		joinNonWordChars, camelCasedWords, eatWhitespace);
} //}}}
//{{{ findWordEnd() method
/**
 * Locates the end of the word at the specified position.
 * This <code>String</code> overload simply forwards to the
 * <code>CharSequence</code> variant with the same arguments.
 * @param line The text
 * @param pos The position
 * @param noWordSep Characters that are non-alphanumeric, but
 * should be treated as word characters anyway
 * @param joinNonWordChars Treat consecutive non-alphanumeric
 * characters as one word
 * @param camelCasedWords Treat "camelCased" parts as words
 * @param eatWhitespace Include whitespace at end of word
 * @return The offset of the word end
 * @since jEdit 4.3pre10
 */
public static int findWordEnd(String line, int pos, String noWordSep,
	boolean joinNonWordChars, boolean camelCasedWords,
	boolean eatWhitespace)
{
	// widen the static type so the CharSequence overload is selected
	final CharSequence text = line;
	return findWordEnd(text, pos, noWordSep,
		joinNonWordChars, camelCasedWords, eatWhitespace);
} //}}}
//{{{ findWordEnd() method
/**
 * Locates the end of the word at the specified position by scanning
 * forwards. Unless <code>pos</code> is 0, the scan starts one character
 * before <code>pos</code>; the kind of character found there
 * (whitespace, word character or symbol) decides which characters are
 * considered part of the same word.
 * @param line The text
 * @param pos The position
 * @param noWordSep Characters that are non-alphanumeric, but
 * should be treated as word characters anyway; <code>null</code> is
 * treated as the empty string
 * @param joinNonWordChars Treat consecutive non-alphanumeric
 * characters as one word
 * @param camelCasedWords Treat "camelCased" parts as words
 * @param eatWhitespace Include whitespace at end of word
 * @return The offset of the word end, or the length of the text if the
 * scan reaches its end
 * @since jEdit 4.3pre15
 */
public static int findWordEnd(CharSequence line,
	int pos,
	String noWordSep,
	boolean joinNonWordChars,
	boolean camelCasedWords,
	boolean eatWhitespace)
{
	// step back one character so the scan starts on the character
	// before the given position (when there is one)
	final int start = (pos == 0) ? 0 : pos - 1;

	char ch = line.charAt(start);

	if(noWordSep == null)
		noWordSep = "";

	// the character under the cursor changes how we behave
	int type = getCharType(ch, noWordSep);

	for(int i = start; i < line.length(); i++)
	{
		// character seen in the previous iteration, i.e. the one
		// immediately to the left of ch (ch itself on the first pass)
		final char previous = ch;
		ch = line.charAt(i);

		final boolean blank = Character.isWhitespace(ch);
		final boolean wordLike = Character.isLetterOrDigit(ch)
			|| noWordSep.indexOf(ch) != -1;

		if(type == WHITESPACE)
		{
			// only other whitespace belongs to a whitespace run
			if(!blank)
				return i;
		}
		else if(type == WORD_CHAR)
		{
			if(camelCasedWords)
			{
				final boolean upper = Character.isUpperCase(ch);
				final boolean previousUpper = Character.isUpperCase(previous);

				// a run of upper case chars followed by a non-upper
				// letter/digit: the last upper case char belongs to the
				// next camel-case part, so stop just before it
				if(i > start + 1 && !upper
					&& Character.isLetterOrDigit(ch) && previousUpper)
				{
					return i - 1;
				}
				// first upper case char after a non-upper one
				// starts the next camel-case part
				if(upper && !previousUpper)
					return i;
			}

			// word char; keep going
			if(wordLike)
				continue;

			// whitespace; include in word if eating, otherwise stop
			if(blank && eatWhitespace)
				type = WHITESPACE;
			else
				return i;
		}
		else if(type == SYMBOL)
		{
			// without joining, a symbol is a word on its own
			if(!joinNonWordChars && i != start)
				return i;

			if(blank)
			{
				// whitespace; include in word if eating
				if(!eatWhitespace)
					return i;
				type = WHITESPACE;
			}
			else if(wordLike)
			{
				return i;
			}
			// another symbol; keep going
		}
	}

	return line.length();
} //}}}
/**
 * Classifies a character for the word boundary scanners.
 *
 * @param ch the character
 * @param noWordSep Characters that are non-alphanumeric, but
 * should be treated as word characters anyway
 * @return {@link #WHITESPACE} for whitespace, {@link #WORD_CHAR} for a
 * letter, a digit or a character contained in <code>noWordSep</code>,
 * and {@link #SYMBOL} for everything else
 */
private static int getCharType(char ch, String noWordSep)
{
	// whitespace is tested first, so a whitespace character listed
	// in noWordSep is still classified as whitespace
	if(Character.isWhitespace(ch))
		return WHITESPACE;

	if(Character.isLetterOrDigit(ch) || noWordSep.indexOf(ch) != -1)
		return WORD_CHAR;

	return SYMBOL;
}
//{{{ spacesToTabs() method
/**
 * Converts consecutive spaces to tabs in the specified string.
 * Every run of spaces and tabs is measured in columns and then
 * re-created through
 * <code>StandardUtilities.createWhiteSpace()</code>; all other
 * characters are copied unchanged. The column counter restarts after
 * each newline.
 * @param in The string
 * @param tabSize The tab size
 * @return The converted string
 */
public static String spacesToTabs(String in, int tabSize)
{
	final StringBuilder out = new StringBuilder();
	// current column within the line
	int column = 0;
	// width, in columns, of the whitespace run not yet written out
	int pending = 0;

	final int length = in.length();
	for(int i = 0; i < length; i++)
	{
		final char c = in.charAt(i);
		if(c == ' ')
		{
			pending++;
			column++;
		}
		else if(c == '\t')
		{
			// a tab advances to the next multiple of tabSize
			final int advance = tabSize - (column % tabSize);
			column += advance;
			pending += advance;
		}
		else
		{
			// any other character ends the run: flush it first
			if(pending != 0)
			{
				out.append(StandardUtilities.createWhiteSpace(
					pending, tabSize, column - pending));
				pending = 0;
			}
			out.append(c);
			if(c == '\n')
				column = 0;
			else
				column++;
		}
	}

	// trailing whitespace at the very end of the input
	if(pending != 0)
	{
		out.append(StandardUtilities.createWhiteSpace(
			pending, tabSize, column - pending));
	}

	return out.toString();
} //}}}
//{{{ tabsToSpaces() method
/**
 * Converts tabs to consecutive spaces in the specified string.
 * Each tab is replaced by as many spaces as are needed to reach the
 * next multiple of <code>tabSize</code>; the column counter restarts
 * after each newline.
 * @param in The string
 * @param tabSize The tab size
 * @return The string with every tab expanded to spaces
 */
public static String tabsToSpaces(String in, int tabSize)
{
	final StringBuilder out = new StringBuilder();
	// current column within the line
	int column = 0;

	for(int i = 0, n = in.length(); i < n; i++)
	{
		final char c = in.charAt(i);
		if(c == '\t')
		{
			final int pad = tabSize - (column % tabSize);
			column += pad;
			for(int k = 0; k < pad; k++)
				out.append(' ');
		}
		else
		{
			out.append(c);
			column = (c == '\n') ? 0 : column + 1;
		}
	}

	return out.toString();
} //}}}
//{{{ format() method
/**
 * Formats the specified text by merging and breaking lines to the
 * specified width. The text is split into paragraphs at every
 * occurrence of two consecutive newlines; each paragraph is re-flowed
 * separately and the two-newline separators are kept in the result.
 * @param text The text
 * @param maxLineLength The maximum line length
 * @param tabSize The tab size
 * @return The formatted text
 */
public static String format(String text, int maxLineLength, int tabSize)
{
	final StringBuilder out = new StringBuilder();

	int start = 0;
	int separator = text.indexOf("\n\n", start);
	while(separator != -1)
	{
		formatParagraph(text.substring(start, separator),
			maxLineLength, tabSize, out);
		out.append("\n\n");
		start = separator + 2;
		separator = text.indexOf("\n\n", start);
	}

	// whatever follows the last separator is the final paragraph
	if(start != text.length())
	{
		formatParagraph(text.substring(start),
			maxLineLength, tabSize, out);
	}

	return out.toString();
} //}}}
//{{{ indexIgnoringWhitespace() method
/**
 * Inverse of {@link #ignoringWhitespaceIndex(String,int)}.
 * @param str a string (not an empty string)
 * @param index The index
 * @return The number of non-whitespace characters that precede the index.
 * @since jEdit 4.3pre2
 */
public static int indexIgnoringWhitespace(String str, int index)
{
	int nonWhitespace = 0;
	int i = 0;
	while(i < index)
	{
		if(!Character.isWhitespace(str.charAt(i)))
			nonWhitespace++;
		i++;
	}
	return nonWhitespace;
} //}}}
//{{{ ignoringWhitespaceIndex() method
/**
 * Inverse of {@link #indexIgnoringWhitespace(String,int)}.
 * @param str a string (not an empty string)
 * @param index The number of non-whitespace characters to skip
 * @return The index into the string where the number of non-whitespace
 * characters that precede it is <code>index</code>, or the length of
 * the string if it contains no more than <code>index</code>
 * non-whitespace characters.
 * @since jEdit 4.3pre2
 */
public static int ignoringWhitespaceIndex(String str, int index)
{
	final int last = str.length() - 1;
	int nonWhitespace = 0;
	int i = 0;
	while(true)
	{
		if(!Character.isWhitespace(str.charAt(i)))
			nonWhitespace++;
		// the character at i is the first one past the requested count
		if(nonWhitespace > index)
			return i;
		// ran out of characters: answer with the string length
		if(i == last)
			return i + 1;
		i++;
	}
} //}}}
//{{{ getStringCase() method
/** Result of {@link #getStringCase}: none of the other cases applies. */
public static final int MIXED = 0;
/** Result of {@link #getStringCase}: all letters are lower case. */
public static final int LOWER_CASE = 1;
/** Result of {@link #getStringCase}: all letters are upper case. */
public static final int UPPER_CASE = 2;
/**
 * Result of {@link #getStringCase}: first letter upper case, rest
 * lower case.
 */
public static final int TITLE_CASE = 3;
/**
 * Returns if the specified string is all upper case, all lower case,
 * or title case (first letter upper case, rest lower case).
 * Non-letter characters after the first character are ignored. The
 * classification is driven by the first character: if the string is
 * empty or its first character is not a letter, the result is
 * {@link #MIXED}.
 * @param str The string
 * @return One of {@link #MIXED}, {@link #LOWER_CASE},
 * {@link #UPPER_CASE} or {@link #TITLE_CASE}
 * @since jEdit 4.0pre1
 */
public static int getStringCase(String str)
{
	if(str.length() == 0)
		return MIXED;

	// internal marker: case not determined because the first
	// character is not a letter
	final int undetermined = -1;
	int state = undetermined;

	char ch = str.charAt(0);
	if(Character.isLetter(ch))
	{
		if(Character.isUpperCase(ch))
			state = UPPER_CASE;
		else
			state = LOWER_CASE;
	}

	for(int i = 1; i < str.length(); i++)
	{
		ch = str.charAt(i);
		if(!Character.isLetter(ch))
			continue;

		switch(state)
		{
		case UPPER_CASE:
			if(Character.isLowerCase(ch))
			{
				// only a lower case letter directly after the
				// leading capital turns the string into title case
				if(i == 1)
					state = TITLE_CASE;
				else
					return MIXED;
			}
			break;
		case LOWER_CASE:
		case TITLE_CASE:
			if(Character.isUpperCase(ch))
				return MIXED;
			break;
		}
	}

	// never leak the internal marker to callers; treat an undetermined
	// case like the empty string
	return state == undetermined ? MIXED : state;
} //}}}
//{{{ toTitleCase() method
/**
 * Converts the specified string to title case, by capitalizing the
 * first character and converting the remainder to lower case.
 * @param str The string
 * @return The converted string; an empty string is returned unchanged
 * @since jEdit 4.0pre1
 */
public static String toTitleCase(String str)
{
	if(str.length() != 0)
	{
		final char first = Character.toUpperCase(str.charAt(0));
		final String rest = str.substring(1).toLowerCase();
		return first + rest;
	}
	return str;
} //}}}
//{{{ Private members
// Character classes produced by getCharType() and used by the
// findWordStart() / findWordEnd() scanners.
// whitespace, as reported by Character.isWhitespace()
private static final int WHITESPACE = 0;
// letter, digit, or a character listed in the noWordSep argument
private static final int WORD_CHAR = 1;
// anything that is neither whitespace nor a word character
private static final int SYMBOL = 2;
//{{{ formatParagraph() method
/**
 * Re-flows a single paragraph and appends the result to
 * <code>buf</code>. The paragraph's leading whitespace is copied to the
 * start of every output line. Words (as split by a default
 * <code>StringTokenizer</code>) are joined with single spaces, and a
 * new line is started whenever adding the next word plus a space would
 * exceed <code>maxLineLength</code>. The first word on a line is always
 * emitted, however long it is.
 * @param text The paragraph
 * @param maxLineLength The maximum line length
 * @param tabSize The tab size, used to measure the leading whitespace
 * @param buf Receives the formatted paragraph
 */
private static void formatParagraph(String text, int maxLineLength,
	int tabSize, StringBuilder buf)
{
	// align everything to paragraph's leading indent
	final int indentLength = StandardUtilities.getLeadingWhiteSpace(text);
	final String indent = text.substring(0, indentLength);
	final int indentWidth = StandardUtilities.getLeadingWhiteSpaceWidth(text, tabSize);

	buf.append(indent);

	// width of the output line built so far
	int column = indentWidth;

	final StringTokenizer words = new StringTokenizer(text);
	while(words.hasMoreTokens())
	{
		final String word = words.nextToken();

		// column == indentWidth means nothing has been written on
		// this line yet, so no separator is needed
		if(column != indentWidth)
		{
			if(column + word.length() + 1 > maxLineLength)
			{
				// word does not fit: wrap and re-indent
				buf.append('\n');
				buf.append(indent);
				column = indentWidth;
			}
			else
			{
				buf.append(' ');
				column++;
			}
		}

		buf.append(word);
		column += word.length();
	}
} //}}}
//{{{ indexIgnoringWhitespace() method
/**
 * Despite its name, this overload re-flows a single paragraph exactly
 * like <code>formatParagraph()</code> and appends the result to a
 * <code>StringBuffer</code>. It shares that method's implementation.
 * @param text The paragraph
 * @param maxLineLength The maximum line length
 * @param tabSize The tab size, used to measure the leading whitespace
 * @param buf Receives the formatted paragraph
 */
public static void indexIgnoringWhitespace(String text, int maxLineLength,
	int tabSize, StringBuffer buf)
{
	// format into a scratch builder, then hand the text over in one go
	final StringBuilder formatted = new StringBuilder();
	formatParagraph(text, maxLineLength, tabSize, formatted);
	buf.append(formatted);
} //}}}
//}}}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy