/**
 * Let's practice tokenizing a string.
 *
 * <p>If you want to tokenize simply by spaces, you can use the Scanner class
 * and call next() repeatedly. But if you want to tokenize on a punctuation
 * symbol, use the String class' split() function, which takes a regular
 * expression describing the delimiter.
 *
 * <p>Notes on the delimiter (shown here as they are written inside a Java
 * string literal, hence the doubled backslashes):
 * <ul>
 *   <li>{@code \\s*} means 0 or more whitespace characters.</li>
 *   <li>{@code \\s+} means 1 or more whitespace characters.</li>
 *   <li>If you include {@code \\s*} on either side of the punctuation symbol
 *       when you call split, the spaces around the symbol become part of the
 *       delimiter, so the tokens come back trimmed.</li>
 *   <li>For example, splitting on {@code ",\\s+"} handles commas followed by
 *       1 or more spaces. But it wouldn't separate: 4,5</li>
 *   <li>If you simply want to tokenize based on all punctuation and space
 *       characters, use the pre-defined character classes
 *       {@code \\p{Punct}} and {@code \\p{Space}} inside a bracket
 *       expression.</li>
 *   <li>If you use both the Punct and Space classes without the {@code \\s*}
 *       on both sides, you get a lot of empty tokens (every extra space next
 *       to a punctuation mark is its own delimiter). The {@code \\s*} on both
 *       sides absorbs those spaces.</li>
 *   <li>split() discards trailing empty tokens, so a delimiter at the very
 *       end of the input does not produce a final empty token.</li>
 * </ul>
 *
 * <p>For detailed information on what delimiter-string to pass to split, see
 * the online documentation on the "Pattern" class.
 */
public class Driver {

    /**
     * Runs three tokenizing demonstrations, each with a different delimiter
     * pattern, and prints the resulting tokens.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // Any single punctuation or whitespace character, plus the
        // whitespace surrounding it, acts as one delimiter.
        String line = "dmchugh ftp davidm Fri Oct 12 11:07 - down (4+03:36)";
        String pattern = "\\s*[\\p{Punct}\\p{Space}]\\s*";
        showTokens(line, pattern);

        // A comma must be followed by at least one space to count as a
        // delimiter, so "4,5" and "6,7" each stay together.
        line = "4,5, 6,7";
        pattern = ",\\s+";
        showTokens(line, pattern);

        // Only the double dash is a delimiter; single dashes stay in tokens.
        line = "moo-goo--gai-pan";
        pattern = "--";
        showTokens(line, pattern);
    }

    /**
     * Splits {@code input} on the regular expression {@code pattern} and
     * prints each resulting token on its own numbered line (starting at 1),
     * wrapped in dashes so leading/trailing whitespace is visible.
     *
     * @param input   the text to tokenize
     * @param pattern the delimiter, as a regular expression accepted by
     *                {@link String#split(String)}
     */
    public static void showTokens(String input, String pattern) {
        // This is the most important step: We need to call the split
        // function and put the result in an array of Strings.
        String[] token = input.split(pattern);

        // Print the tokens
        System.out.print("\nHere are your tokens:\n");
        for (int i = 0; i < token.length; ++i) {
            System.out.printf("Token #%d: -----%s-----\n", i + 1, token[i]);
        }
    }
}