"Tokenize" a string
Tag(s): String/Number


import java.util.StringTokenizer;
...

// Split the sentence on the space character; with this two-argument
// constructor the delimiters themselves are not returned as tokens.
StringTokenizer st =
       new StringTokenizer
           ("This is the string to be tokenized", " ");

// Consume the tokens one by one and print each on its own line.
while(st.hasMoreTokens()){
  String s=st.nextToken();
  System.out.println(s);
}

/*
  output is :
  This
  is
  the
  string
  to
  be
  tokenized
*/
StringTokenizer does not report empty tokens: two consecutive separators are treated as a single one, so no empty string is returned for the missing value between them.

Here is an enhanced StringTokenizer (thanks to jsanza) that deals with this:

import java.util.*;
/**
 * Wraps a standard {@link java.util.StringTokenizer} and adds one extra mode
 * of operation, {@link #NO_CONSECUTIVE_DELIMS}.
 *
 * <p>In {@link #NO_RETURN_DELIMS} and {@link #RETURN_DELIMS} mode every call
 * is delegated to the wrapped {@code StringTokenizer}. In
 * {@code NO_CONSECUTIVE_DELIMS} mode the class scans the input itself: every
 * single delimiter character ends a token, so two adjacent delimiters (or a
 * leading delimiter) yield an empty token. A trailing delimiter does
 * <em>not</em> produce a final empty token, because tokenizing stops as soon
 * as the unread remainder is empty.
 *
 * <p>As with {@code StringTokenizer}, the delimiter string is a <em>set</em>
 * of delimiter characters, not a multi-character separator.
 *
 * <p>The capitalised protected field names are kept as they are because
 * subclasses may refer to them.
 */
public class EnhancedStringTokenizer implements Enumeration<Object> {
    /** Delimiters are skipped and never returned (plain StringTokenizer behaviour). */
    public static final int NO_RETURN_DELIMS = 0;
    /** Delimiters are returned as tokens (plain StringTokenizer behaviour). */
    public static final int RETURN_DELIMS = 1;
    /** Each delimiter ends a token, so consecutive delimiters yield empty tokens. */
    public static final int NO_CONSECUTIVE_DELIMS = 2;

    /** Current mode; one of the three mode constants. */
    protected int Mode = NO_CONSECUTIVE_DELIMS;

    /** Set of delimiter characters; the initial value is StringTokenizer's default set. */
    protected String Delimiter = " \t\n\r\f";

    /** Not yet consumed part of the input; only used in NO_CONSECUTIVE_DELIMS mode. */
    protected String Remainder = "";

    /** Wrapped tokenizer; stays {@code null} in NO_CONSECUTIVE_DELIMS mode. */
    protected java.util.StringTokenizer st = null;

    void setMode(int mode) {
        Mode = mode;
    }

    public int getMode() {
        return Mode;
    }

    protected void setDelimiter(String delim) {
        Delimiter = delim;
    }

    public String getDelimiter() {
        return Delimiter;
    }

    protected void setRemainder(String str) {
        Remainder = str;
    }

    public String getRemainder() {
        return Remainder;
    }

    /**
     * Same as the StringTokenizer constructor.  No added functionality.
     *
     * @param str the string to tokenize
     */
    public EnhancedStringTokenizer(String str) {
        setMode(NO_RETURN_DELIMS);
        st = new StringTokenizer(str);
    }

    /**
     * Same as the StringTokenizer constructor.  No added functionality.
     *
     * @param str   the string to tokenize
     * @param delim the set of delimiter characters
     */
    public EnhancedStringTokenizer(String str, String delim) {
        setMode(NO_RETURN_DELIMS);
        setDelimiter(delim);
        st = new StringTokenizer(str, delim);
    }

    /**
     * Same as the StringTokenizer constructor.  No added functionality.
     *
     * @param str          the string to tokenize
     * @param delim        the set of delimiter characters
     * @param returnDelims whether delimiters are returned as tokens
     */
    public EnhancedStringTokenizer
        (String str, String delim, boolean returnDelims) {
        setMode(returnDelims ? RETURN_DELIMS : NO_RETURN_DELIMS);
        setDelimiter(delim);
        st = new StringTokenizer(str, delim, returnDelims);
    }

    /**
     * Using this constructor allows use of the NO_CONSECUTIVE_DELIMS mode
     * of operation.
     *
     * @param str   the string to tokenize
     * @param delim the set of delimiter characters
     * @param mode  one of {@link #NO_RETURN_DELIMS}, {@link #RETURN_DELIMS} or
     *              {@link #NO_CONSECUTIVE_DELIMS}; any other value is treated
     *              as {@code NO_CONSECUTIVE_DELIMS}
     */
    public EnhancedStringTokenizer(String str, String delim, int mode) {
        setDelimiter(delim);
        switch (mode) {
            case NO_RETURN_DELIMS :
                setMode(NO_RETURN_DELIMS);
                st = new StringTokenizer(str, delim, false);
                break;
            case RETURN_DELIMS :
                setMode(RETURN_DELIMS);
                st = new StringTokenizer(str, delim, true);
                break;
            case NO_CONSECUTIVE_DELIMS :
            default :
                // Store the normalised mode: keeping an unknown value would
                // send later calls to the wrapped tokenizer, which is null here.
                setMode(NO_CONSECUTIVE_DELIMS);
                init(str);
                break;
        }
    }

    /**
     * Prepares NO_CONSECUTIVE_DELIMS scanning of {@code str}.
     *
     * @throws NullPointerException if {@code str} is null (fails fast, like
     *         the StringTokenizer constructors do)
     */
    void init(String str) {
        if (str == null) {
            throw new NullPointerException("str");
        }
        setRemainder(str);
    }

    /**
     * Returns how many more times {@link #nextToken()} can be called before
     * it throws. The tokenizer position is not changed.
     *
     * @return the number of tokens remaining
     */
    public int countTokens() {
        if (getMode() != NO_CONSECUTIVE_DELIMS) {
            return st.countTokens();
        }
        // Dry run over the remainder, then put it back.
        String saved = getRemainder();
        int count = 0;
        try {
            while (hasMoreTokens()) {
                nextToken();
                count++;
            }
        }
        finally {
            setRemainder(saved);
        }
        return count;
    }

    /** Same as {@link #hasMoreTokens()}. */
    @Override
    public boolean hasMoreElements() {
        return hasMoreTokens();
    }

    /**
     * Same as {@link #nextToken()}, but declared to return {@code Object}
     * as required by {@link Enumeration}.
     */
    @Override
    public Object nextElement() throws NoSuchElementException {
        return nextToken();
    }

    /**
     * Tells whether another token is available.
     *
     * @return {@code true} if {@link #nextToken()} would return a token
     */
    public boolean hasMoreTokens() {
        if (getMode() != NO_CONSECUTIVE_DELIMS) {
            return st.hasMoreTokens();
        }
        // A non-empty remainder always holds at least one (possibly empty) token.
        return getRemainder().length() > 0;
    }

    /**
     * Returns the next token.
     *
     * @return the next token; in NO_CONSECUTIVE_DELIMS mode this is the empty
     *         string when the remainder starts with a delimiter
     * @throws NoSuchElementException if no tokens are left
     */
    public String nextToken() throws NoSuchElementException {
        if (getMode() != NO_CONSECUTIVE_DELIMS) {
            return st.nextToken();
        }
        String rest = getRemainder();
        if (rest.length() == 0) {
            throw new NoSuchElementException();
        }
        int at = indexOfAnyDelimiter(rest, getDelimiter());
        if (at < 0) {
            // No delimiter left: the whole remainder is the last token.
            setRemainder("");
            return rest;
        }
        // Skip exactly the one delimiter found (2 chars for a surrogate pair).
        int delimLength = Character.charCount(rest.codePointAt(at));
        setRemainder(rest.substring(at + delimLength));
        return rest.substring(0, at);
    }

    /**
     * Switches to a new delimiter set and returns the next token. The new
     * set stays in effect for subsequent calls.
     *
     * @param delim the new set of delimiter characters
     * @return the next token, split using {@code delim}
     * @throws NoSuchElementException if no tokens are left
     */
    public String nextToken(String delim) throws NoSuchElementException {
        // Record the new set in every mode so getDelimiter() stays accurate.
        setDelimiter(delim);
        if (getMode() != NO_CONSECUTIVE_DELIMS) {
            return st.nextToken(delim);
        }
        return nextToken();
    }

    /**
     * Finds the first position in {@code text} holding any of the code
     * points contained in {@code delims}.
     *
     * @return the char index of the first delimiter, or -1 if there is none
     */
    private static int indexOfAnyDelimiter(String text, String delims) {
        int i = 0;
        while (i < text.length()) {
            int codePoint = text.codePointAt(i);
            if (delims.indexOf(codePoint) >= 0) {
                return i;
            }
            i += Character.charCount(codePoint);
        }
        return -1;
    }

    /**
     * Small demo: prints the length and the text of each token of
     * {@code "|One|Two|Three||Five"}, including the two empty tokens.
     */
    public static void main(String a[]) {
        String testStr = "|One|Two|Three||Five";
        EnhancedStringTokenizer st =
            new EnhancedStringTokenizer(
                testStr,
                "|",
                EnhancedStringTokenizer.NO_CONSECUTIVE_DELIMS);
        while (st.hasMoreTokens()) {
            String aux = st.nextToken();
            System.out.print(aux.length());
            System.out.println("->"+aux+"<-");
        }
    }
}

See also this HowTo to split a given String into an array based on a specific separator.

For JDK1.4+, you may want to look at the String.split() method. See this HowTo.



blog comments powered by Disqus