Monday, October 30, 2006

StringUtil

Useful String utilities

/*
 * net/balusc/util/StringUtil.java
 * 
 * Copyright (C) 2007 BalusC
 * 
 * This program is free software; you can redistribute it and/or modify it under the terms of the
 * GNU General Public License as published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
 * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License along with this program; if
 * not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA.
 */

package net.balusc.util;

import java.util.Collection;
import java.util.Iterator;
import java.util.Locale;
import java.util.regex.Pattern;

/**
 * Useful String utilities: padding, trimming, joining and a couple of simple format checks.
 * 
 * @author BalusC
 * @see <a href="http://balusc.blogspot.com/2006/10/stringutil.html">StringUtil article</a>
 */
public final class StringUtil {

    // Init ---------------------------------------------------------------------------------------

    /** Argument for {@link #pad(String, String, int, int)}, set the pad direction to LEFT. */
    public static final int PAD_LEFT = -1;

    /** Argument for {@link #pad(String, String, int, int)}, set the pad direction to BOTH. */
    public static final int PAD_BOTH = 0;

    /** Argument for {@link #pad(String, String, int, int)}, set the pad direction to RIGHT. */
    public static final int PAD_RIGHT = 1;

    // The patterns are compiled once here; String#matches() and String#replaceAll() would
    // recompile the expression on every single call.

    /** One or more digits, nothing else. */
    private static final Pattern NUMBER = Pattern.compile("^\\d+$");

    /** Optional sign, digits, optionally followed by a dot and more digits. */
    private static final Pattern NUMERIC = Pattern.compile("^[-+]?\\d+(\\.\\d+)?$");

    /** Digits, a dot and exactly two decimals. */
    private static final Pattern VALUTA = Pattern.compile("^\\d+\\.\\d{2}$");

    /** A single digit, used with find() to detect a digit anywhere in the input. */
    private static final Pattern DIGIT = Pattern.compile("\\d");

    /** Lowercased email address: dot separated local part, "@", domain labels and a 2-5 letter TLD. */
    private static final Pattern EMAIL_ADDRESS = Pattern.compile(
        "^[a-z0-9-~#&\\_]+(\\.[a-z0-9-~#&\\_]+)*@([a-z0-9-]+\\.)+[a-z]{2,5}$");

    /** A script element. The "s" flag lets "." match line breaks so multi-line scripts are caught. */
    private static final Pattern XSS_SCRIPT = Pattern.compile("(?is)<script.*?>.*?</script.*?>");

    /** An element containing a "javascript:" call, restricted to a single line. */
    private static final Pattern XSS_JAVASCRIPT =
        Pattern.compile("(?i)<.*?javascript:.*?>.*?</.*?>");

    /** An element carrying an on* attribute, restricted to a single line. */
    private static final Pattern XSS_ON_ATTRIBUTE = Pattern.compile("(?i)<.*?\\s+on.*?>.*?</.*?>");

    private StringUtil() {
        // Utility class, hide the constructor.
    }

    // Actions ------------------------------------------------------------------------------------

    /**
     * Pad the given string with the given pad value to the given length in the given direction.
     * Valid directions are {@link #PAD_LEFT}, {@link #PAD_BOTH} and {@link #PAD_RIGHT}. When using
     * {@link #PAD_BOTH}, padding left has precedence over padding right when the difference
     * between the string's length and the given length is odd.
     * <p>
     * The pad value is always added as a whole, so with a pad value of more than one character
     * the result may become longer than the given length. A string which is already at least as
     * long as the given length is returned unchanged.
     * @param string The string to be padded.
     * @param pad The value to pad the given string with.
     * @param length The length to pad the given string to.
     * @param direction The direction to pad the given string to.
     * @return The padded string.
     * @throws IllegalArgumentException If invalid direction is given, or if padding is required
     * but the pad value is empty.
     */
    public static String pad(String string, String pad, int length, int direction)
        throws IllegalArgumentException {
        StringBuilder builder = new StringBuilder(string);

        if (direction != PAD_LEFT && direction != PAD_BOTH && direction != PAD_RIGHT) {
            throw new IllegalArgumentException("Invalid direction, must be one of"
                + " StringUtil.PAD_LEFT, StringUtil.PAD_BOTH or StringUtil.PAD_RIGHT.");
        }

        // An empty pad value never makes the builder grow, so the loops below would spin forever.
        if (builder.length() < length && pad != null && pad.length() == 0) {
            throw new IllegalArgumentException("Pad value must not be empty.");
        }

        switch (direction) {
            case PAD_LEFT:
                while (builder.length() < length) {
                    builder.insert(0, pad);
                }
                break;

            case PAD_RIGHT:
                while (builder.length() < length) {
                    builder.append(pad);
                }
                break;

            default: // PAD_BOTH, the direction has been validated above.
                // The right side receives the rounded down half of the missing length, the left
                // side receives whatever is still missing after that.
                int right = (length - builder.length()) / 2 + builder.length();
                while (builder.length() < right) {
                    builder.append(pad);
                }
                while (builder.length() < length) {
                    builder.insert(0, pad);
                }
                break;
        }

        return builder.toString();
    }

    /**
     * Trim the given string with the given trim value. Every leading and every trailing
     * occurrence of the trim value is removed; occurrences in the middle are left alone.
     * @param string The string to be trimmed.
     * @param trim The value to trim the given string off.
     * @return The trimmed string. The given string itself if the trim value is empty.
     */
    public static String trim(String string, String trim) {
        int length = trim.length();
        if (length == 0) {
            return string;
        }

        int start = 0;
        int end = string.length();

        // Compare in place instead of creating a substring for every comparison.
        while (start + length <= end && string.startsWith(trim, start)) {
            start += length;
        }
        while (start + length <= end && string.regionMatches(end - length, trim, 0, length)) {
            end -= length;
        }

        return string.substring(start, end);
    }

    /**
     * Join the given collection with the given join value. The parts are appended in iteration
     * order using their string representation.
     * @param collection The collection (List, Set) to be joined.
     * @param join The value to be joined between each part.
     * @return The joined collection. An empty string if the collection is empty.
     */
    public static String join(Collection<?> collection, String join) {
        StringBuilder builder = new StringBuilder();

        Iterator<?> iter = collection.iterator();
        while (iter.hasNext()) {
            builder.append(iter.next());

            if (iter.hasNext()) {
                builder.append(join);
            }
        }

        return builder.toString();
    }

    /**
     * Join the given ordinary array with the given join value. The parts are appended in array
     * order using their string representation.
     * @param objects The ordinary array (String[], Integer[], etc) to be joined.
     * @param join The value to be joined between each part.
     * @return The joined array. An empty string if the array is empty.
     */
    public static String join(Object[] objects, String join) {
        StringBuilder builder = new StringBuilder();

        for (int i = 0; i < objects.length; i++) {
            if (i > 0) {
                builder.append(join);
            }
            builder.append(objects[i]);
        }

        return builder.toString();
    }

    /**
     * Decapitalize the given string. The first character will be lowercased according to the
     * rules of the default locale.
     * @param string The string to decapitalize.
     * @return The decapitalized string. The given string itself if it is empty.
     */
    public static String decapitalize(String string) {
        if (string.length() == 0) {
            return string;
        }
        return string.substring(0, 1).toLowerCase() + string.substring(1);
    }

    /**
     * Check if given string is a number. It should contain digits only.
     * @param string The string to check on.
     * @return True if string is a number. False if not.
     */
    public static boolean isNumber(String string) {
        return NUMBER.matcher(string).matches();
    }

    /**
     * Check if given string is numeric. Positive and negative prefix and dot separators are
     * allowed.
     * @param string The string to check on.
     * @return True if string is numeric. False if not.
     */
    public static boolean isNumeric(String string) {
        return NUMERIC.matcher(string).matches();
    }

    /**
     * Check if given string is a valuta. The dot separator and two decimals are required.
     * @param string The string to check on.
     * @return True if string is valuta. False if not.
     */
    public static boolean isValuta(String string) {
        return VALUTA.matcher(string).matches();
    }

    /**
     * Check if given string contains numbers, i.e. at least one digit at any position, also when
     * the string spans multiple lines.
     * @param string The string to check on.
     * @return True if string contains numbers. False if not.
     */
    public static boolean hasNumbers(String string) {
        // find() instead of matching ".*\d.*": the dot does not match line breaks, so a digit
        // behind a line break would be overlooked.
        return DIGIT.matcher(string).find();
    }

    /**
     * Check if given string looks like a valid email address. This is a pragmatic check and not a
     * full implementation of RFC822 &amp; RFC1035: the local part may consist of dot separated
     * groups of letters, digits and the characters <code>- ~ # &amp; _</code>, the domain of dot
     * separated groups of letters, digits and hyphens, ending with a top level domain of 2 to 5
     * letters. The check is case insensitive.
     * @param string The string to check on.
     * @return True if string is a valid email address. False if not.
     */
    public static boolean isEmailAddress(String string) {
        // Lowercase with a fixed locale. With e.g. a Turkish default locale "I" would become a
        // dotless "i", which the pattern rejects.
        return EMAIL_ADDRESS.matcher(string.toLowerCase(Locale.ENGLISH)).matches();
    }

    /**
     * Remove some common XSS (Cross Site Scripting) vulnerabilities from the given string: script
     * elements, elements with a <code>javascript:</code> call and elements with on* attributes.
     * <p>
     * This is a best-effort filter based on regular expressions. It is no replacement for
     * properly escaping user-controlled input when rendering it.
     * @param string The string to remove XSS from.
     * @return The string with removed XSS, if any.
     */
    public static String removeXss(String string) {
        String result = XSS_SCRIPT.matcher(string).replaceAll(""); // Remove all <script> tags.
        result = XSS_JAVASCRIPT.matcher(result).replaceAll(""); // Remove tags with javascript: call.
        return XSS_ON_ATTRIBUTE.matcher(result).replaceAll(""); // Remove tags with on* attributes.
    }

}

Instead of using regexps for isNumber and isNumeric, you can also consider the following implementations (mis)using the NumberFormatException:


    /**
     * Check if given string is a number. It should contain digits only, so a leading plus or
     * minus sign is rejected even though {@link BigInteger} itself would accept it.
     * @param string The string to check on.
     * @return True if string is a number. False if not.
     */
    public static boolean isNumber(String string) {
        // BigInteger parses an optional leading sign, which is not "digits only".
        if (string.length() > 0) {
            char first = string.charAt(0);
            if (first == '-' || first == '+') {
                return false;
            }
        }

        try {
            new BigInteger(string);
            return true;
        } catch (NumberFormatException e) {
            // Not parseable as an integer, which includes the empty string.
            return false;
        }
    }

    /**
     * Check if given string is numeric, which here means that it can be parsed as a
     * {@link BigDecimal}. Positive and negative prefix and dot separators are allowed.
     * Any other notation understood by {@link BigDecimal#BigDecimal(String)} passes as well.
     * @param string The string to check on.
     * @return True if string is numeric. False if not.
     */
    public static boolean isNumeric(String string) {
        boolean parseable;

        try {
            // Only the parse attempt matters, the parsed value itself is discarded.
            new BigDecimal(string);
            parseable = true;
        } catch (NumberFormatException notNumeric) {
            parseable = false;
        }

        return parseable;
    }

Compared to the regexp methods, these methods are slightly faster (about 10%) when no exception is thrown, but much slower (about 5 times) when an exception is thrown.

Copyright - GNU General Public License

(C) October 2006, BalusC

MathUtil

Useful Math utilities

/*
 * net/balusc/util/MathUtil.java
 * 
 * Copyright (C) 2007 BalusC
 * 
 * This program is free software; you can redistribute it and/or modify it under the terms of the
 * GNU General Public License as published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
 * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License along with this program; if
 * not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA.
 */

package net.balusc.util;

import java.io.UnsupportedEncodingException;
import java.math.BigInteger;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Random;

/**
 * Useful Math utilities: ID generation, MD5 hashing, factorials and simple random values.
 * 
 * @author BalusC
 * @see <a href="http://balusc.blogspot.com/2006/10/mathutil.html">MathUtil article</a>
 */
public final class MathUtil {

    // Init ---------------------------------------------------------------------------------------

    private static final Random RANDOM = new Random();

    private MathUtil() {
        // Utility class, hide the constructor.
    }

    // Actions ------------------------------------------------------------------------------------

    /**
     * Generates unique ID based on timestamp, thread hash code, random int and random double.
     * Each part is converted to hexadecimal, padded left with zeros to a length of 8 chars and
     * then concatenated to each other.
     * <p>
     * The method is synchronized and sleeps one millisecond per call so that two consecutive
     * calls do not see the same timestamp. If the calling thread is interrupted, the sleep is cut
     * short and the interrupt status of the thread is restored.
     * @return Unique 32-char hexadecimal string.
     */
    public static synchronized String uniqueID() {
        final String pad = "0";
        final int padLength = 8;
        final int padDirection = StringUtil.PAD_LEFT;

        // Only the lower 32 bits of the timestamp are used, Integer.toHexString() yields at most
        // 8 chars for any int.
        String timestamp = Integer.toHexString((int) System.currentTimeMillis());
        String threadHashCode = Integer.toHexString(Thread.currentThread().hashCode());
        String randomInt = Integer.toHexString(RANDOM.nextInt());
        String mathRandom = Integer.toHexString((int) (Math.random() * Integer.MAX_VALUE));

        // You may remove this try block if you want to increase performance by 1ms.
        // But the generated ID might not be fully 100% guaranteed to be unique.
        // If you want to keep this block, then you can "almost" safely remove the
        // randomInt and mathRandom parts of the UniqueID.
        try {
            Thread.sleep(1); // System.currentTimeMillis()++
        } catch (InterruptedException e) {
            // The sleep can be interrupted like any other one. Still return the ID, but restore
            // the interrupt status so that the caller is able to notice the interruption.
            Thread.currentThread().interrupt();
        }

        return StringUtil.pad(timestamp, pad, padLength, padDirection)
            + StringUtil.pad(threadHashCode, pad, padLength, padDirection)
            + StringUtil.pad(randomInt, pad, padLength, padDirection)
            + StringUtil.pad(mathRandom, pad, padLength, padDirection);
    }

    /**
     * Generate MD5 hash for the given String, using its UTF-8 bytes. MD5 is a one-way hash
     * function, not an encryption. Be aware that a plain MD5 hash is nowadays considered too weak
     * for storing passwords.
     * @param string The String to generate the MD5 hash for.
     * @return The 32-char lowercase hexadecimal MD5 hash of the given String, if necessary padded
     * left with zeros.
     */
    public static String hashMD5(String string) {
        try {
            MessageDigest md5 = MessageDigest.getInstance("MD5");
            byte[] digest = md5.digest(string.getBytes("UTF-8"));

            // Signum 1 makes BigInteger read the digest as an unsigned magnitude. Reading it as a
            // signed value and calling abs() would change every digest whose first bit is set.
            String hash = new BigInteger(1, digest).toString(16);
            return StringUtil.pad(hash, "0", 32, StringUtil.PAD_LEFT);
        } catch (NoSuchAlgorithmException e) {
            // Unusual exception. "MD5" is just hardcoded and supported.
            throw new RuntimeException(e);
        } catch (UnsupportedEncodingException e) {
            // Unusual exception. "UTF-8" is just hardcoded and supported.
            throw new RuntimeException(e);
        }
    }

    /**
     * Factorial calculator with nearly unlimited outcome.
     * @param n The number to calculate factorial for.
     * @return The factorial of the given number. For any number less than 2, including negative
     * numbers, this is 1.
     */
    public static BigInteger factorial(BigInteger n) {
        BigInteger factorial = BigInteger.ONE;

        // Multiply n * (n - 1) * ... * 2. Test the sign of compareTo() rather than the exact
        // value and leave the parameter itself untouched.
        for (BigInteger i = n; i.compareTo(BigInteger.ONE) > 0; i = i.subtract(BigInteger.ONE)) {
            factorial = factorial.multiply(i);
        }

        return factorial;
    }

    /**
     * Generate random non-negative number &lt; Short.MAX_VALUE.
     * @return Random number &lt; Short.MAX_VALUE, as an Integer.
     */
    public static Number randomNumber() {
        return Integer.valueOf((int) (Math.random() * Short.MAX_VALUE));
    }

    /**
     * Generate random string matching [0-9a-z]{1,6}. It is the base 36 representation of a
     * random non-negative int.
     * @return Random string matching [0-9a-z]{1,6}.
     */
    public static String randomString() {
        return Integer.toString((int) (Math.random() * Integer.MAX_VALUE) & Integer.MAX_VALUE, 36);
    }

}

You can find the required StringUtil utility class here.

Copyright - GNU General Public License

(C) October 2006, BalusC