View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.miloss.fgsms.common.codec;
19  
20  import java.io.UnsupportedEncodingException;
21  import java.nio.ByteBuffer;
22  import java.nio.charset.Charset;
23  
24  
25  /**
26   * Converts String to and from bytes using the encodings required by the Java specification. These encodings are
27   * specified in <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">
28   * Standard charsets</a>.
29   *
30   * <p>This class is immutable and thread-safe.</p>
31   *
32   * @see CharEncoding
33   * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
34   * @version $Id$
35   * @since 1.4
36   */
37  public class StringUtils {
38  
39      /**
40       * <p>
41       * Compares two CharSequences, returning <code>true</code> if they represent equal sequences of characters.
42       * </p>
43       *
44       * <p>
45       * <code>null</code>s are handled without exceptions. Two <code>null</code> references are considered to be equal.
46       * The comparison is case sensitive.
47       * </p>
48       *
49       * <pre>
50       * StringUtils.equals(null, null)   = true
51       * StringUtils.equals(null, "abc")  = false
52       * StringUtils.equals("abc", null)  = false
53       * StringUtils.equals("abc", "abc") = true
54       * StringUtils.equals("abc", "ABC") = false
55       * </pre>
56       *
57       * <p>
58       * Copied from Apache Commons Lang r1583482 on April 10, 2014 (day of 3.3.2 release).
59       * </p>
60       *
61       * @see Object#equals(Object)
62       * @param cs1
63       *            the first CharSequence, may be <code>null</code>
64       * @param cs2
65       *            the second CharSequence, may be <code>null</code>
66       * @return <code>true</code> if the CharSequences are equal (case-sensitive), or both <code>null</code>
67       * @since 1.10
68       */
69      public static boolean equals(final CharSequence cs1, final CharSequence cs2) {
70          if (cs1 == cs2) {
71              return true;
72          }
73          if (cs1 == null || cs2 == null) {
74              return false;
75          }
76          if (cs1 instanceof String && cs2 instanceof String) {
77              return cs1.equals(cs2);
78          }
79          return CharSequenceUtils.regionMatches(cs1, false, 0, cs2, 0, Math.max(cs1.length(), cs2.length()));
80      }
81  
82      /**
83       * Calls {@link String#getBytes(Charset)}
84       *
85       * @param string
86       *            The string to encode (if null, return null).
87       * @param charset
88       *            The {@link Charset} to encode the <code>String</code>
89       * @return the encoded bytes
90       */
91      private static byte[] getBytes(final String string, final Charset charset) {
92          if (string == null) {
93              return null;
94          }
95          return string.getBytes(charset);
96      }
97  
98      /**
99       * Calls {@link String#getBytes(Charset)}
100      *
101      * @param string
102      *            The string to encode (if null, return null).
103      * @param charset
104      *            The {@link Charset} to encode the <code>String</code>
105      * @return the encoded bytes
106      * @since 1.11
107      */
108     private static ByteBuffer getByteBuffer(final String string, final Charset charset) {
109         if (string == null) {
110             return null;
111         }
112         return ByteBuffer.wrap(string.getBytes(charset));
113     }
114 
115     /**
116      * Encodes the given string into a byte buffer using the UTF-8 charset, storing the result into a new byte
117      * array.
118      *
119      * @param string
120      *            the String to encode, may be <code>null</code>
121      * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
122      * @throws NullPointerException
123      *             Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is
124      *             required by the Java platform specification.
125      * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
126      * @see #getBytesUnchecked(String, String)
127      * @since 1.11
128      */
129     public static ByteBuffer getByteBufferUtf8(final String string) {
130         return getByteBuffer(string, Charsets.UTF_8);
131     }
132 
133     /**
134      * Encodes the given string into a sequence of bytes using the ISO-8859-1 charset, storing the result into a new
135      * byte array.
136      *
137      * @param string
138      *            the String to encode, may be <code>null</code>
139      * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
140      * @throws NullPointerException
141      *             Thrown if {@link Charsets#ISO_8859_1} is not initialized, which should never happen since it is
142      *             required by the Java platform specification.
143      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
144      * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
145      * @see #getBytesUnchecked(String, String)
146      */
147     public static byte[] getBytesIso8859_1(final String string) {
148         return getBytes(string, Charsets.ISO_8859_1);
149     }
150 
151 
152     /**
153      * Encodes the given string into a sequence of bytes using the named charset, storing the result into a new byte
154      * array.
155      * <p>
156      * This method catches {@link UnsupportedEncodingException} and rethrows it as {@link IllegalStateException}, which
157      * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE.
158      * </p>
159      *
160      * @param string
161      *            the String to encode, may be <code>null</code>
162      * @param charsetName
163      *            The name of a required {@link java.nio.charset.Charset}
164      * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
165      * @throws IllegalStateException
166      *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a
167      *             required charset name.
168      * @see CharEncoding
169      * @see String#getBytes(String)
170      */
171     public static byte[] getBytesUnchecked(final String string, final String charsetName) {
172         if (string == null) {
173             return null;
174         }
175         try {
176             return string.getBytes(charsetName);
177         } catch (final UnsupportedEncodingException e) {
178             throw StringUtils.newIllegalStateException(charsetName, e);
179         }
180     }
181 
182     /**
183      * Encodes the given string into a sequence of bytes using the US-ASCII charset, storing the result into a new byte
184      * array.
185      *
186      * @param string
187      *            the String to encode, may be <code>null</code>
188      * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
189      * @throws NullPointerException
190      *             Thrown if {@link Charsets#US_ASCII} is not initialized, which should never happen since it is
191      *             required by the Java platform specification.
192      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
193      * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
194      * @see #getBytesUnchecked(String, String)
195      */
196     public static byte[] getBytesUsAscii(final String string) {
197         return getBytes(string, Charsets.US_ASCII);
198     }
199 
200     /**
201      * Encodes the given string into a sequence of bytes using the UTF-16 charset, storing the result into a new byte
202      * array.
203      *
204      * @param string
205      *            the String to encode, may be <code>null</code>
206      * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
207      * @throws NullPointerException
208      *             Thrown if {@link Charsets#UTF_16} is not initialized, which should never happen since it is
209      *             required by the Java platform specification.
210      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
211      * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
212      * @see #getBytesUnchecked(String, String)
213      */
214     public static byte[] getBytesUtf16(final String string) {
215         return getBytes(string, Charsets.UTF_16);
216     }
217 
218     /**
219      * Encodes the given string into a sequence of bytes using the UTF-16BE charset, storing the result into a new byte
220      * array.
221      *
222      * @param string
223      *            the String to encode, may be <code>null</code>
224      * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
225      * @throws NullPointerException
226      *             Thrown if {@link Charsets#UTF_16BE} is not initialized, which should never happen since it is
227      *             required by the Java platform specification.
228      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
229      * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
230      * @see #getBytesUnchecked(String, String)
231      */
232     public static byte[] getBytesUtf16Be(final String string) {
233         return getBytes(string, Charsets.UTF_16BE);
234     }
235 
236     /**
237      * Encodes the given string into a sequence of bytes using the UTF-16LE charset, storing the result into a new byte
238      * array.
239      *
240      * @param string
241      *            the String to encode, may be <code>null</code>
242      * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
243      * @throws NullPointerException
244      *             Thrown if {@link Charsets#UTF_16LE} is not initialized, which should never happen since it is
245      *             required by the Java platform specification.
246      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
247      * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
248      * @see #getBytesUnchecked(String, String)
249      */
250     public static byte[] getBytesUtf16Le(final String string) {
251         return getBytes(string, Charsets.UTF_16LE);
252     }
253 
254     /**
255      * Encodes the given string into a sequence of bytes using the UTF-8 charset, storing the result into a new byte
256      * array.
257      *
258      * @param string
259      *            the String to encode, may be <code>null</code>
260      * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
261      * @throws NullPointerException
262      *             Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is
263      *             required by the Java platform specification.
264      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
265      * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
266      * @see #getBytesUnchecked(String, String)
267      */
268     public static byte[] getBytesUtf8(final String string) {
269         return getBytes(string, Charsets.UTF_8);
270     }
271 
272     private static IllegalStateException newIllegalStateException(final String charsetName,
273                                                                   final UnsupportedEncodingException e) {
274         return new IllegalStateException(charsetName + ": " + e);
275     }
276 
277     /**
278      * Constructs a new <code>String</code> by decoding the specified array of bytes using the given charset.
279      *
280      * @param bytes
281      *            The bytes to be decoded into characters
282      * @param charset
283      *            The {@link Charset} to encode the <code>String</code>
284      * @return A new <code>String</code> decoded from the specified array of bytes using the given charset,
285      *         or <code>null</code> if the input byte array was <code>null</code>.
286      * @throws NullPointerException
287      *             Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is
288      *             required by the Java platform specification.
289      */
290     private static String newString(final byte[] bytes, final Charset charset) {
291         return bytes == null ? null : new String(bytes, charset);
292     }
293 
294     /**
295      * Constructs a new <code>String</code> by decoding the specified array of bytes using the given charset.
296      * <p>
297      * This method catches {@link UnsupportedEncodingException} and re-throws it as {@link IllegalStateException}, which
298      * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE.
299      * </p>
300      *
301      * @param bytes
302      *            The bytes to be decoded into characters, may be <code>null</code>
303      * @param charsetName
304      *            The name of a required {@link java.nio.charset.Charset}
305      * @return A new <code>String</code> decoded from the specified array of bytes using the given charset,
306      *         or <code>null</code> if the input byte array was <code>null</code>.
307      * @throws IllegalStateException
308      *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a
309      *             required charset name.
310      * @see CharEncoding
311      * @see String#String(byte[], String)
312      */
313     public static String newString(final byte[] bytes, final String charsetName) {
314         if (bytes == null) {
315             return null;
316         }
317         try {
318             return new String(bytes, charsetName);
319         } catch (final UnsupportedEncodingException e) {
320             throw StringUtils.newIllegalStateException(charsetName, e);
321         }
322     }
323 
324     /**
325      * Constructs a new <code>String</code> by decoding the specified array of bytes using the ISO-8859-1 charset.
326      *
327      * @param bytes
328      *            The bytes to be decoded into characters, may be <code>null</code>
329      * @return A new <code>String</code> decoded from the specified array of bytes using the ISO-8859-1 charset, or
330      *         <code>null</code> if the input byte array was <code>null</code>.
331      * @throws NullPointerException
332      *             Thrown if {@link Charsets#ISO_8859_1} is not initialized, which should never happen since it is
333      *             required by the Java platform specification.
334      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
335      */
336     public static String newStringIso8859_1(final byte[] bytes) {
337         return new String(bytes, Charsets.ISO_8859_1);
338     }
339 
340     /**
341      * Constructs a new <code>String</code> by decoding the specified array of bytes using the US-ASCII charset.
342      *
343      * @param bytes
344      *            The bytes to be decoded into characters
345      * @return A new <code>String</code> decoded from the specified array of bytes using the US-ASCII charset,
346      *         or <code>null</code> if the input byte array was <code>null</code>.
347      * @throws NullPointerException
348      *             Thrown if {@link Charsets#US_ASCII} is not initialized, which should never happen since it is
349      *             required by the Java platform specification.
350      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
351      */
352     public static String newStringUsAscii(final byte[] bytes) {
353         return new String(bytes, Charsets.US_ASCII);
354     }
355 
356     /**
357      * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16 charset.
358      *
359      * @param bytes
360      *            The bytes to be decoded into characters
361      * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16 charset
362      *         or <code>null</code> if the input byte array was <code>null</code>.
363      * @throws NullPointerException
364      *             Thrown if {@link Charsets#UTF_16} is not initialized, which should never happen since it is
365      *             required by the Java platform specification.
366      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
367      */
368     public static String newStringUtf16(final byte[] bytes) {
369         return new String(bytes, Charsets.UTF_16);
370     }
371 
372     /**
373      * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16BE charset.
374      *
375      * @param bytes
376      *            The bytes to be decoded into characters
377      * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16BE charset,
378      *         or <code>null</code> if the input byte array was <code>null</code>.
379      * @throws NullPointerException
380      *             Thrown if {@link Charsets#UTF_16BE} is not initialized, which should never happen since it is
381      *             required by the Java platform specification.
382      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
383      */
384     public static String newStringUtf16Be(final byte[] bytes) {
385         return new String(bytes, Charsets.UTF_16BE);
386     }
387 
388     /**
389      * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16LE charset.
390      *
391      * @param bytes
392      *            The bytes to be decoded into characters
393      * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16LE charset,
394      *         or <code>null</code> if the input byte array was <code>null</code>.
395      * @throws NullPointerException
396      *             Thrown if {@link Charsets#UTF_16LE} is not initialized, which should never happen since it is
397      *             required by the Java platform specification.
398      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
399      */
400     public static String newStringUtf16Le(final byte[] bytes) {
401         return new String(bytes, Charsets.UTF_16LE);
402     }
403 
404     /**
405      * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-8 charset.
406      *
407      * @param bytes
408      *            The bytes to be decoded into characters
409      * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-8 charset,
410      *         or <code>null</code> if the input byte array was <code>null</code>.
411      * @throws NullPointerException
412      *             Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is
413      *             required by the Java platform specification.
414      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
415      */
416     public static String newStringUtf8(final byte[] bytes) {
417         return newString(bytes, Charsets.UTF_8);
418     }
419 
420 }