View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.miloss.fgsms.common.codec;
18  
19  import java.util.Arrays;
20  
21  
22  /**
23   * Abstract superclass for Base-N encoders and decoders.
24   *
25   * <p>
26   * This class is thread-safe.
27   * </p>
28   *
29   * @version $Id$
30   */
31  public abstract class BaseNCodec implements BinaryEncoder, BinaryDecoder {
32  
33      /**
34       * Holds thread context so classes can be thread-safe.
35       *
36       * This class is not itself thread-safe; each thread must allocate its own copy.
37       *
38       * @since 1.7
39       */
40      static class Context {
41  
42          /**
43           * Place holder for the bytes we're dealing with for our based logic.
44           * Bitwise operations store and extract the encoding or decoding from this variable.
45           */
46          int ibitWorkArea;
47  
48          /**
49           * Place holder for the bytes we're dealing with for our based logic.
50           * Bitwise operations store and extract the encoding or decoding from this variable.
51           */
52          long lbitWorkArea;
53  
54          /**
55           * Buffer for streaming.
56           */
57          byte[] buffer;
58  
59          /**
60           * Position where next character should be written in the buffer.
61           */
62          int pos;
63  
64          /**
65           * Position where next character should be read from the buffer.
66           */
67          int readPos;
68  
69          /**
70           * Boolean flag to indicate the EOF has been reached. Once EOF has been reached, this object becomes useless,
71           * and must be thrown away.
72           */
73          boolean eof;
74  
75          /**
76           * Variable tracks how many characters have been written to the current line. Only used when encoding. We use
77           * it to make sure each encoded line never goes beyond lineLength (if lineLength &gt; 0).
78           */
79          int currentLinePos;
80  
81          /**
82           * Writes to the buffer only occur after every 3/5 reads when encoding, and every 4/8 reads when decoding. This
83           * variable helps track that.
84           */
85          int modulus;
86  
87          Context() {
88          }
89  
90          /**
91           * Returns a String useful for debugging (especially within a debugger.)
92           *
93           * @return a String useful for debugging.
94           */
95          @SuppressWarnings("boxing") // OK to ignore boxing here
96          @Override
97          public String toString() {
98              return String.format("%s[buffer=%s, currentLinePos=%s, eof=%s, ibitWorkArea=%s, lbitWorkArea=%s, " +
99                      "modulus=%s, pos=%s, readPos=%s]", this.getClass().getSimpleName(), Arrays.toString(buffer),
100                     currentLinePos, eof, ibitWorkArea, lbitWorkArea, modulus, pos, readPos);
101         }
102     }
103 
104     /**
105      * EOF
106      *
107      * @since 1.7
108      */
109     static final int EOF = -1;
110 
111     /**
112      *  MIME chunk size per RFC 2045 section 6.8.
113      *
114      * <p>
115      * The {@value} character limit does not count the trailing CRLF, but counts all other characters, including any
116      * equal signs.
117      * </p>
118      *
119      * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 6.8</a>
120      */
121     public static final int MIME_CHUNK_SIZE = 76;
122 
123     /**
124      * PEM chunk size per RFC 1421 section 4.3.2.4.
125      *
126      * <p>
127      * The {@value} character limit does not count the trailing CRLF, but counts all other characters, including any
128      * equal signs.
129      * </p>
130      *
131      * @see <a href="http://tools.ietf.org/html/rfc1421">RFC 1421 section 4.3.2.4</a>
132      */
133     public static final int PEM_CHUNK_SIZE = 64;
134 
135     private static final int DEFAULT_BUFFER_RESIZE_FACTOR = 2;
136 
137     /**
138      * Defines the default buffer size - currently {@value}
139      * - must be large enough for at least one encoded block+separator
140      */
141     private static final int DEFAULT_BUFFER_SIZE = 8192;
142 
143     /** Mask used to extract 8 bits, used in decoding bytes */
144     protected static final int MASK_8BITS = 0xff;
145 
146     /**
147      * Byte used to pad output.
148      */
149     protected static final byte PAD_DEFAULT = '='; // Allow static access to default
150 
151     /**
152      * @deprecated Use {@link #pad}. Will be removed in 2.0.
153      */
154     @Deprecated
155     protected final byte PAD = PAD_DEFAULT; // instance variable just in case it needs to vary later
156 
157     protected final byte pad; // instance variable just in case it needs to vary later
158 
159     /** Number of bytes in each full block of unencoded data, e.g. 4 for Base64 and 5 for Base32 */
160     private final int unencodedBlockSize;
161 
162     /** Number of bytes in each full block of encoded data, e.g. 3 for Base64 and 8 for Base32 */
163     private final int encodedBlockSize;
164 
165     /**
166      * Chunksize for encoding. Not used when decoding.
167      * A value of zero or less implies no chunking of the encoded data.
168      * Rounded down to nearest multiple of encodedBlockSize.
169      */
170     protected final int lineLength;
171 
172     /**
173      * Size of chunk separator. Not used unless {@link #lineLength} &gt; 0.
174      */
175     private final int chunkSeparatorLength;
176 
177     /**
178      * Note <code>lineLength</code> is rounded down to the nearest multiple of {@link #encodedBlockSize}
179      * If <code>chunkSeparatorLength</code> is zero, then chunking is disabled.
180      * @param unencodedBlockSize the size of an unencoded block (e.g. Base64 = 3)
181      * @param encodedBlockSize the size of an encoded block (e.g. Base64 = 4)
182      * @param lineLength if &gt; 0, use chunking with a length <code>lineLength</code>
183      * @param chunkSeparatorLength the chunk separator length, if relevant
184      */
185     protected BaseNCodec(final int unencodedBlockSize, final int encodedBlockSize,
186                          final int lineLength, final int chunkSeparatorLength) {
187         this(unencodedBlockSize, encodedBlockSize, lineLength, chunkSeparatorLength, PAD_DEFAULT);
188     }
189 
190     /**
191      * Note <code>lineLength</code> is rounded down to the nearest multiple of {@link #encodedBlockSize}
192      * If <code>chunkSeparatorLength</code> is zero, then chunking is disabled.
193      * @param unencodedBlockSize the size of an unencoded block (e.g. Base64 = 3)
194      * @param encodedBlockSize the size of an encoded block (e.g. Base64 = 4)
195      * @param lineLength if &gt; 0, use chunking with a length <code>lineLength</code>
196      * @param chunkSeparatorLength the chunk separator length, if relevant
197      * @param pad byte used as padding byte.
198      */
199     protected BaseNCodec(final int unencodedBlockSize, final int encodedBlockSize,
200                          final int lineLength, final int chunkSeparatorLength, final byte pad) {
201         this.unencodedBlockSize = unencodedBlockSize;
202         this.encodedBlockSize = encodedBlockSize;
203         final boolean useChunking = lineLength > 0 && chunkSeparatorLength > 0;
204         this.lineLength = useChunking ? (lineLength / encodedBlockSize) * encodedBlockSize : 0;
205         this.chunkSeparatorLength = chunkSeparatorLength;
206 
207         this.pad = pad;
208     }
209 
210     /**
211      * Returns true if this object has buffered data for reading.
212      *
213      * @param context the context to be used
214      * @return true if there is data still available for reading.
215      */
216     boolean hasData(final Context context) {  // package protected for access from I/O streams
217         return context.buffer != null;
218     }
219 
220     /**
221      * Returns the amount of buffered data available for reading.
222      *
223      * @param context the context to be used
224      * @return The amount of buffered data available for reading.
225      */
226     int available(final Context context) {  // package protected for access from I/O streams
227         return context.buffer != null ? context.pos - context.readPos : 0;
228     }
229 
230     /**
231      * Get the default buffer size. Can be overridden.
232      *
233      * @return {@link #DEFAULT_BUFFER_SIZE}
234      */
235     protected int getDefaultBufferSize() {
236         return DEFAULT_BUFFER_SIZE;
237     }
238 
239     /**
240      * Increases our buffer by the {@link #DEFAULT_BUFFER_RESIZE_FACTOR}.
241      * @param context the context to be used
242      */
243     private byte[] resizeBuffer(final Context context) {
244         if (context.buffer == null) {
245             context.buffer = new byte[getDefaultBufferSize()];
246             context.pos = 0;
247             context.readPos = 0;
248         } else {
249             final byte[] b = new byte[context.buffer.length * DEFAULT_BUFFER_RESIZE_FACTOR];
250             System.arraycopy(context.buffer, 0, b, 0, context.buffer.length);
251             context.buffer = b;
252         }
253         return context.buffer;
254     }
255 
256     /**
257      * Ensure that the buffer has room for <code>size</code> bytes
258      *
259      * @param size minimum spare space required
260      * @param context the context to be used
261      * @return the buffer
262      */
263     protected byte[] ensureBufferSize(final int size, final Context context){
264         if ((context.buffer == null) || (context.buffer.length < context.pos + size)){
265             return resizeBuffer(context);
266         }
267         return context.buffer;
268     }
269 
270     /**
271      * Extracts buffered data into the provided byte[] array, starting at position bPos, up to a maximum of bAvail
272      * bytes. Returns how many bytes were actually extracted.
273      * <p>
274      * Package protected for access from I/O streams.
275      *
276      * @param b
277      *            byte[] array to extract the buffered data into.
278      * @param bPos
279      *            position in byte[] array to start extraction at.
280      * @param bAvail
281      *            amount of bytes we're allowed to extract. We may extract fewer (if fewer are available).
282      * @param context
283      *            the context to be used
284      * @return The number of bytes successfully extracted into the provided byte[] array.
285      */
286     int readResults(final byte[] b, final int bPos, final int bAvail, final Context context) {
287         if (context.buffer != null) {
288             final int len = Math.min(available(context), bAvail);
289             System.arraycopy(context.buffer, context.readPos, b, bPos, len);
290             context.readPos += len;
291             if (context.readPos >= context.pos) {
292                 context.buffer = null; // so hasData() will return false, and this method can return -1
293             }
294             return len;
295         }
296         return context.eof ? EOF : 0;
297     }
298 
299     /**
300      * Checks if a byte value is whitespace or not.
301      * Whitespace is taken to mean: space, tab, CR, LF
302      * @param byteToCheck
303      *            the byte to check
304      * @return true if byte is whitespace, false otherwise
305      */
306     protected static boolean isWhiteSpace(final byte byteToCheck) {
307         switch (byteToCheck) {
308             case ' ' :
309             case '\n' :
310             case '\r' :
311             case '\t' :
312                 return true;
313             default :
314                 return false;
315         }
316     }
317 
318     /**
319      * Encodes an Object using the Base-N algorithm. This method is provided in order to satisfy the requirements of
320      * the Encoder interface, and will throw an EncoderException if the supplied object is not of type byte[].
321      *
322      * @param obj
323      *            Object to encode
324      * @return An object (of type byte[]) containing the Base-N encoded data which corresponds to the byte[] supplied.
325      * @throws EncoderException
326      *             if the parameter supplied is not of type byte[]
327      */
328     @Override
329     public Object encode(final Object obj) throws EncoderException {
330         if (!(obj instanceof byte[])) {
331             throw new EncoderException("Parameter supplied to Base-N encode is not a byte[]");
332         }
333         return encode((byte[]) obj);
334     }
335 
336     /**
337      * Encodes a byte[] containing binary data, into a String containing characters in the Base-N alphabet.
338      * Uses UTF8 encoding.
339      *
340      * @param pArray
341      *            a byte array containing binary data
342      * @return A String containing only Base-N character data
343      */
344     public String encodeToString(final byte[] pArray) {
345         return StringUtils.newStringUtf8(encode(pArray));
346     }
347 
348     /**
349      * Encodes a byte[] containing binary data, into a String containing characters in the appropriate alphabet.
350      * Uses UTF8 encoding.
351      *
352      * @param pArray a byte array containing binary data
353      * @return String containing only character data in the appropriate alphabet.
354     */
355     public String encodeAsString(final byte[] pArray){
356         return StringUtils.newStringUtf8(encode(pArray));
357     }
358 
359     /**
360      * Decodes an Object using the Base-N algorithm. This method is provided in order to satisfy the requirements of
361      * the Decoder interface, and will throw a DecoderException if the supplied object is not of type byte[] or String.
362      *
363      * @param obj
364      *            Object to decode
365      * @return An object (of type byte[]) containing the binary data which corresponds to the byte[] or String
366      *         supplied.
367      * @throws DecoderException
368      *             if the parameter supplied is not of type byte[]
369      */
370     @Override
371     public Object decode(final Object obj) throws DecoderException {
372         if (obj instanceof byte[]) {
373             return decode((byte[]) obj);
374         } else if (obj instanceof String) {
375             return decode((String) obj);
376         } else {
377             throw new DecoderException("Parameter supplied to Base-N decode is not a byte[] or a String");
378         }
379     }
380 
381     /**
382      * Decodes a String containing characters in the Base-N alphabet.
383      *
384      * @param pArray
385      *            A String containing Base-N character data
386      * @return a byte array containing binary data
387      */
388     public byte[] decode(final String pArray) {
389         return decode(StringUtils.getBytesUtf8(pArray));
390     }
391 
392     /**
393      * Decodes a byte[] containing characters in the Base-N alphabet.
394      *
395      * @param pArray
396      *            A byte array containing Base-N character data
397      * @return a byte array containing binary data
398      */
399     @Override
400     public byte[] decode(final byte[] pArray) {
401         if (pArray == null || pArray.length == 0) {
402             return pArray;
403         }
404         final Context context = new Context();
405         decode(pArray, 0, pArray.length, context);
406         decode(pArray, 0, EOF, context); // Notify decoder of EOF.
407         final byte[] result = new byte[context.pos];
408         readResults(result, 0, result.length, context);
409         return result;
410     }
411 
412     /**
413      * Encodes a byte[] containing binary data, into a byte[] containing characters in the alphabet.
414      *
415      * @param pArray
416      *            a byte array containing binary data
417      * @return A byte array containing only the base N alphabetic character data
418      */
419     @Override
420     public byte[] encode(final byte[] pArray) {
421         if (pArray == null || pArray.length == 0) {
422             return pArray;
423         }
424         return encode(pArray, 0, pArray.length);
425     }
426 
427     /**
428      * Encodes a byte[] containing binary data, into a byte[] containing
429      * characters in the alphabet.
430      *
431      * @param pArray
432      *            a byte array containing binary data
433      * @param offset
434      *            initial offset of the subarray.
435      * @param length
436      *            length of the subarray.
437      * @return A byte array containing only the base N alphabetic character data
438      * @since 1.11
439      */
440     public byte[] encode(final byte[] pArray, int offset, int length) {
441         if (pArray == null || pArray.length == 0) {
442             return pArray;
443         }
444         final Context context = new Context();
445         encode(pArray, offset, length, context);
446         encode(pArray, offset, EOF, context); // Notify encoder of EOF.
447         final byte[] buf = new byte[context.pos - context.readPos];
448         readResults(buf, 0, buf.length, context);
449         return buf;
450     }
451 
452     // package protected for access from I/O streams
453     abstract void encode(byte[] pArray, int i, int length, Context context);
454 
455     // package protected for access from I/O streams
456     abstract void decode(byte[] pArray, int i, int length, Context context);
457 
458     /**
459      * Returns whether or not the <code>octet</code> is in the current alphabet.
460      * Does not allow whitespace or pad.
461      *
462      * @param value The value to test
463      *
464      * @return <code>true</code> if the value is defined in the current alphabet, <code>false</code> otherwise.
465      */
466     protected abstract boolean isInAlphabet(byte value);
467 
468     /**
469      * Tests a given byte array to see if it contains only valid characters within the alphabet.
470      * The method optionally treats whitespace and pad as valid.
471      *
472      * @param arrayOctet byte array to test
473      * @param allowWSPad if <code>true</code>, then whitespace and PAD are also allowed
474      *
475      * @return <code>true</code> if all bytes are valid characters in the alphabet or if the byte array is empty;
476      *         <code>false</code>, otherwise
477      */
478     public boolean isInAlphabet(final byte[] arrayOctet, final boolean allowWSPad) {
479         for (byte octet : arrayOctet) {
480             if (!isInAlphabet(octet) &&
481                     (!allowWSPad || (octet != pad) && !isWhiteSpace(octet))) {
482                 return false;
483             }
484         }
485         return true;
486     }
487 
488     /**
489      * Tests a given String to see if it contains only valid characters within the alphabet.
490      * The method treats whitespace and PAD as valid.
491      *
492      * @param basen String to test
493      * @return <code>true</code> if all characters in the String are valid characters in the alphabet or if
494      *         the String is empty; <code>false</code>, otherwise
495      * @see #isInAlphabet(byte[], boolean)
496      */
497     public boolean isInAlphabet(final String basen) {
498         return isInAlphabet(StringUtils.getBytesUtf8(basen), true);
499     }
500 
501     /**
502      * Tests a given byte array to see if it contains any characters within the alphabet or PAD.
503      *
504      * Intended for use in checking line-ending arrays
505      *
506      * @param arrayOctet
507      *            byte array to test
508      * @return <code>true</code> if any byte is a valid character in the alphabet or PAD; <code>false</code> otherwise
509      */
510     protected boolean containsAlphabetOrPad(final byte[] arrayOctet) {
511         if (arrayOctet == null) {
512             return false;
513         }
514         for (final byte element : arrayOctet) {
515             if (pad == element || isInAlphabet(element)) {
516                 return true;
517             }
518         }
519         return false;
520     }
521 
522     /**
523      * Calculates the amount of space needed to encode the supplied array.
524      *
525      * @param pArray byte[] array which will later be encoded
526      *
527      * @return amount of space needed to encoded the supplied array.
528      * Returns a long since a max-len array will require &gt; Integer.MAX_VALUE
529      */
530     public long getEncodedLength(final byte[] pArray) {
531         // Calculate non-chunked size - rounded up to allow for padding
532         // cast to long is needed to avoid possibility of overflow
533         long len = ((pArray.length + unencodedBlockSize-1)  / unencodedBlockSize) * (long) encodedBlockSize;
534         if (lineLength > 0) { // We're using chunking
535             // Round up to nearest multiple
536             len += ((len + lineLength-1) / lineLength) * chunkSeparatorLength;
537         }
538         return len;
539     }
540 }