1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.miloss.fgsms.common.codec;
18
19 import java.util.Arrays;
20
21
22 /**
23 * Abstract superclass for Base-N encoders and decoders.
24 *
25 * <p>
26 * This class is thread-safe.
27 * </p>
28 *
29 * @version $Id$
30 */
31 public abstract class BaseNCodec implements BinaryEncoder, BinaryDecoder {
32
33 /**
34 * Holds thread context so classes can be thread-safe.
35 *
36 * This class is not itself thread-safe; each thread must allocate its own copy.
37 *
38 * @since 1.7
39 */
40 static class Context {
41
42 /**
43 * Place holder for the bytes we're dealing with for our based logic.
44 * Bitwise operations store and extract the encoding or decoding from this variable.
45 */
46 int ibitWorkArea;
47
48 /**
49 * Place holder for the bytes we're dealing with for our based logic.
50 * Bitwise operations store and extract the encoding or decoding from this variable.
51 */
52 long lbitWorkArea;
53
54 /**
55 * Buffer for streaming.
56 */
57 byte[] buffer;
58
59 /**
60 * Position where next character should be written in the buffer.
61 */
62 int pos;
63
64 /**
65 * Position where next character should be read from the buffer.
66 */
67 int readPos;
68
69 /**
70 * Boolean flag to indicate the EOF has been reached. Once EOF has been reached, this object becomes useless,
71 * and must be thrown away.
72 */
73 boolean eof;
74
75 /**
76 * Variable tracks how many characters have been written to the current line. Only used when encoding. We use
77 * it to make sure each encoded line never goes beyond lineLength (if lineLength > 0).
78 */
79 int currentLinePos;
80
81 /**
82 * Writes to the buffer only occur after every 3/5 reads when encoding, and every 4/8 reads when decoding. This
83 * variable helps track that.
84 */
85 int modulus;
86
87 Context() {
88 }
89
90 /**
91 * Returns a String useful for debugging (especially within a debugger.)
92 *
93 * @return a String useful for debugging.
94 */
95 @SuppressWarnings("boxing") // OK to ignore boxing here
96 @Override
97 public String toString() {
98 return String.format("%s[buffer=%s, currentLinePos=%s, eof=%s, ibitWorkArea=%s, lbitWorkArea=%s, " +
99 "modulus=%s, pos=%s, readPos=%s]", this.getClass().getSimpleName(), Arrays.toString(buffer),
100 currentLinePos, eof, ibitWorkArea, lbitWorkArea, modulus, pos, readPos);
101 }
102 }
103
/**
 * End-of-stream marker. {@link #readResults} returns it when no buffered data remains and the
 * context has reached EOF; the array-based encode/decode methods also pass it as the length
 * argument to notify the codec that the input has ended.
 *
 * @since 1.7
 */
static final int EOF = -1;

/**
 * MIME chunk size per RFC 2045 section 6.8.
 *
 * <p>
 * The {@value} character limit does not count the trailing CRLF, but counts all other characters, including any
 * equal signs.
 * </p>
 *
 * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 6.8</a>
 */
public static final int MIME_CHUNK_SIZE = 76;

/**
 * PEM chunk size per RFC 1421 section 4.3.2.4.
 *
 * <p>
 * The {@value} character limit does not count the trailing CRLF, but counts all other characters, including any
 * equal signs.
 * </p>
 *
 * @see <a href="http://tools.ietf.org/html/rfc1421">RFC 1421 section 4.3.2.4</a>
 */
public static final int PEM_CHUNK_SIZE = 64;

/** Factor by which an existing buffer's length is multiplied when it is resized. */
private static final int DEFAULT_BUFFER_RESIZE_FACTOR = 2;

/**
 * Defines the default buffer size - currently {@value}
 * - must be large enough for at least one encoded block+separator
 */
private static final int DEFAULT_BUFFER_SIZE = 8192;

/** Mask used to extract 8 bits, used in decoding bytes */
protected static final int MASK_8BITS = 0xff;

/**
 * Byte used to pad output.
 */
protected static final byte PAD_DEFAULT = '='; // Allow static access to default

/**
 * Always equal to {@link #PAD_DEFAULT}; it does not reflect a custom pad byte passed to the constructor.
 *
 * @deprecated Use {@link #pad}. Will be removed in 2.0.
 */
@Deprecated
protected final byte PAD = PAD_DEFAULT; // instance variable just in case it needs to vary later

/** Pad byte used by this instance, as supplied to the constructor. */
protected final byte pad; // instance variable just in case it needs to vary later

/** Number of bytes in each full block of unencoded data, e.g. 3 for Base64 and 5 for Base32 */
private final int unencodedBlockSize;

/** Number of bytes in each full block of encoded data, e.g. 4 for Base64 and 8 for Base32 */
private final int encodedBlockSize;

/**
 * Chunksize for encoding. Not used when decoding.
 * A value of zero or less implies no chunking of the encoded data.
 * Rounded down to nearest multiple of encodedBlockSize.
 */
protected final int lineLength;

/**
 * Size of chunk separator. Not used unless {@link #lineLength} &gt; 0.
 */
private final int chunkSeparatorLength;
176
/**
 * Creates a codec that pads with {@link #PAD_DEFAULT}, by delegating to the five-argument constructor.
 * <p>
 * Note <code>lineLength</code> is rounded down to the nearest multiple of {@link #encodedBlockSize}.
 * If <code>chunkSeparatorLength</code> is zero, then chunking is disabled.
 * </p>
 * @param unencodedBlockSize the size of an unencoded block (e.g. Base64 = 3)
 * @param encodedBlockSize the size of an encoded block (e.g. Base64 = 4)
 * @param lineLength if &gt; 0, use chunking with a length <code>lineLength</code>
 * @param chunkSeparatorLength the chunk separator length, if relevant
 */
protected BaseNCodec(final int unencodedBlockSize, final int encodedBlockSize,
        final int lineLength, final int chunkSeparatorLength) {
    this(unencodedBlockSize, encodedBlockSize, lineLength, chunkSeparatorLength, PAD_DEFAULT);
}
189
/**
 * Creates a codec with explicit block sizes, chunking parameters and pad byte.
 * <p>
 * Chunking is only enabled when both <code>lineLength</code> and <code>chunkSeparatorLength</code>
 * are greater than zero. In that case <code>lineLength</code> is rounded down to the nearest
 * multiple of {@link #encodedBlockSize}; otherwise the stored line length is zero.
 * </p>
 * @param unencodedBlockSize the size of an unencoded block (e.g. Base64 = 3)
 * @param encodedBlockSize the size of an encoded block (e.g. Base64 = 4)
 * @param lineLength if &gt; 0, use chunking with a length <code>lineLength</code>
 * @param chunkSeparatorLength the chunk separator length, if relevant
 * @param pad byte used as padding byte.
 */
protected BaseNCodec(final int unencodedBlockSize, final int encodedBlockSize,
        final int lineLength, final int chunkSeparatorLength, final byte pad) {
    this.unencodedBlockSize = unencodedBlockSize;
    this.encodedBlockSize = encodedBlockSize;
    if (lineLength > 0 && chunkSeparatorLength > 0) {
        // Integer division drops the remainder, so this is a whole number of encoded blocks.
        final int wholeBlocks = lineLength / encodedBlockSize;
        this.lineLength = wholeBlocks * encodedBlockSize;
    } else {
        this.lineLength = 0;
    }
    this.chunkSeparatorLength = chunkSeparatorLength;
    this.pad = pad;
}
209
210 /**
211 * Returns true if this object has buffered data for reading.
212 *
213 * @param context the context to be used
214 * @return true if there is data still available for reading.
215 */
216 boolean hasData(final Context context) { // package protected for access from I/O streams
217 return context.buffer != null;
218 }
219
220 /**
221 * Returns the amount of buffered data available for reading.
222 *
223 * @param context the context to be used
224 * @return The amount of buffered data available for reading.
225 */
226 int available(final Context context) { // package protected for access from I/O streams
227 return context.buffer != null ? context.pos - context.readPos : 0;
228 }
229
/**
 * Get the default buffer size, used when a context's buffer is allocated for the first time.
 * Can be overridden.
 *
 * @return {@link #DEFAULT_BUFFER_SIZE}
 */
protected int getDefaultBufferSize() {
    return DEFAULT_BUFFER_SIZE;
}
238
239 /**
240 * Increases our buffer by the {@link #DEFAULT_BUFFER_RESIZE_FACTOR}.
241 * @param context the context to be used
242 */
243 private byte[] resizeBuffer(final Context context) {
244 if (context.buffer == null) {
245 context.buffer = new byte[getDefaultBufferSize()];
246 context.pos = 0;
247 context.readPos = 0;
248 } else {
249 final byte[] b = new byte[context.buffer.length * DEFAULT_BUFFER_RESIZE_FACTOR];
250 System.arraycopy(context.buffer, 0, b, 0, context.buffer.length);
251 context.buffer = b;
252 }
253 return context.buffer;
254 }
255
256 /**
257 * Ensure that the buffer has room for <code>size</code> bytes
258 *
259 * @param size minimum spare space required
260 * @param context the context to be used
261 * @return the buffer
262 */
263 protected byte[] ensureBufferSize(final int size, final Context context){
264 if ((context.buffer == null) || (context.buffer.length < context.pos + size)){
265 return resizeBuffer(context);
266 }
267 return context.buffer;
268 }
269
270 /**
271 * Extracts buffered data into the provided byte[] array, starting at position bPos, up to a maximum of bAvail
272 * bytes. Returns how many bytes were actually extracted.
273 * <p>
274 * Package protected for access from I/O streams.
275 *
276 * @param b
277 * byte[] array to extract the buffered data into.
278 * @param bPos
279 * position in byte[] array to start extraction at.
280 * @param bAvail
281 * amount of bytes we're allowed to extract. We may extract fewer (if fewer are available).
282 * @param context
283 * the context to be used
284 * @return The number of bytes successfully extracted into the provided byte[] array.
285 */
286 int readResults(final byte[] b, final int bPos, final int bAvail, final Context context) {
287 if (context.buffer != null) {
288 final int len = Math.min(available(context), bAvail);
289 System.arraycopy(context.buffer, context.readPos, b, bPos, len);
290 context.readPos += len;
291 if (context.readPos >= context.pos) {
292 context.buffer = null; // so hasData() will return false, and this method can return -1
293 }
294 return len;
295 }
296 return context.eof ? EOF : 0;
297 }
298
299 /**
300 * Checks if a byte value is whitespace or not.
301 * Whitespace is taken to mean: space, tab, CR, LF
302 * @param byteToCheck
303 * the byte to check
304 * @return true if byte is whitespace, false otherwise
305 */
306 protected static boolean isWhiteSpace(final byte byteToCheck) {
307 switch (byteToCheck) {
308 case ' ' :
309 case '\n' :
310 case '\r' :
311 case '\t' :
312 return true;
313 default :
314 return false;
315 }
316 }
317
318 /**
319 * Encodes an Object using the Base-N algorithm. This method is provided in order to satisfy the requirements of
320 * the Encoder interface, and will throw an EncoderException if the supplied object is not of type byte[].
321 *
322 * @param obj
323 * Object to encode
324 * @return An object (of type byte[]) containing the Base-N encoded data which corresponds to the byte[] supplied.
325 * @throws EncoderException
326 * if the parameter supplied is not of type byte[]
327 */
328 @Override
329 public Object encode(final Object obj) throws EncoderException {
330 if (!(obj instanceof byte[])) {
331 throw new EncoderException("Parameter supplied to Base-N encode is not a byte[]");
332 }
333 return encode((byte[]) obj);
334 }
335
336 /**
337 * Encodes a byte[] containing binary data, into a String containing characters in the Base-N alphabet.
338 * Uses UTF8 encoding.
339 *
340 * @param pArray
341 * a byte array containing binary data
342 * @return A String containing only Base-N character data
343 */
344 public String encodeToString(final byte[] pArray) {
345 return StringUtils.newStringUtf8(encode(pArray));
346 }
347
348 /**
349 * Encodes a byte[] containing binary data, into a String containing characters in the appropriate alphabet.
350 * Uses UTF8 encoding.
351 *
352 * @param pArray a byte array containing binary data
353 * @return String containing only character data in the appropriate alphabet.
354 */
355 public String encodeAsString(final byte[] pArray){
356 return StringUtils.newStringUtf8(encode(pArray));
357 }
358
359 /**
360 * Decodes an Object using the Base-N algorithm. This method is provided in order to satisfy the requirements of
361 * the Decoder interface, and will throw a DecoderException if the supplied object is not of type byte[] or String.
362 *
363 * @param obj
364 * Object to decode
365 * @return An object (of type byte[]) containing the binary data which corresponds to the byte[] or String
366 * supplied.
367 * @throws DecoderException
368 * if the parameter supplied is not of type byte[]
369 */
370 @Override
371 public Object decode(final Object obj) throws DecoderException {
372 if (obj instanceof byte[]) {
373 return decode((byte[]) obj);
374 } else if (obj instanceof String) {
375 return decode((String) obj);
376 } else {
377 throw new DecoderException("Parameter supplied to Base-N decode is not a byte[] or a String");
378 }
379 }
380
381 /**
382 * Decodes a String containing characters in the Base-N alphabet.
383 *
384 * @param pArray
385 * A String containing Base-N character data
386 * @return a byte array containing binary data
387 */
388 public byte[] decode(final String pArray) {
389 return decode(StringUtils.getBytesUtf8(pArray));
390 }
391
392 /**
393 * Decodes a byte[] containing characters in the Base-N alphabet.
394 *
395 * @param pArray
396 * A byte array containing Base-N character data
397 * @return a byte array containing binary data
398 */
399 @Override
400 public byte[] decode(final byte[] pArray) {
401 if (pArray == null || pArray.length == 0) {
402 return pArray;
403 }
404 final Context context = new Context();
405 decode(pArray, 0, pArray.length, context);
406 decode(pArray, 0, EOF, context); // Notify decoder of EOF.
407 final byte[] result = new byte[context.pos];
408 readResults(result, 0, result.length, context);
409 return result;
410 }
411
412 /**
413 * Encodes a byte[] containing binary data, into a byte[] containing characters in the alphabet.
414 *
415 * @param pArray
416 * a byte array containing binary data
417 * @return A byte array containing only the base N alphabetic character data
418 */
419 @Override
420 public byte[] encode(final byte[] pArray) {
421 if (pArray == null || pArray.length == 0) {
422 return pArray;
423 }
424 return encode(pArray, 0, pArray.length);
425 }
426
427 /**
428 * Encodes a byte[] containing binary data, into a byte[] containing
429 * characters in the alphabet.
430 *
431 * @param pArray
432 * a byte array containing binary data
433 * @param offset
434 * initial offset of the subarray.
435 * @param length
436 * length of the subarray.
437 * @return A byte array containing only the base N alphabetic character data
438 * @since 1.11
439 */
440 public byte[] encode(final byte[] pArray, int offset, int length) {
441 if (pArray == null || pArray.length == 0) {
442 return pArray;
443 }
444 final Context context = new Context();
445 encode(pArray, offset, length, context);
446 encode(pArray, offset, EOF, context); // Notify encoder of EOF.
447 final byte[] buf = new byte[context.pos - context.readPos];
448 readResults(buf, 0, buf.length, context);
449 return buf;
450 }
451
// package protected for access from I/O streams
/**
 * Encodes a range of the given array, keeping all working state in the supplied context.
 * <p>
 * The array-based <code>encode</code> methods of this class call it once with the data range and
 * then once more with <code>length</code> set to {@link #EOF} to notify the encoder that the
 * input has ended.
 * NOTE(review): implementations presumably place their output in the context's buffer, since
 * callers collect it with {@link #readResults}; confirm against the subclasses.
 * </p>
 *
 * @param pArray the array holding the data to encode
 * @param i index in <code>pArray</code> at which the data starts
 * @param length number of bytes to process, or {@link #EOF} to signal end of input
 * @param context the context to be used
 */
abstract void encode(byte[] pArray, int i, int length, Context context);
454
// package protected for access from I/O streams
/**
 * Decodes a range of the given array, keeping all working state in the supplied context.
 * <p>
 * {@link #decode(byte[])} calls it once with the whole input and then once more with
 * <code>length</code> set to {@link #EOF} to notify the decoder that the input has ended.
 * NOTE(review): implementations presumably place their output in the context's buffer, since
 * the caller collects it with {@link #readResults}; confirm against the subclasses.
 * </p>
 *
 * @param pArray the array holding the data to decode
 * @param i index in <code>pArray</code> at which the data starts
 * @param length number of bytes to process, or {@link #EOF} to signal end of input
 * @param context the context to be used
 */
abstract void decode(byte[] pArray, int i, int length, Context context);
457
/**
 * Returns whether or not the given byte is in the current alphabet.
 * Does not allow whitespace or pad; callers in this class that want to accept those check
 * {@link #pad} and {@link #isWhiteSpace(byte)} separately.
 *
 * @param value The value to test
 *
 * @return <code>true</code> if the value is defined in the current alphabet, <code>false</code> otherwise.
 */
protected abstract boolean isInAlphabet(byte value);
467
468 /**
469 * Tests a given byte array to see if it contains only valid characters within the alphabet.
470 * The method optionally treats whitespace and pad as valid.
471 *
472 * @param arrayOctet byte array to test
473 * @param allowWSPad if <code>true</code>, then whitespace and PAD are also allowed
474 *
475 * @return <code>true</code> if all bytes are valid characters in the alphabet or if the byte array is empty;
476 * <code>false</code>, otherwise
477 */
478 public boolean isInAlphabet(final byte[] arrayOctet, final boolean allowWSPad) {
479 for (byte octet : arrayOctet) {
480 if (!isInAlphabet(octet) &&
481 (!allowWSPad || (octet != pad) && !isWhiteSpace(octet))) {
482 return false;
483 }
484 }
485 return true;
486 }
487
488 /**
489 * Tests a given String to see if it contains only valid characters within the alphabet.
490 * The method treats whitespace and PAD as valid.
491 *
492 * @param basen String to test
493 * @return <code>true</code> if all characters in the String are valid characters in the alphabet or if
494 * the String is empty; <code>false</code>, otherwise
495 * @see #isInAlphabet(byte[], boolean)
496 */
497 public boolean isInAlphabet(final String basen) {
498 return isInAlphabet(StringUtils.getBytesUtf8(basen), true);
499 }
500
501 /**
502 * Tests a given byte array to see if it contains any characters within the alphabet or PAD.
503 *
504 * Intended for use in checking line-ending arrays
505 *
506 * @param arrayOctet
507 * byte array to test
508 * @return <code>true</code> if any byte is a valid character in the alphabet or PAD; <code>false</code> otherwise
509 */
510 protected boolean containsAlphabetOrPad(final byte[] arrayOctet) {
511 if (arrayOctet == null) {
512 return false;
513 }
514 for (final byte element : arrayOctet) {
515 if (pad == element || isInAlphabet(element)) {
516 return true;
517 }
518 }
519 return false;
520 }
521
522 /**
523 * Calculates the amount of space needed to encode the supplied array.
524 *
525 * @param pArray byte[] array which will later be encoded
526 *
527 * @return amount of space needed to encoded the supplied array.
528 * Returns a long since a max-len array will require > Integer.MAX_VALUE
529 */
530 public long getEncodedLength(final byte[] pArray) {
531 // Calculate non-chunked size - rounded up to allow for padding
532 // cast to long is needed to avoid possibility of overflow
533 long len = ((pArray.length + unencodedBlockSize-1) / unencodedBlockSize) * (long) encodedBlockSize;
534 if (lineLength > 0) { // We're using chunking
535 // Round up to nearest multiple
536 len += ((len + lineLength-1) / lineLength) * chunkSeparatorLength;
537 }
538 return len;
539 }
540 }