| 1 | /* |
| 2 | * jDTAUS Core Utilities |
| 3 | * Copyright (C) 2005 Christian Schulte |
| 4 | * <cs@schulte.it> |
| 5 | * |
| 6 | * This library is free software; you can redistribute it and/or |
| 7 | * modify it under the terms of the GNU Lesser General Public |
| 8 | * License as published by the Free Software Foundation; either |
| 9 | * version 2.1 of the License, or any later version. |
| 10 | * |
| 11 | * This library is distributed in the hope that it will be useful, |
| 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 14 | * Lesser General Public License for more details. |
| 15 | * |
| 16 | * You should have received a copy of the GNU Lesser General Public |
| 17 | * License along with this library; if not, write to the Free Software |
| 18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
| 19 | * |
| 20 | */ |
| 21 | package org.jdtaus.core.nio.util; |
| 22 | |
| 23 | import java.io.BufferedReader; |
| 24 | import java.io.IOException; |
| 25 | import java.io.InputStream; |
| 26 | import java.io.InputStreamReader; |
| 27 | import java.net.URL; |
| 28 | import java.nio.ByteBuffer; |
| 29 | import java.nio.CharBuffer; |
| 30 | import java.nio.charset.Charset; |
| 31 | import java.nio.charset.spi.CharsetProvider; |
| 32 | import java.util.Enumeration; |
| 33 | import java.util.HashMap; |
| 34 | import java.util.Iterator; |
| 35 | import java.util.LinkedList; |
| 36 | import java.util.List; |
| 37 | import java.util.Map; |
| 38 | |
/**
 * Charset coder and decoder utility.
 * <p>This class extends the former charset provider implementations which
 * cannot be used in every environment (e.g. WebStart, Maven) without
 * installation in the JRE extensions directory where they are available to the
 * system classloader. It uses the same service provider files as the
 * platform implementation ({@code java.nio.charset.spi.CharsetProvider}) but
 * is capable of using the current thread's classloader before falling back
 * to the system classloader for loading {@code CharsetProvider} classes.</p>
 * <p>Loaded provider instances and resolved charsets are cached in static
 * collections; every access to these caches is performed while holding the
 * monitor of {@code Charsets.class}.</p>
 *
 * @author <a href="mailto:cs@schulte.it">Christian Schulte</a>
 * @version $JDTAUS: Charsets.java 8743 2012-10-07 03:06:20Z schulte $
 */
public class Charsets
{
    //--Charsets----------------------------------------------------------------

    /** Name of the service provider resource listing provider classes. */
    private static final String PROVIDER_RESOURCE =
        "META-INF/services/java.nio.charset.spi.CharsetProvider";

    /** Cached {@code CharsetProvider} instances. */
    private static final List providers = new LinkedList();

    /** Cached {@code Charset} instances by name. */
    private static final Map charsets = new HashMap( 100 );

    /** Private constructor. */
    private Charsets()
    {
        super();
    }

    /**
     * Gets a charset for the given name.
     * <p>The cache is consulted first. On a cache miss the charset providers
     * found via the service provider files are searched and finally the
     * platform's {@code Charset.forName} is used. Any charset found is
     * cached under {@code name}.</p>
     *
     * @param name the name of the charset to return.
     *
     * @return a {@code Charset} corresponding to {@code name}.
     *
     * @throws IOException if reading the service provider files fails.
     * @throws ClassNotFoundException if a service provider file defines
     * a class which cannot be loaded.
     * @throws InstantiationException if creating an instance of a
     * {@code CharsetProvider} fails.
     * @throws IllegalAccessException if a {@code CharsetProvider} class
     * does not define a public no-arg constructor.
     * @throws java.nio.charset.IllegalCharsetNameException if {@code name} is
     * no valid charset name.
     * @throws java.nio.charset.UnsupportedCharsetException if {@code name} is
     * not supported.
     */
    private static Charset getCharset( final String name )
        throws IOException, ClassNotFoundException, InstantiationException,
               IllegalAccessException
    {
        // Both caches are plain, unsynchronized collections, so reads as well
        // as writes have to happen under the same lock.
        synchronized ( Charsets.class )
        {
            final Charset cached = ( Charset ) charsets.get( name );
            if ( cached != null )
            {
                // Already resolved; no need to touch the provider list (and
                // possibly rescan the classpath) again.
                return cached;
            }

            // Populate the provider list with available providers if it is
            // empty. The list is only updated after all provider files have
            // been processed successfully so that a failure never leaves a
            // partially populated list behind.
            if ( providers.isEmpty() )
            {
                providers.addAll( loadProviders() );
            }

            // Search all available providers for a charset matching "name".
            Charset charset = null;
            for ( final Iterator it = providers.iterator();
                  it.hasNext() && charset == null; )
            {
                charset =
                    ( ( CharsetProvider ) it.next() ).charsetForName( name );

            }

            // Fall back to platform charsets if nothing is found so far.
            if ( charset == null )
            {
                charset = Charset.forName( name );
            }

            charsets.put( name, charset );
            return charset;
        }
    }

    /**
     * Instantiates all classes listed in the service provider files visible
     * to the current thread's context classloader, or to the system
     * classloader if the thread has no context classloader.
     *
     * @return a new list holding one instance per class name listed.
     *
     * @throws IOException if reading the service provider files fails.
     * @throws ClassNotFoundException if a service provider file defines
     * a class which cannot be loaded.
     * @throws InstantiationException if creating an instance fails.
     * @throws IllegalAccessException if a listed class does not define a
     * public no-arg constructor.
     */
    private static List loadProviders()
        throws IOException, ClassNotFoundException, InstantiationException,
               IllegalAccessException
    {
        // Use the current thread's context classloader if available or
        // fall back to the system classloader.
        ClassLoader classLoader =
            Thread.currentThread().getContextClassLoader();

        if ( classLoader == null )
        {
            classLoader = ClassLoader.getSystemClassLoader();
        }

        assert classLoader != null :
            "Expected system classloader to always be available.";

        final List loaded = new LinkedList();
        final Enumeration providerFiles =
            classLoader.getResources( PROVIDER_RESOURCE );

        while ( providerFiles != null && providerFiles.hasMoreElements() )
        {
            final URL url = ( URL ) providerFiles.nextElement();
            final InputStream in = url.openStream();

            try
            {
                final BufferedReader reader = new BufferedReader(
                    new InputStreamReader( in, "UTF-8" ) );

                String line;
                while ( ( line = reader.readLine() ) != null )
                {
                    final String className = parseClassName( line );

                    if ( className.length() > 0 )
                    {
                        loaded.add(
                            classLoader.loadClass( className ).newInstance() );

                    }
                }
            }
            finally
            {
                // Closing the underlying stream releases the resource even
                // if reading or class loading failed.
                in.close();
            }
        }

        return loaded;
    }

    /**
     * Extracts the class name from a line of a service provider file.
     * <p>Everything following the first {@code '#'} character is treated as
     * a comment and surrounding whitespace is removed.</p>
     *
     * @param line the line to parse.
     *
     * @return the class name given by {@code line} or an empty string if the
     * line is blank or consists of a comment only.
     */
    private static String parseClassName( final String line )
    {
        final int comment = line.indexOf( '#' );
        final String content =
            comment < 0 ? line : line.substring( 0, comment );

        return content.trim();
    }

    /**
     * Resolves a charset name, converting the checked exceptions caused by
     * broken service provider configurations to {@code AssertionError}s.
     *
     * @param charset the name of the charset to resolve.
     *
     * @return the {@code Charset} corresponding to {@code charset}.
     *
     * @throws java.nio.charset.IllegalCharsetNameException if {@code charset}
     * is no valid charset name.
     * @throws java.nio.charset.UnsupportedCharsetException if {@code charset}
     * is not supported.
     */
    private static Charset lookup( final String charset )
    {
        try
        {
            return Charsets.getCharset( charset );
        }
        catch ( final ClassNotFoundException e )
        {
            throw new AssertionError( e );
        }
        catch ( final InstantiationException e )
        {
            throw new AssertionError( e );
        }
        catch ( final IllegalAccessException e )
        {
            throw new AssertionError( e );
        }
        catch ( final IOException e )
        {
            throw new AssertionError( e );
        }
    }

    /**
     * Encodes a given string to an array of bytes representing the characters
     * of the string in a given charset.
     *
     * @param str the string to encode.
     * @param charset the name of the charset to use.
     *
     * @return a new array holding the bytes of {@code str} encoded using
     * {@code charset}.
     *
     * @throws NullPointerException if {@code str} or {@code charset} is
     * {@code null}.
     * @throws java.nio.charset.IllegalCharsetNameException if {@code charset}
     * is no valid charset name.
     * @throws java.nio.charset.UnsupportedCharsetException if {@code charset}
     * is not supported.
     */
    public static byte[] encode( final String str, final String charset )
    {
        if ( str == null )
        {
            throw new NullPointerException( "str" );
        }
        if ( charset == null )
        {
            throw new NullPointerException( "charset" );
        }

        final ByteBuffer buf = Charsets.lookup( charset ).encode( str );

        // Copy exactly the bytes between position and limit; this works for
        // array backed and direct buffers alike.
        final byte[] ret = new byte[ buf.remaining() ];
        buf.get( ret );
        return ret;
    }

    /**
     * Decodes the bytes of a given array to a string.
     *
     * @param bytes the bytes to decode.
     * @param charset the name of the charset to use.
     *
     * @return the string decoded from all of {@code bytes} using
     * {@code charset}.
     *
     * @throws NullPointerException if {@code bytes} or {@code charset} is
     * {@code null}.
     * @throws java.nio.charset.IllegalCharsetNameException if {@code charset}
     * is no valid charset name.
     * @throws java.nio.charset.UnsupportedCharsetException if {@code charset}
     * is not supported.
     */
    public static String decode( final byte[] bytes, final String charset )
    {
        if ( bytes == null )
        {
            throw new NullPointerException( "bytes" );
        }

        return Charsets.decode( bytes, 0, bytes.length, charset );
    }

    /**
     * Decodes the bytes of a given array to a string.
     *
     * @param bytes the bytes to decode.
     * @param off the offset from where to start decoding.
     * @param count the number of bytes to decode starting at {@code off}.
     * @param charset the name of the charset to use.
     *
     * @return the string decoded from the {@code count} bytes of
     * {@code bytes} starting at {@code off} using {@code charset}.
     *
     * @throws NullPointerException if {@code bytes} or {@code charset} is
     * {@code null}.
     * @throws IndexOutOfBoundsException if {@code off} is negative or greater
     * than the length of {@code bytes} or {@code count} is negative or
     * {@code off + count} is greater than the length of {@code bytes}.
     * @throws java.nio.charset.IllegalCharsetNameException if {@code charset}
     * is no valid charset name.
     * @throws java.nio.charset.UnsupportedCharsetException if {@code charset}
     * is not supported.
     */
    public static String decode( final byte[] bytes, final int off,
                                 final int count, final String charset )
    {
        if ( bytes == null )
        {
            throw new NullPointerException( "bytes" );
        }
        if ( charset == null )
        {
            throw new NullPointerException( "charset" );
        }
        if ( off < 0 || off > bytes.length )
        {
            throw new ArrayIndexOutOfBoundsException( off );
        }
        // Compare against the remaining length instead of computing
        // "off + count" which may overflow.
        if ( count < 0 || count > bytes.length - off )
        {
            throw new ArrayIndexOutOfBoundsException(
                "off=" + off + ", count=" + count + ", length=" +
                bytes.length );

        }

        final CharBuffer buf = Charsets.lookup( charset ).decode(
            ByteBuffer.wrap( bytes, off, count ) );

        // CharBuffer.toString() yields the characters between position and
        // limit, i.e. exactly the decoded characters.
        return buf.toString();
    }

    //----------------------------------------------------------------Charsets--
}