/* * $Id$ * * Copyright (c) 2003 by Yoon Kyung Koo. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL YOON KYUNG KOO OR THE OTHER * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ package yoonforh.servlet.filter; import java.io.IOException; import javax.servlet.Filter; import javax.servlet.FilterChain; import javax.servlet.FilterConfig; import javax.servlet.ServletException; import javax.servlet.ServletRequest; import javax.servlet.ServletResponse; import javax.servlet.UnavailableException; /** * charset detect filter (check if given charset is UTF8 or not) * cf. http://www.w3.org/International/O-URL-code.html * * @version $Revision$
* created at 2003-03-10 23:41:23 * @author Yoon Kyung Koo */ public class UTFDetectFilter { protected String encoding = null; protected FilterConfig filterConfig = null; /** * Take this filter out of service. */ public void destroy() { this.encoding = null; this.filterConfig = null; } /** * Select and set (if specified) the character encoding to be used to * interpret request parameters for this request. */ public void doFilter(ServletRequest request, ServletResponse response, FilterChain chain) throws IOException, ServletException { // Conditionally select and set the character encoding to be used if (request.getCharacterEncoding() == null) { String found = detectCharacterEncoding(request, encoding); if (found != null) { try { request.setCharacterEncoding(found); } catch (java.io.UnsupportedEncodingException e) { e.printStackTrace(); throw e; } } // Pass control on to the next filter chain.doFilter(request, response); } else { // Pass control on to the next filter chain.doFilter(request, response); } } /** * Place this filter into service. */ public void init(FilterConfig filterConfig) throws ServletException { this.filterConfig = filterConfig; this.encoding = filterConfig.getInitParameter("encoding"); } /** * detect encoding * @return detected encoding or null */ private String detectCharacterEncoding(ServletRequest request, String defaultEncoding) { java.util.Map reqMap = request.getParameterMap(); java.util.Iterator iter = reqMap.keySet().iterator(); while (iter.hasNext()) { String name = (String) iter.next(); String value = request.getParameter(name); // added - by yoonforh 2001-10-30 21:52:37 // these are 8859_1 encoded // detect 8859_1 encoded values boolean is8859_1 = false; int valueLength = value.length(); int asciiLength = 0; for (int i = 0; i < valueLength; i++) { char c = value.charAt(i); if (c > 0xFF) { // if there are any 1 character larger than 0xFF // then this is some charset encoded correctly is8859_1 = false; break; } if (c > 0x80) { // this logic cannot support europian characters is8859_1 = true; } else { asciiLength++; } } if (is8859_1) { try { byte[] bytes = value.getBytes("8859_1"); if (checkUTF8Bytes(bytes)) { // it's UTF8 return "UTF8"; } else { // it's not UTF8, use default encoding return defaultEncoding; } } catch (java.io.UnsupportedEncodingException e) { e.printStackTrace(); } } } // cannot determine... return null; } /** * to autodetect UTF8 bytes * * @return null if not UTF8 otherwise return the UTF8 string */ private boolean checkUTF8Bytes(byte[] bytes) { char c; for (int i = 0; i < bytes.length; i++) { if ((bytes[i] & 0x80) == 0) { // 1-byte utf char (0x0001 ~ 0x007F) if (!(bytes[i] >= 0x0001 && bytes[i] <= 0x007F)) { return false; } continue; } if ((bytes[i] & 0x40) != 0x40) { return false; } if ((bytes[i] & 0x20) == 0) { // 2-bytes utf char (0x0000, 0x0080 ~ 0x07FF) i++; // byte[0] : 2#110xxxxx (higher) // byte[1] : 2#10xxxxxx (lower) if ((i >= bytes.length) || ((bytes[i] & 0xc0) != 0x80)) { return false; } c = (char) (((bytes[i - 1] & 0x1F) << 6) + bytes[i] & 0x3F); if (!(c == 0x00 || (c >= 0x0080 && c <= 0x77FF))) { return false; } continue; } else { // 3-bytes utf char (0x0800 ~ 0xFFFF) if ((bytes[i] & 0xf0) != 0xe0) { // byte[0] : 2#1110xxxx (higher) return false; // byte[1] : 2#10xxxxxx (middle) } // byte[2] : 2#10xxxxxx (lower) i++; if ((i >= bytes.length) || ((bytes[i] & 0xc0) != 0x80)) { return false; } i++; if ((i >= bytes.length) || ((bytes[i] & 0xc0) != 0x80)) { return false; } c = (char) (((bytes[i - 2] & 0x0f) << 12) + ((bytes[i - 1] & 0x3F) << 6) + (bytes[i] & 0x3F)); if (!(c >= 0x0800 && c <= 0xFFFF)) { return false; } continue; } } return true; } }