天天看点

Java字符集编码与转码

Java 的字符(char/String)在运行时内存中总是采用 Unicode 字符集的 UTF-16 编码(编译后的 class 文件中,字符串常量以改进的 UTF-8 形式保存);只有在字符与字节相互转换时(例如读写文件、调用 getBytes)才涉及其他编码,此时如果没有指定编码,则使用平台默认编码。

Java 编译的时候,编译器会按照指定的编码(javac 的 -encoding 参数)或者系统默认的编码读取 java 源文件,将其内容解码为 Unicode 字符后加载到内存中进行编译。

下面给出一个Java转码工具,没有测试过,呵呵:

package lavasoft.common; 

import org.apache.commons.logging.Log; 

import org.apache.commons.logging.LogFactory; 

import java.io.*; 

/** 

* 转码工具,全面支持文件、字符串的转码 

* @author Administrator 2009-11-29 16:14:21 

*/ 

public class EncodingToolkit { 

        private static Log log = LogFactory.getLog(EncodingToolkit.class); 

        public static void main(String[] args) { 

                String han = "汉"; 

                System.out.println("---------"); 

        } 

        /** 

         * 对字符串重新编码 

         * 

         * @param text                字符串 

         * @param resEncoding 源编码 

         * @param newEncoding 新编码 

         * @return 重新编码后的字符串 

         */ 

        public static String reEncoding(String text, String resEncoding, String newEncoding) { 

                String rs = null; 

                try { 

                        rs = new String(text.getBytes(resEncoding), newEncoding); 

                } catch (UnsupportedEncodingException e) { 

                        log.error("读取文件为一个内存字符串失败,失败原因是使用了不支持的字符编码"); 

                        throw new RuntimeException(e); 

                } 

                return rs; 

         * 重新编码Unicode字符串 

         * @param text                源字符串 

         * @param newEncoding 新的编码 

         * @return 指定编码的字符串 

        public static String reEncoding(String text, String newEncoding) { 

                        rs = new String(text.getBytes(), newEncoding); 

                        log.error("读取文件为一个内存字符串失败,失败原因是使用了不支持的字符编码" + newEncoding); 

         * 文本文件重新编码 

         * @param resFile         源文件 

         * @param resEncoding 源文件编码 

         * @param distFile        目标文件 

         * @param newEncoding 目标文件编码 

         * @return 转码成功时候返回ture,否则false 

        public static boolean reEncoding(File resFile, String resEncoding, File distFile, String newEncoding) { 

                boolean flag = true; 

                InputStreamReader reader = null; 

                OutputStreamWriter writer = null; 

                        reader = new InputStreamReader(new FileInputStream(resFile), resEncoding); 

                        writer = new OutputStreamWriter(new FileOutputStream(distFile), newEncoding); 

                        char buf[] = new char[1024 * 64];         //字符缓冲区 

                        int len; 

                        while ((len = reader.read(buf)) != -1) { 

                                writer.write(buf, 0, len); 

                        } 

                        writer.flush(); 

                        writer.close(); 

                        reader.close(); 

                } catch (FileNotFoundException e) { 

                        flag = false; 

                        log.error("没有找到文件,转码发生异常!"); 

                } catch (IOException e) { 

                        log.error("读取文件为一个内存字符串失败,失败原因是读取文件异常!"); 

                } finally { 

                        if (reader != null) try { 

                                reader.close(); 

                        } catch (IOException e) { 

                                flag = false; 

                                throw new RuntimeException(e); 

                        } finally { 

                                if (writer != null) try { 

                                        writer.close(); 

                                } catch (IOException e) { 

                                        flag = false; 

                                        throw new RuntimeException(e); 

                                } 

                return flag; 

         * 读取文件为一个Unicode编码的内存字符串,保持文件原有的换行格式 

         * @param resFile    源文件对象 

         * @param encoding 文件字符集编码 

         * @return 文件内容的Unicode字符串 

        public static String file2String(File resFile, String encoding) { 

                StringBuffer sb = new StringBuffer(); 

                        LineNumberReader reader = new LineNumberReader(new BufferedReader(new InputStreamReader(new FileInputStream(resFile), encoding))); 

                        String line; 

                        while ((line = reader.readLine()) != null) { 

                                sb.append(line).append(System.getProperty("line.separator")); 

                        log.error("读取文件为一个内存字符串失败,失败原因是使用了不支持的字符编码" + encoding); 

                        log.error("读取文件为一个内存字符串失败,失败原因所给的文件" + resFile + "不存在!"); 

                return sb.toString(); 

         * 使用指定编码读取输入流为一个内存Unicode字符串,保持文件原有的换行格式 

         * @param in             输入流 

         * @param encoding 构建字符流时候使用的字符编码 

         * @return Unicode字符串 

        public static String stream2String(InputStream in, String encoding) { 

                LineNumberReader reader = null; 

                        reader = new LineNumberReader(new BufferedReader(new InputStreamReader(in, encoding))); 

                        in.close(); 

                        if (in != null) try { 

                                in.close(); 

                                log.error("关闭输入流发生异常!", e); 

         * 字符串保存为制定编码的文本文件 

         * @param text         字符串 

         * @param distFile 目标文件 

         * @param encoding 目标文件的编码 

         * @return 转换成功时候返回ture,否则false 

        public static boolean string2TextFile(String text, File distFile, String encoding) { 

                if (!distFile.getParentFile().exists()) distFile.getParentFile().mkdirs(); 

                        writer = new OutputStreamWriter(new FileOutputStream(distFile), encoding); 

                        writer.write(text); 

                        log.error("将字符串写入文件发生异常!"); 

                        if (writer != null) try { 

                                writer.close(); 

                                log.error("关闭输出流发生异常!", e); 

}

本文转自 leizhimin 51CTO博客,原文链接:http://blog.51cto.com/lavasoft/236392,如需转载请自行联系原作者