用AI生成的一段穷举编码转换的java代码
By: Roy.Liu | Last updated: 2025-12-02
从一个数据库导入数据到另外一个数据库的时候，发现很多中文都变成了乱码。为了搞清楚这些乱码可能的编码转换关系，用 AI 生成了一段检查代码，果然找到了正确的编码转换关系。

具体AI生成的代码如下:
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CodingErrorAction;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.Locale;
import java.util.Map;
/**
 * Command-line helper for diagnosing and repairing mis-decoded (mojibake) text.
 *
 * <p>Three sub-commands are dispatched by {@link #main1(String[])}:
 * <ul>
 *   <li>{@code convert} - re-encode a file from one charset to another;</li>
 *   <li>{@code tryfix-string} - try every "encode with A, decode with B" pair from
 *       {@link #ENCODINGS} on a garbled string and list the plausible results;</li>
 *   <li>{@code tryfix-file} - decode the beginning of a file with every charset in
 *       {@link #ENCODINGS} so the real encoding can be spotted by eye.</li>
 * </ul>
 */
public class EncodingFixer {

    /** Charset names tried by the exhaustive searches, in display order. */
    private static final String[] ENCODINGS = {
        "UTF-8", "UTF-16LE", "UTF-16BE", "UTF-16",
        "GB18030", "GBK", "GB2312", "CP936",
        "Big5", "ISO-8859-1", "Windows-1252", "US-ASCII"
    };

    /** Number of decoded characters shown per charset by {@code tryfix-file}. */
    private static final int SNIPPET_CHARS = 400;

    /**
     * Number of leading file bytes decoded per charset by {@code tryfix-file}.
     * The charsets in {@link #ENCODINGS} are expected to need at most 4 bytes per
     * character; 8 bytes per character leaves generous headroom so that the first
     * {@link #SNIPPET_CHARS} characters never depend on bytes beyond this prefix.
     */
    private static final int SNIPPET_BYTES = SNIPPET_CHARS * 8;

    /**
     * Command dispatcher: {@code args[0]} selects the sub-command
     * (case-insensitively) and the remaining elements are its parameters.
     * Prints the usage text when no or an unknown command is given.
     *
     * @param args command name followed by its parameters
     * @throws Exception if the selected command fails (for example an I/O error
     *         or an unknown charset name passed to {@code convert})
     */
    public static void main1(String[] args) throws Exception {
        if (args == null || args.length == 0) {
            printUsage();
            return;
        }
        // Locale.ROOT keeps the match stable under locales with special casing
        // rules (e.g. Turkish dotless i).
        String cmd = args[0].toLowerCase(Locale.ROOT);
        switch (cmd) {
            case "convert":
                // convert inputFile outputFile srcEncoding dstEncoding
                if (args.length != 5) {
                    System.err.println("convert 需要 4 个参数: convert <input> <output> <srcEnc> <dstEnc>");
                    return;
                }
                convertFile(args[1], args[2], args[3], args[4]);
                break;
            case "tryfix-string":
                // tryfix-string "someString"
                if (args.length != 2) {
                    System.err.println("tryfix-string 需要 1 个参数: tryfix-string \"文本\"");
                    return;
                }
                tryFixString(args[1]);
                break;
            case "tryfix-file":
                // tryfix-file inputFile
                if (args.length != 2) {
                    System.err.println("tryfix-file 需要 1 个参数: tryfix-file <inputFile>");
                    return;
                }
                tryFixFile(args[1]);
                break;
            default:
                printUsage();
        }
    }

    /** Prints the command-line usage text to standard output. */
    private static void printUsage() {
        System.out.println("EncodingFixer 用法:");
        System.out.println(" java EncodingFixer convert <inputFile> <outputFile> <srcEncoding> <dstEncoding>");
        System.out.println(" - 把 inputFile 从 srcEncoding 转为 dstEncoding 并写到 outputFile");
        System.out.println(" - 例如:convert exported.csv fixed.csv Big5 UTF-8");
        System.out.println();
        System.out.println(" java EncodingFixer tryfix-string \"文本\"");
        System.out.println(" - 对单个显示错的字符串穷举各种编码组合并输出候选(便于确定哪种解码链路能恢复正确文本)");
        System.out.println();
        System.out.println(" java EncodingFixer tryfix-file <inputFile>");
        System.out.println(" - 把文件原始 bytes 按多种编码直接解码并显示每种解码的前几行,帮助判断文件实际编码");
        System.out.println();
        System.out.println("常用编码: " + Arrays.toString(ENCODINGS));
    }

    /**
     * Re-encodes a file: the bytes of {@code inputPath} are interpreted as
     * {@code srcEnc} and written to {@code outputPath} encoded as {@code dstEnc}.
     *
     * <p>The conversion stays best-effort (undecodable bytes / unencodable
     * characters are replaced, as before), but a warning is printed to standard
     * error whenever such a replacement happens so data loss is never silent.
     *
     * @param inputPath  file to read
     * @param outputPath file to write (overwritten if it exists)
     * @param srcEnc     charset name used to decode the input
     * @param dstEnc     charset name used to encode the output
     * @throws IOException if reading or writing fails
     * @throws IllegalArgumentException if a charset name is illegal or unsupported
     */
    private static void convertFile(String inputPath, String outputPath, String srcEnc, String dstEnc) throws IOException {
        // Resolve both charsets first so a bad name fails before any file I/O.
        Charset src = Charset.forName(srcEnc);
        Charset dst = Charset.forName(dstEnc);
        Path in = Paths.get(inputPath);
        Path out = Paths.get(outputPath);
        byte[] raw = Files.readAllBytes(in);

        String text;
        try {
            text = decodeStrict(raw, src);
        } catch (CharacterCodingException e) {
            System.err.printf("警告: 输入文件包含无法按 %s 解码的字节,已用替换字符 U+FFFD 代替%n", srcEnc);
            text = new String(raw, src);
        }

        byte[] converted;
        try {
            converted = encodeStrict(text, dst);
        } catch (CharacterCodingException e) {
            System.err.printf("警告: 部分字符无法用 %s 表示,输出中已用替换字符代替%n", dstEnc);
            converted = text.getBytes(dst);
        }

        Files.write(out, converted);
        System.out.printf("转换完成: %s (%s) -> %s (%s)%n", inputPath, srcEnc, outputPath, dstEnc);
    }

    /**
     * Tries every "encode with A, decode with B" pair from {@link #ENCODINGS} on
     * {@code s} and prints the results accepted by {@link #looksReasonable(String)}.
     *
     * <p>Only lossless round trips are listed: a pair is skipped when {@code s}
     * cannot be encoded exactly with A or the resulting bytes are not valid in B.
     * Lenient conversion would substitute {@code '?'} / U+FFFD and produce
     * meaningless candidates such as "????". Results identical to the input are
     * skipped as well, because they repair nothing.
     *
     * @param s the garbled text to analyse; {@code null} or empty yields no candidates
     */
    private static void tryFixString(String s) {
        System.out.println("尝试穷举编码组合 (encode by A, decode by B) 来修复字符串。原始显示:" + s);
        System.out.println("---------- 结果候选 (仅列出看起来包含汉字或全部 ASCII 的项) ----------");
        if (s != null && !s.isEmpty()) {
            Map<String, Charset> charsets = resolveEncodings();
            for (Map.Entry<String, Charset> from : charsets.entrySet()) {
                if (!from.getValue().canEncode()) {
                    continue;
                }
                byte[] bytes;
                try {
                    bytes = encodeStrict(s, from.getValue());
                } catch (CharacterCodingException notEncodable) {
                    // s cannot be represented exactly in this charset, so it
                    // cannot be the charset that produced the garbled text.
                    continue;
                }
                for (Map.Entry<String, Charset> to : charsets.entrySet()) {
                    String candidate;
                    try {
                        candidate = decodeStrict(bytes, to.getValue());
                    } catch (CharacterCodingException notDecodable) {
                        // The bytes are not valid text in this charset.
                        continue;
                    }
                    if (!candidate.equals(s) && looksReasonable(candidate)) {
                        System.out.printf("%-12s -> %-12s : %s%n", from.getKey(), to.getKey(), candidate);
                    }
                }
            }
        }
        System.out.println("---------- 完成 ----------");
    }

    /**
     * Decodes the beginning of a file with every charset in {@link #ENCODINGS}
     * and prints up to {@link #SNIPPET_CHARS} characters of each result, so the
     * file's real encoding can be identified by eye.
     *
     * @param inputPath file to inspect
     * @throws IOException if the file cannot be read
     */
    private static void tryFixFile(String inputPath) throws IOException {
        Path in = Paths.get(inputPath);
        byte[] raw = Files.readAllBytes(in);
        // Only a bounded prefix is decoded; decoding the whole file once per
        // charset would be wasted work for a short preview.
        int prefixLength = Math.min(raw.length, SNIPPET_BYTES);
        System.out.println("文件大小: " + raw.length + " bytes");
        System.out.println("尝试把文件的 bytes 按下列编码解释,并显示前 " + SNIPPET_CHARS + " 个字符:");
        for (String enc : ENCODINGS) {
            System.out.println("---- decode as " + enc + " ----");
            try {
                String decoded = new String(raw, 0, prefixLength, Charset.forName(enc));
                System.out.println(firstChars(decoded, SNIPPET_CHARS));
            } catch (IllegalArgumentException e) {
                // Charset.forName rejects illegal or unsupported charset names.
                System.out.println("解码失败: " + e.getMessage());
            }
            System.out.println();
        }
        System.out.println("尝试完成。请查看输出,找出哪种编码能把简体字段显示正确。");
    }

    /**
     * Heuristic filter for repair candidates: accepts a string that contains at
     * least one character in U+4E00..U+9FFF (CJK Unified Ideographs), or that
     * consists solely of printable ASCII (codes 32..126).
     *
     * @param s candidate text
     * @return {@code true} if the candidate is worth showing; {@code false} for
     *         {@code null}, empty, or implausible text
     */
    private static boolean looksReasonable(String s) {
        if (s == null || s.isEmpty()) {
            return false;
        }
        int cjk = 0;
        int printable = 0;
        for (int i = 0; i < s.length(); i++) {
            char ch = s.charAt(i);
            if (ch >= 0x4E00 && ch <= 0x9FFF) {
                cjk++;
            }
            if (ch >= 32 && ch <= 126) {
                printable++;
            }
        }
        // Chinese characters are the stronger signal; pure ASCII may simply be English.
        return cjk > 0 || printable == s.length();
    }

    /**
     * Resolves the names in {@link #ENCODINGS} once, keeping their order. Names
     * the running JVM does not support are reported on standard error and left out.
     */
    private static Map<String, Charset> resolveEncodings() {
        Map<String, Charset> resolved = new LinkedHashMap<>();
        for (String name : ENCODINGS) {
            try {
                resolved.put(name, Charset.forName(name));
            } catch (IllegalArgumentException e) {
                System.err.println("跳过当前 JVM 不支持的编码: " + name);
            }
        }
        return resolved;
    }

    /** Encodes {@code text}, failing instead of substituting when a character is unmappable. */
    private static byte[] encodeStrict(String text, Charset charset) throws CharacterCodingException {
        ByteBuffer encoded = charset.newEncoder()
                .onMalformedInput(CodingErrorAction.REPORT)
                .onUnmappableCharacter(CodingErrorAction.REPORT)
                .encode(CharBuffer.wrap(text));
        byte[] bytes = new byte[encoded.remaining()];
        encoded.get(bytes);
        return bytes;
    }

    /** Decodes {@code bytes}, failing instead of substituting when the input is invalid. */
    private static String decodeStrict(byte[] bytes, Charset charset) throws CharacterCodingException {
        return charset.newDecoder()
                .onMalformedInput(CodingErrorAction.REPORT)
                .onUnmappableCharacter(CodingErrorAction.REPORT)
                .decode(ByteBuffer.wrap(bytes))
                .toString();
    }

    /** Returns at most {@code limit} leading chars of {@code text} without splitting a surrogate pair. */
    private static String firstChars(String text, int limit) {
        if (text.length() <= limit) {
            return text;
        }
        int end = limit;
        if (end > 0
                && Character.isHighSurrogate(text.charAt(end - 1))
                && Character.isLowSurrogate(text.charAt(end))) {
            end--;
        }
        return text.substring(0, end);
    }

    /**
     * Program entry point. Without arguments it runs the built-in demo (an
     * exhaustive repair attempt on a sample garbled string); with arguments it
     * forwards to {@link #main1(String[])} so the commands documented in the
     * usage text work from the command line.
     *
     * @param args optional command and parameters, see {@link #main1(String[])}
     */
    public static void main(String[] args) {
        if (args == null || args.length == 0) {
            tryFixString("杻梗惆豢");
            return;
        }
        try {
            main1(args);
        } catch (Exception e) {
            System.err.println("执行失败: " + e);
            System.exit(1);
        }
    }
}
// From: 一号门 (page footer text that was fused onto the class's closing brace)
Previous:Mybatis plus 查询mysql json字段的一些方法
Next: None

COMMENTS