forked from itwanger/toBeBetterJavaer
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathjava-unicode.html
More file actions
86 lines (79 loc) · 61.1 KB
/
Copy pathjava-unicode.html
File metadata and controls
86 lines (79 loc) · 61.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
<!doctype html>
<html lang="zh-CN" data-theme="light">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width,initial-scale=1" />
<meta name="generator" content="VuePress 2.0.0-beta.67" />
<meta name="theme" content="VuePress Theme Hope" />
<meta name="keywords" content="Java,中文乱码, 字符编码, ASCII, Unicode, UTF-8, GB2312, 解决乱码, 编码详解, 编码原理"><meta property="og:url" content="https://javabetter.cn/basic-extra-meal/java-unicode.html"><meta property="og:site_name" content="二哥的Java进阶之路"><meta property="og:title" content="解决中文乱码:字符编码全攻略 - ASCII、Unicode、UTF-8、GB2312详解"><meta property="og:description" content="本文深入探讨中文乱码及字符编码问题,详细介绍了ASCII、Unicode、UTF-8、GB2312等编码格式的原理与特点。通过理解字符编码的发展历程,可以更好地解决中文乱码问题。本文还将探讨不同编码之间的转换方法,帮助程序员在编程过程中轻松应对字符编码问题,提高代码质量和可读性。"><meta property="og:type" content="article"><meta property="og:locale" content="zh-CN"><meta property="og:updated_time" content="2023-09-04T05:16:42.000Z"><meta property="article:author" content="沉默王二"><meta property="article:tag" content="Java重要知识点"><meta property="article:modified_time" content="2023-09-04T05:16:42.000Z"><script type="application/ld+json">{"@context":"https://schema.org","@type":"Article","headline":"解决中文乱码:字符编码全攻略 - ASCII、Unicode、UTF-8、GB2312详解","image":[""],"dateModified":"2023-09-04T05:16:42.000Z","author":[{"@type":"Person","name":"沉默王二"}]}</script><meta name="robots" content="all"><meta name="author" content="沉默王二"><meta http-equiv="Cache-Control" content="no-cache, no-store, must-revalidate"><meta http-equiv="Pragma" content="no-cache"><meta http-equiv="Expires" content="0"><meta name="apple-mobile-web-app-capable" content="yes"><script>
// Keep an already-defined global `_hmt` if there is one; otherwise start with an empty array.
var _hmt = _hmt || [];
// Load the hm.baidu.com script by inserting a new <script> element
// immediately before the first <script> element found in the document.
(function () {
  var firstScript = document.getElementsByTagName("script")[0];
  var tracker = document.createElement("script");
  tracker.src = "https://hm.baidu.com/hm.js?5230ac143650bf5eb3c14f3fb9b1d3ec";
  firstScript.parentNode.insertBefore(tracker, firstScript);
})();
</script>
<!-- Icon font stylesheet: explicit https instead of a protocol-relative URL. -->
<link rel="stylesheet" href="https://at.alicdn.com/t/font_3180624_7cy10l7jqqh.css">
<link rel="icon" href="https://cdn.tobebetterjavaer.com/tobebetterjavaer/images/favicon.ico">
<link rel="icon" href="/assets/icon/chrome-mask-512.png" type="image/png" sizes="512x512">
<link rel="icon" href="/assets/icon/chrome-mask-192.png" type="image/png" sizes="192x192">
<link rel="icon" href="/assets/icon/chrome-512.png" type="image/png" sizes="512x512">
<link rel="icon" href="/assets/icon/chrome-192.png" type="image/png" sizes="192x192">
<link rel="manifest" href="/manifest.webmanifest" crossorigin="use-credentials">
<meta name="theme-color" content="#096dd9">
<link rel="apple-touch-icon" href="/assets/icon/apple-icon-152.png">
<meta name="apple-mobile-web-app-status-bar-style" content="black">
<meta name="msapplication-TileImage" content="/assets/icon/ms-icon-144.png">
<meta name="msapplication-TileColor" content="#ffffff">
<!-- Viewport: maximum-scale / user-scalable=no were removed so users can still pinch-zoom (WCAG 1.4.4); viewport-fit=cover is kept. -->
<meta name="viewport" content="width=device-width, initial-scale=1.0, viewport-fit=cover">
<title>解决中文乱码:字符编码全攻略 - ASCII、Unicode、UTF-8、GB2312详解 | 二哥的Java进阶之路</title>
<meta name="description" content="本文深入探讨中文乱码及字符编码问题,详细介绍了ASCII、Unicode、UTF-8、GB2312等编码格式的原理与特点。通过理解字符编码的发展历程,可以更好地解决中文乱码问题。本文还将探讨不同编码之间的转换方法,帮助程序员在编程过程中轻松应对字符编码问题,提高代码质量和可读性。">
<style>
  /* Default page background colour, exposed as a custom property. */
  :root { --bg-color: #fff; }

  /* Replacement value used while <html> carries data-theme="dark". */
  html[data-theme="dark"] { --bg-color: #1d1e1f; }

  /* Paint both the root element and the body with the current value. */
  html, body { background: var(--bg-color); }
</style>
<script>
// Decide whether to switch <html> to the dark theme.
// NOTE(review): presumably placed in <head> so the attribute is set before first paint — confirm.

// Stored scheme value from localStorage; null when the key has never been written.
const userMode = localStorage.getItem("vuepress-theme-hope-scheme");
// Truthy only when matchMedia exists and the OS/browser reports a dark colour-scheme preference.
const systemDarkMode =
window.matchMedia &&
window.matchMedia("(prefers-color-scheme: dark)").matches;
// Go dark when the stored value is "dark", or when it is anything other than "light"
// and the system prefers dark. Otherwise the data-theme already in the markup is left untouched.
if (userMode === "dark" || (userMode !== "light" && systemDarkMode)) {
document.documentElement.setAttribute("data-theme", "dark");
}
</script>
<link rel="preload" href="/assets/style-c6d22cf5.css" as="style"><link rel="stylesheet" href="/assets/style-c6d22cf5.css">
<link rel="modulepreload" href="/assets/app-a425eb61.js"><link rel="modulepreload" href="/assets/java-unicode.html-a2931d5f.js"><link rel="modulepreload" href="/assets/plugin-vue_export-helper-c27b6911.js"><link rel="modulepreload" href="/assets/java-unicode.html-476eadda.js">
</head>
<body>
<div id="app"><!--[--><!--[--><!--[--><span tabindex="-1"></span><a href="#main-content" class="vp-skip-link sr-only">跳至主要內容</a><!--]--><div class="theme-container has-toc"><!--[--><header id="navbar" class="vp-navbar"><div class="vp-navbar-start"><button type="button" class="vp-toggle-sidebar-button" title="Toggle Sidebar"><span class="icon"></span></button><!--[--><!----><!--]--><!--[--><a class="vp-link vp-brand" href="/"><img class="vp-nav-logo" src="https://cdn.tobebetterjavaer.com/tobebetterjavaer/images/logo-02.png" alt="二哥的Java进阶之路"><!----><span class="vp-site-name hide-in-pad">二哥的Java进阶之路</span></a><!--]--><!--[--><!----><!--]--></div><div class="vp-navbar-center"><!--[--><!----><!--]--><!--[--><nav class="vp-nav-links"><div class="nav-item hide-in-mobile"><a class="vp-link nav-link" href="/blog.html"><span class="font-icon icon iconfont icon-gaishu" style=""></span>博客<!----></a></div><div class="nav-item hide-in-mobile"><a class="vp-link nav-link" href="/home.html"><span class="font-icon icon iconfont icon-lujing" style=""></span>进阶之路<!----></a></div><div class="nav-item hide-in-mobile"><a class="vp-link nav-link" href="/zhishixingqiu/"><span class="font-icon icon iconfont icon-Artboard" style=""></span>知识星球<!----></a></div><div class="nav-item hide-in-mobile"><a class="vp-link nav-link" href="/xuexiluxian/"><span class="font-icon icon iconfont icon-luxian" style=""></span>学习路线<!----></a></div><div class="nav-item hide-in-mobile"><div class="dropdown-wrapper"><button type="button" class="dropdown-title" aria-label="珍藏资源"><span class="title"><span class="font-icon icon iconfont icon-youzhi" style=""></span>珍藏资源</span><span class="arrow"></span><ul class="nav-dropdown"><li class="dropdown-item"><a class="vp-link nav-link" href="/pdf/"><span class="font-icon icon iconfont icon-java" style=""></span>PDF下载<!----></a></li><li class="dropdown-item"><a class="vp-link nav-link" href="/sidebar/sanfene/nixi.html"><span class="font-icon icon iconfont icon-zhunbei" 
style=""></span>面渣逆袭<!----></a></li></ul></button></div></div><div class="nav-item hide-in-mobile"><a href="https://space.bilibili.com/513340480" rel="noopener noreferrer" target="_blank" aria-label="B站视频" class="nav-link"><span class="font-icon icon iconfont icon-bzhan" style=""></span>B站视频<span><svg class="external-link-icon" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" focusable="false" x="0px" y="0px" viewBox="0 0 100 100" width="15" height="15"><path fill="currentColor" d="M18.8,85.1h56l0,0c2.2,0,4-1.8,4-4v-32h-8v28h-48v-48h28v-8h-32l0,0c-2.2,0-4,1.8-4,4v56C14.8,83.3,16.6,85.1,18.8,85.1z"></path><polygon fill="currentColor" points="45.7,48.7 51.3,54.3 77.2,28.5 77.2,37.2 85.2,37.2 85.2,14.9 62.8,14.9 62.8,22.9 71.5,22.9"></polygon></svg><span class="external-link-icon-sr-only">open in new window</span></span><!----></a></div></nav><!--]--><!--[--><!----><!--]--></div><div class="vp-navbar-end"><!--[--><!----><!--]--><!--[--><!----><div class="nav-item vp-repo"><a class="vp-repo-link" href="https://github.com/itwanger/toBeBetterJavaer" target="_blank" rel="noopener noreferrer" aria-label="GitHub"><svg xmlns="http://www.w3.org/2000/svg" class="icon github-icon" viewBox="0 0 1024 1024" fill="currentColor" aria-label="github icon" style="width:1.25rem;height:1.25rem;vertical-align:middle;"><path d="M511.957 21.333C241.024 21.333 21.333 240.981 21.333 512c0 216.832 140.544 400.725 335.574 465.664 24.49 4.395 32.256-10.07 32.256-23.083 0-11.69.256-44.245 0-85.205-136.448 29.61-164.736-64.64-164.736-64.64-22.315-56.704-54.4-71.765-54.4-71.765-44.587-30.464 3.285-29.824 3.285-29.824 49.195 3.413 75.179 50.517 75.179 50.517 43.776 75.008 114.816 53.333 142.762 40.79 4.523-31.66 17.152-53.377 31.19-65.537-108.971-12.458-223.488-54.485-223.488-242.602 0-53.547 19.114-97.323 50.517-131.67-5.035-12.33-21.93-62.293 4.779-129.834 0 0 41.258-13.184 134.912 50.346a469.803 469.803 0 0 1 122.88-16.554c41.642.213 83.626 5.632 122.88 16.554 93.653-63.488 134.784-50.346 
134.784-50.346 26.752 67.541 9.898 117.504 4.864 129.834 31.402 34.347 50.474 78.123 50.474 131.67 0 188.586-114.73 230.016-224.042 242.09 17.578 15.232 33.578 44.672 33.578 90.454v135.85c0 13.142 7.936 27.606 32.854 22.87C862.25 912.597 1002.667 728.747 1002.667 512c0-271.019-219.648-490.667-490.71-490.667z"></path></svg></a></div><div class="nav-item hide-in-mobile"><button type="button" class="outlook-button" tabindex="-1" aria-hidden="true"><svg xmlns="http://www.w3.org/2000/svg" class="icon outlook-icon" viewBox="0 0 1024 1024" fill="currentColor" aria-label="outlook icon"><path d="M224 800c0 9.6 3.2 44.8 6.4 54.4 6.4 48-48 76.8-48 76.8s80 41.6 147.2 0 134.4-134.4 38.4-195.2c-22.4-12.8-41.6-19.2-57.6-19.2C259.2 716.8 227.2 761.6 224 800zM560 675.2l-32 51.2c-51.2 51.2-83.2 32-83.2 32 25.6 67.2 0 112-12.8 128 25.6 6.4 51.2 9.6 80 9.6 54.4 0 102.4-9.6 150.4-32l0 0c3.2 0 3.2-3.2 3.2-3.2 22.4-16 12.8-35.2 6.4-44.8-9.6-12.8-12.8-25.6-12.8-41.6 0-54.4 60.8-99.2 137.6-99.2 6.4 0 12.8 0 22.4 0 12.8 0 38.4 9.6 48-25.6 0-3.2 0-3.2 3.2-6.4 0-3.2 3.2-6.4 3.2-6.4 6.4-16 6.4-16 6.4-19.2 9.6-35.2 16-73.6 16-115.2 0-105.6-41.6-198.4-108.8-268.8C704 396.8 560 675.2 560 675.2zM224 419.2c0-28.8 22.4-51.2 51.2-51.2 28.8 0 51.2 22.4 51.2 51.2 0 28.8-22.4 51.2-51.2 51.2C246.4 470.4 224 448 224 419.2zM320 284.8c0-22.4 19.2-41.6 41.6-41.6 22.4 0 41.6 19.2 41.6 41.6 0 22.4-19.2 41.6-41.6 41.6C339.2 326.4 320 307.2 320 284.8zM457.6 208c0-12.8 12.8-25.6 25.6-25.6 12.8 0 25.6 12.8 25.6 25.6 0 12.8-12.8 25.6-25.6 25.6C470.4 233.6 457.6 220.8 457.6 208zM128 505.6C128 592 153.6 672 201.6 736c28.8-60.8 112-60.8 124.8-60.8-16-51.2 16-99.2 16-99.2l316.8-422.4c-48-19.2-99.2-32-150.4-32C297.6 118.4 128 291.2 128 505.6zM764.8 86.4c-22.4 19.2-390.4 518.4-390.4 518.4-22.4 28.8-12.8 76.8 22.4 99.2l9.6 6.4c35.2 22.4 80 12.8 99.2-25.6 0 0 6.4-12.8 9.6-19.2 54.4-105.6 275.2-524.8 288-553.6 6.4-19.2-3.2-32-19.2-32C777.6 76.8 771.2 80 764.8 86.4z"></path></svg><div 
class="outlook-dropdown"><!----></div></button></div><!--[--><div id="docsearch-container" style="display:none;"></div><div><button type="button" class="DocSearch DocSearch-Button" aria-label="搜索文档"><span class="DocSearch-Button-Container"><svg width="20" height="20" class="DocSearch-Search-Icon" viewBox="0 0 20 20"><path d="M14.386 14.386l4.0877 4.0877-4.0877-4.0877c-2.9418 2.9419-7.7115 2.9419-10.6533 0-2.9419-2.9418-2.9419-7.7115 0-10.6533 2.9418-2.9419 7.7115-2.9419 10.6533 0 2.9419 2.9418 2.9419 7.7115 0 10.6533z" stroke="currentColor" fill="none" fill-rule="evenodd" stroke-linecap="round" stroke-linejoin="round"></path></svg><span class="DocSearch-Button-Placeholder">搜索文档</span></span><span class="DocSearch-Button-Keys"><kbd class="DocSearch-Button-Key"><svg width="15" height="15" class="DocSearch-Control-Key-Icon"><path d="M4.505 4.496h2M5.505 5.496v5M8.216 4.496l.055 5.993M10 7.5c.333.333.5.667.5 1v2M12.326 4.5v5.996M8.384 4.496c1.674 0 2.116 0 2.116 1.5s-.442 1.5-2.116 1.5M3.205 9.303c-.09.448-.277 1.21-1.241 1.203C1 10.5.5 9.513.5 8V7c0-1.57.5-2.5 1.464-2.494.964.006 1.134.598 1.24 1.342M12.553 10.5h1.953" stroke-width="1.2" stroke="currentColor" fill="none" stroke-linecap="square"></path></svg></kbd><kbd class="DocSearch-Button-Key">K</kbd></span></button></div><!--]--><!--]--><!--[--><!----><!--]--><button type="button" class="vp-toggle-navbar-button" aria-label="Toggle Navbar" aria-expanded="false" aria-controls="nav-screen"><span><span class="vp-top"></span><span class="vp-middle"></span><span class="vp-bottom"></span></span></button></div></header><!----><!--]--><!----><div class="toggle-sidebar-wrapper"><span class="arrow start"></span></div><aside id="sidebar" class="vp-sidebar"><!--[--><!----><!--]--><ul class="vp-sidebar-links"><li><!--[--><a class="vp-link nav-link vp-sidebar-link vp-sidebar-page" href="/home.html"><!---->一、前言<!----></a><ul class="vp-sidebar-sub-headers"></ul><!--]--></li><li><section class="vp-sidebar-group"><button 
class="vp-sidebar-heading clickable active" type="button"><!----><span class="vp-sidebar-title">二、Java基础</span><span class="vp-arrow down"></span></button><ul class="vp-sidebar-links"><li><section class="vp-sidebar-group"><button class="vp-sidebar-heading clickable" type="button"><!----><span class="vp-sidebar-title">2.1 Java概述及环境配置</span><span class="vp-arrow end"></span></button><!----></section></li><li><section class="vp-sidebar-group"><button class="vp-sidebar-heading clickable" type="button"><!----><span class="vp-sidebar-title">2.2 Java语法基础</span><span class="vp-arrow end"></span></button><!----></section></li><li><section class="vp-sidebar-group"><button class="vp-sidebar-heading clickable" type="button"><!----><span class="vp-sidebar-title">2.3 数组&字符串</span><span class="vp-arrow end"></span></button><!----></section></li><li><section class="vp-sidebar-group"><button class="vp-sidebar-heading clickable" type="button"><!----><span class="vp-sidebar-title">2.4 面向对象编程</span><span class="vp-arrow end"></span></button><!----></section></li><li><section class="vp-sidebar-group"><button class="vp-sidebar-heading clickable" type="button"><!----><span class="vp-sidebar-title">2.5 集合框架(容器)</span><span class="vp-arrow end"></span></button><!----></section></li><li><section class="vp-sidebar-group"><button class="vp-sidebar-heading clickable" type="button"><!----><span class="vp-sidebar-title">2.6 Java IO</span><span class="vp-arrow end"></span></button><!----></section></li><li><section class="vp-sidebar-group"><button class="vp-sidebar-heading clickable" type="button"><!----><span class="vp-sidebar-title">2.7 异常处理</span><span class="vp-arrow end"></span></button><!----></section></li><li><section class="vp-sidebar-group"><button class="vp-sidebar-heading clickable" type="button"><!----><span class="vp-sidebar-title">2.8 常用工具类</span><span class="vp-arrow end"></span></button><!----></section></li><li><section class="vp-sidebar-group"><button class="vp-sidebar-heading 
clickable" type="button"><!----><span class="vp-sidebar-title">2.9 Java新特性</span><span class="vp-arrow end"></span></button><!----></section></li><li><section class="vp-sidebar-group"><button class="vp-sidebar-heading clickable" type="button"><!----><span class="vp-sidebar-title">2.10 网络编程</span><span class="vp-arrow end"></span></button><!----></section></li><li><section class="vp-sidebar-group"><button class="vp-sidebar-heading clickable" type="button"><!----><span class="vp-sidebar-title">2.11 NIO</span><span class="vp-arrow end"></span></button><!----></section></li><li><section class="vp-sidebar-group"><button class="vp-sidebar-heading clickable active" type="button"><!----><span class="vp-sidebar-title">2.12 Java重要知识点</span><span class="vp-arrow down"></span></button><ul class="vp-sidebar-links"><li><!--[--><a class="vp-link nav-link vp-sidebar-link vp-sidebar-page" href="/basic-extra-meal/java-naming.html"><!---->Java命名规范<!----></a><ul class="vp-sidebar-sub-headers"></ul><!--]--></li><li><!--[--><a class="vp-link nav-link active vp-sidebar-link vp-sidebar-page active" href="/basic-extra-meal/java-unicode.html"><!---->中文乱码及字符编码全攻略<!----></a><ul class="vp-sidebar-sub-headers"><li class="vp-sidebar-sub-header"><a class="vp-link nav-link vp-sidebar-link vp-heading" href="/basic-extra-meal/java-unicode.html#什么是中文乱码"><!---->什么是中文乱码<!----></a><ul class="vp-sidebar-sub-headers"></ul></li><li class="vp-sidebar-sub-header"><a class="vp-link nav-link vp-sidebar-link vp-heading" href="/basic-extra-meal/java-unicode.html#什么是字符编码"><!---->什么是字符编码<!----></a><ul class="vp-sidebar-sub-headers"></ul></li><li class="vp-sidebar-sub-header"><a class="vp-link nav-link vp-sidebar-link vp-heading" href="/basic-extra-meal/java-unicode.html#中文乱码是如何出现的"><!---->中文乱码是如何出现的?<!----></a><ul class="vp-sidebar-sub-headers"></ul></li><li class="vp-sidebar-sub-header"><a class="vp-link nav-link vp-sidebar-link vp-heading" 
href="/basic-extra-meal/java-unicode.html#如何解决中文乱码呢"><!---->如何解决中文乱码呢?<!----></a><ul class="vp-sidebar-sub-headers"></ul></li></ul><!--]--></li><li><!--[--><a class="vp-link nav-link vp-sidebar-link vp-sidebar-page" href="/basic-extra-meal/box.html"><!---->深入剖析Java中的拆箱和装箱<!----></a><ul class="vp-sidebar-sub-headers"></ul><!--]--></li><li><!--[--><a class="vp-link nav-link vp-sidebar-link vp-sidebar-page" href="/basic-extra-meal/deep-copy.html"><!---->深入理解Java浅拷贝与深拷贝<!----></a><ul class="vp-sidebar-sub-headers"></ul><!--]--></li><li><!--[--><a class="vp-link nav-link vp-sidebar-link vp-sidebar-page" href="/basic-extra-meal/hashcode.html"><!---->Java hashCode方法解析<!----></a><ul class="vp-sidebar-sub-headers"></ul><!--]--></li><li><!--[--><a class="vp-link nav-link vp-sidebar-link vp-sidebar-page" href="/basic-extra-meal/pass-by-value.html"><!---->Java是值传递还是引用传递?<!----></a><ul class="vp-sidebar-sub-headers"></ul><!--]--></li><li><!--[--><a class="vp-link nav-link vp-sidebar-link vp-sidebar-page" href="/basic-extra-meal/true-generic.html"><!---->Java为什么无法实现真正的泛型<!----></a><ul class="vp-sidebar-sub-headers"></ul><!--]--></li><li><!--[--><a class="vp-link nav-link vp-sidebar-link vp-sidebar-page" href="/basic-extra-meal/fanshe.html"><!---->掌握 Java 反射<!----></a><ul class="vp-sidebar-sub-headers"></ul><!--]--></li></ul></section></li><li><section class="vp-sidebar-group"><button class="vp-sidebar-heading clickable" type="button"><!----><span class="vp-sidebar-title">2.13 并发编程</span><span class="vp-arrow end"></span></button><!----></section></li><li><section class="vp-sidebar-group"><button class="vp-sidebar-heading clickable" type="button"><!----><span class="vp-sidebar-title">2.14 JVM</span><span class="vp-arrow end"></span></button><!----></section></li></ul></section></li><li><section class="vp-sidebar-group"><button class="vp-sidebar-heading clickable" type="button"><!----><span class="vp-sidebar-title">三、Java进阶</span><span class="vp-arrow 
end"></span></button><!----></section></li><li><section class="vp-sidebar-group"><button class="vp-sidebar-heading clickable" type="button"><!----><span class="vp-sidebar-title">四、MySQL</span><span class="vp-arrow end"></span></button><!----></section></li><li><section class="vp-sidebar-group"><button class="vp-sidebar-heading clickable" type="button"><!----><span class="vp-sidebar-title">五、Redis</span><span class="vp-arrow end"></span></button><!----></section></li><li><section class="vp-sidebar-group"><button class="vp-sidebar-heading clickable" type="button"><!----><span class="vp-sidebar-title">六、计算机基础</span><span class="vp-arrow end"></span></button><!----></section></li><li><section class="vp-sidebar-group"><button class="vp-sidebar-heading clickable" type="button"><!----><span class="vp-sidebar-title">七、求职面试</span><span class="vp-arrow end"></span></button><!----></section></li><li><section class="vp-sidebar-group"><button class="vp-sidebar-heading clickable" type="button"><!----><span class="vp-sidebar-title">八、学习建议</span><span class="vp-arrow end"></span></button><!----></section></li><li><section class="vp-sidebar-group"><button class="vp-sidebar-heading clickable" type="button"><!----><span class="vp-sidebar-title">九、知识库搭建</span><span class="vp-arrow end"></span></button><!----></section></li><li><section class="vp-sidebar-group"><button class="vp-sidebar-heading clickable" type="button"><!----><span class="vp-sidebar-title">十、联系作者</span><span class="vp-arrow end"></span></button><!----></section></li></ul><!--[--><!----><!--]--></aside><!--[--><main id="main-content" class="vp-page"><!--[--><!----><!----><nav class="vp-breadcrumb disable"></nav><div class="vp-page-title"><h1><!---->解决中文乱码:字符编码全攻略 - ASCII、Unicode、UTF-8、GB2312详解</h1><div class="page-info"><span class="page-author-info" aria-label="作者🖊" data-balloon-pos="down"><svg xmlns="http://www.w3.org/2000/svg" class="icon author-icon" viewBox="0 0 1024 1024" fill="currentColor" aria-label="author 
icon"><path d="M649.6 633.6c86.4-48 147.2-144 147.2-249.6 0-160-128-288-288-288s-288 128-288 288c0 108.8 57.6 201.6 147.2 249.6-121.6 48-214.4 153.6-240 288-3.2 9.6 0 19.2 6.4 25.6 3.2 9.6 12.8 12.8 22.4 12.8h704c9.6 0 19.2-3.2 25.6-12.8 6.4-6.4 9.6-16 6.4-25.6-25.6-134.4-121.6-240-243.2-288z"></path></svg><span><span class="page-author-item">沉默王二</span></span><span property="author" content="沉默王二"></span></span><!----><span class="page-date-info" aria-label="写作日期📅" data-balloon-pos="down"><svg xmlns="http://www.w3.org/2000/svg" class="icon calendar-icon" viewBox="0 0 1024 1024" fill="currentColor" aria-label="calendar icon"><path d="M716.4 110.137c0-18.753-14.72-33.473-33.472-33.473-18.753 0-33.473 14.72-33.473 33.473v33.473h66.993v-33.473zm-334.87 0c0-18.753-14.72-33.473-33.473-33.473s-33.52 14.72-33.52 33.473v33.473h66.993v-33.473zm468.81 33.52H716.4v100.465c0 18.753-14.72 33.473-33.472 33.473a33.145 33.145 0 01-33.473-33.473V143.657H381.53v100.465c0 18.753-14.72 33.473-33.473 33.473a33.145 33.145 0 01-33.473-33.473V143.657H180.6A134.314 134.314 0 0046.66 277.595v535.756A134.314 134.314 0 00180.6 947.289h669.74a134.36 134.36 0 00133.94-133.938V277.595a134.314 134.314 0 00-133.94-133.938zm33.473 267.877H147.126a33.145 33.145 0 01-33.473-33.473c0-18.752 14.72-33.473 33.473-33.473h736.687c18.752 0 33.472 14.72 33.472 33.473a33.145 33.145 0 01-33.472 33.473z"></path></svg><span><!----></span><meta property="datePublished" content="2021-10-22T07:22:10.000Z"></span><span class="page-category-info" aria-label="分类🌈" data-balloon-pos="down"><svg xmlns="http://www.w3.org/2000/svg" class="icon category-icon" viewBox="0 0 1024 1024" fill="currentColor" aria-label="category icon"><path d="M148.41 106.992h282.176c22.263 0 40.31 18.048 40.31 40.31V429.48c0 22.263-18.047 40.31-40.31 40.31H148.41c-22.263 0-40.311-18.047-40.311-40.31V147.302c0-22.263 18.048-40.31 40.311-40.31zM147.556 553.478H429.73c22.263 0 40.311 18.048 40.311 40.31v282.176c0 22.263-18.048 40.312-40.31 
40.312H147.555c-22.263 0-40.311-18.049-40.311-40.312V593.79c0-22.263 18.048-40.311 40.31-40.311zM593.927 106.992h282.176c22.263 0 40.31 18.048 40.31 40.31V429.48c0 22.263-18.047 40.31-40.31 40.31H593.927c-22.263 0-40.311-18.047-40.311-40.31V147.302c0-22.263 18.048-40.31 40.31-40.31zM730.22 920.502H623.926c-40.925 0-74.22-33.388-74.22-74.425V623.992c0-41.038 33.387-74.424 74.425-74.424h222.085c41.038 0 74.424 33.226 74.424 74.067v114.233c0 10.244-8.304 18.548-18.547 18.548s-18.548-8.304-18.548-18.548V623.635c0-20.388-16.746-36.974-37.33-36.974H624.13c-20.585 0-37.331 16.747-37.331 37.33v222.086c0 20.585 16.654 37.331 37.126 37.331H730.22c10.243 0 18.547 8.304 18.547 18.547 0 10.244-8.304 18.547-18.547 18.547z"></path></svg><!--[--><span class="page-category-item category0 clickable" role="navigation">Java核心</span><!--]--><meta property="articleSection" content="Java核心"></span><span class="page-tag-info" aria-label="标签🏷" data-balloon-pos="down"><svg xmlns="http://www.w3.org/2000/svg" class="icon tag-icon" viewBox="0 0 1024 1024" fill="currentColor" aria-label="tag icon"><path d="M939.902 458.563L910.17 144.567c-1.507-16.272-14.465-29.13-30.737-30.737L565.438 84.098h-.402c-3.215 0-5.726 1.005-7.634 2.913l-470.39 470.39a10.004 10.004 0 000 14.164l365.423 365.424c1.909 1.908 4.42 2.913 7.132 2.913s5.223-1.005 7.132-2.913l470.39-470.39c2.01-2.11 3.014-5.023 2.813-8.036zm-240.067-72.121c-35.458 0-64.286-28.828-64.286-64.286s28.828-64.285 64.286-64.285 64.286 28.828 64.286 64.285-28.829 64.286-64.286 64.286z"></path></svg><!--[--><span class="page-tag-item tag3 clickable" role="navigation">Java重要知识点</span><!--]--><meta property="keywords" content="Java重要知识点"></span><span class="page-word-info" aria-label="字数🔠" data-balloon-pos="down"><svg xmlns="http://www.w3.org/2000/svg" class="icon word-icon" viewBox="0 0 1024 1024" fill="currentColor" aria-label="word icon"><path d="M518.217 432.64V73.143A73.143 73.143 0 01603.43 1.097a512 512 0 01419.474 419.474 73.143 73.143 0 
01-72.046 85.212H591.36a73.143 73.143 0 01-73.143-73.143z"></path><path d="M493.714 566.857h340.297a73.143 73.143 0 0173.143 85.577A457.143 457.143 0 11371.566 117.76a73.143 73.143 0 0185.577 73.143v339.383a36.571 36.571 0 0036.571 36.571z"></path></svg><span>约 3855 字</span><meta property="wordCount" content="3855"></span><span class="page-reading-time-info" aria-label="阅读时间⌛" data-balloon-pos="down"><svg xmlns="http://www.w3.org/2000/svg" class="icon timer-icon" viewBox="0 0 1024 1024" fill="currentColor" aria-label="timer icon"><path d="M799.387 122.15c4.402-2.978 7.38-7.897 7.38-13.463v-1.165c0-8.933-7.38-16.312-16.312-16.312H256.33c-8.933 0-16.311 7.38-16.311 16.312v1.165c0 5.825 2.977 10.874 7.637 13.592 4.143 194.44 97.22 354.963 220.201 392.763-122.204 37.542-214.893 196.511-220.2 389.397-4.661 5.049-7.638 11.651-7.638 19.03v5.825h566.49v-5.825c0-7.379-2.849-13.981-7.509-18.9-5.049-193.016-97.867-351.985-220.2-389.527 123.24-37.67 216.446-198.453 220.588-392.892zM531.16 450.445v352.632c117.674 1.553 211.787 40.778 211.787 88.676H304.097c0-48.286 95.149-87.382 213.728-88.676V450.445c-93.077-3.107-167.901-81.297-167.901-177.093 0-8.803 6.99-15.793 15.793-15.793 8.803 0 15.794 6.99 15.794 15.793 0 80.261 63.69 145.635 142.01 145.635s142.011-65.374 142.011-145.635c0-8.803 6.99-15.793 15.794-15.793s15.793 6.99 15.793 15.793c0 95.019-73.789 172.82-165.96 177.093z"></path></svg><span>大约 13 分钟</span><meta property="timeRequired" content="PT13M"></span></div><hr></div><div class="toc-place-holder"><aside id="toc"><!--[--><!----><!--]--><div class="toc-header">此页内容<button type="button" class="print-button" title="打印"><svg xmlns="http://www.w3.org/2000/svg" class="icon print-icon" viewBox="0 0 1024 1024" fill="currentColor" aria-label="print icon"><path d="M819.2 364.8h-44.8V128c0-17.067-14.933-32-32-32H281.6c-17.067 0-32 14.933-32 32v236.8h-44.8C145.067 364.8 96 413.867 96 473.6v192c0 59.733 49.067 108.8 108.8 108.8h44.8V896c0 17.067 14.933 32 32 32h460.8c17.067 0 
32-14.933 32-32V774.4h44.8c59.733 0 108.8-49.067 108.8-108.8v-192c0-59.733-49.067-108.8-108.8-108.8zM313.6 160h396.8v204.8H313.6V160zm396.8 704H313.6V620.8h396.8V864zM864 665.6c0 25.6-19.2 44.8-44.8 44.8h-44.8V588.8c0-17.067-14.933-32-32-32H281.6c-17.067 0-32 14.933-32 32v121.6h-44.8c-25.6 0-44.8-19.2-44.8-44.8v-192c0-25.6 19.2-44.8 44.8-44.8h614.4c25.6 0 44.8 19.2 44.8 44.8v192z"></path></svg></button></div><div class="toc-wrapper"><ul class="toc-list"><!--[--><li class="toc-item"><a class="vp-link toc-link level3" href="/#什么是中文乱码">什么是中文乱码</a></li><!----><!--]--><!--[--><li class="toc-item"><a class="vp-link toc-link level3" href="/#什么是字符编码">什么是字符编码</a></li><!----><!--]--><!--[--><li class="toc-item"><a class="vp-link toc-link level3" href="/#中文乱码是如何出现的">中文乱码是如何出现的?</a></li><!----><!--]--><!--[--><li class="toc-item"><a class="vp-link toc-link level3" href="/#如何解决中文乱码呢">如何解决中文乱码呢?</a></li><!----><!--]--></ul><div class="toc-marker" style="top:-1.7rem;"></div></div><!--[--><!----><!--]--></aside></div><!----><div class="theme-hope-content"><h1 id="_13-2-中文乱码及字符编码全攻略" tabindex="-1"><a class="header-anchor" href="#_13-2-中文乱码及字符编码全攻略" aria-hidden="true">#</a> 13.2 中文乱码及字符编码全攻略</h1><p>就在昨天,我在<a href="https://javabetter.cn/zhishixingqiu/" target="_blank" rel="noopener noreferrer">二哥的编程星球<span><svg class="external-link-icon" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" focusable="false" x="0px" y="0px" viewBox="0 0 100 100" width="15" height="15"><path fill="currentColor" d="M18.8,85.1h56l0,0c2.2,0,4-1.8,4-4v-32h-8v28h-48v-48h28v-8h-32l0,0c-2.2,0-4,1.8-4,4v56C14.8,83.3,16.6,85.1,18.8,85.1z"></path><polygon fill="currentColor" points="45.7,48.7 51.3,54.3 77.2,28.5 77.2,37.2 85.2,37.2 85.2,14.9 62.8,14.9 62.8,22.9 71.5,22.9"></polygon></svg><span class="external-link-icon-sr-only">open in new window</span></span></a>里看到这样一张截图,有球友反馈说支付宝的理财页面出现了中文乱码,估计不少小伙伴和我一样,都惊呆了😮!阿里这种大厂还能出现这种低级错误?</p><figure><img 
src="https://cdn.tobebetterjavaer.com/stutymore/java-unicode-20230408135521.png" alt="" tabindex="0" loading="lazy"><figcaption></figcaption></figure><p>难道昨天的人力都投入到“通义千问”的突然发布上了?</p><p>那借这个机会,我就来和大家探讨一下,中文乱码是怎么出现的,以及如何快速解决乱码问题。</p><h3 id="什么是中文乱码" tabindex="-1"><a class="header-anchor" href="#什么是中文乱码" aria-hidden="true">#</a> 什么是中文乱码</h3><p>Q:什么是中文乱码?</p><p>GPT4:中文乱码是指在计算机系统或软件中,由于字符编码不一致或处理不当,导致中文字符无法正常显示,出现乱码现象。这种现象通常表现为中文字符被替换成其他不相关的字符、符号或乱码。</p><p>比如说,历史上著名的“锟斤拷”同学,就是中文乱码的绝佳代言人。</p><figure><img src="https://cdn.tobebetterjavaer.com/tobebetterjavaer/images/core-points/ten-06.png" alt="" tabindex="0" loading="lazy"><figcaption></figcaption></figure><h3 id="什么是字符编码" tabindex="-1"><a class="header-anchor" href="#什么是字符编码" aria-hidden="true">#</a> 什么是字符编码</h3><p>Q:那什么是字符编码呢?</p><p>GPT4:字符编码(Character Encoding)是一种规则或约定,它将字符(如字母、数字、标点符号等)映射到计算机中的二进制数据。简单来说,字符编码就是将人类可读的字符转换为计算机可以处理的二进制数据的过程。</p><p>对于计算机来说,只认 0 和 1,所有的信息最终都是一个二进制数。一个二进制数要么是 0,要么是 1。8 个二进制数放在一起是一个字节,就会组合出 256 种状态,也就是 2 的 8 次方(<code>2^8</code>),从 00000000 到 11111111。</p><p>为了让计算机能够识别和处理各种字符,需要使用字符编码来对字符进行编码和解码。常见的字符编码包括 ASCII、UTF-8、UTF-16、GBK、GB2312 等。</p><figure><img src="https://cdn.tobebetterjavaer.com/studymore/char-byte-20230322174312.png" alt="" tabindex="0" loading="lazy"><figcaption></figcaption></figure><h4 id="_1-ascii" tabindex="-1"><a class="header-anchor" href="#_1-ascii" aria-hidden="true">#</a> <strong>1)ASCII</strong></h4><p>ASCII 码由电报码发展而来,第一版标准发布于 1963 年,最后一次更新则是在 1986 年,至今为止共定义了 128 个字符。其中 33 个字符无法显示在一般的设备上,需要用特殊的设备才能显示。</p><p>ASCII 码的局限在于只能显示 26 个基本拉丁字母、阿拉伯数字和英式标点符号,因此只能用于显示现代美国英语,对于其他一些语言则无能为力,比如在法语中,字母上方有注音符号,它就无法用 ASCII 码表示。</p><figure><img src="https://cdn.tobebetterjavaer.com/tobebetterjavaer/images/core-points/ten-01.png" alt="" tabindex="0" loading="lazy"><figcaption></figcaption></figure><p>PS:拉丁字母(也称为罗马字母)是多数欧洲语言采用的字母系统,是世界上最通行的字母文字系统,是罗马文明的成果之一。</p><p>虽然名称上叫作拉丁字母,但拉丁文中并没有用 J、U 和 W 这三个字母。</p><p>在我的印象中,可能说拉丁字母多少有些陌生,说英语字母可能就有直观的印象了。</p><figure><img 
src="https://cdn.tobebetterjavaer.com/tobebetterjavaer/images/core-points/ten-02.png" alt="" tabindex="0" loading="lazy"><figcaption></figcaption></figure><p>阿拉伯数字,我们都很熟悉了。</p><figure><img src="https://cdn.tobebetterjavaer.com/tobebetterjavaer/images/core-points/ten-03.png" alt="" tabindex="0" loading="lazy"><figcaption></figcaption></figure><p>但是,阿拉伯数字并非起源于阿拉伯,而是起源于古印度。学过历史的你应该有一些印象,阿拉伯分布于西亚和北非,以阿拉伯语为主要语言,以伊斯兰教为主要信仰。</p><figure><img src="https://cdn.tobebetterjavaer.com/tobebetterjavaer/images/core-points/ten-04.png" alt="" tabindex="0" loading="lazy"><figcaption></figcaption></figure><p>处在这样的地理位置,做起东亚和欧洲的一些生意就很有优势,于是阿拉伯数字就由阿拉伯人传到了欧洲,因此得名。</p><p>英式标点符号,也叫英文标点符号,和中文标点符号很相近。标点符号是辅助文字记录语言的符号,是书面语的组成部分,用来表示停顿、加强语气等。</p><p>英文标点符号在 16 世纪时,分为朗诵学派和句法学派,主要由古典时期的希腊文和拉丁文演变而来,在 17 世纪后进入稳定阶段。俄文的标点符号依据希腊文而来,到了 18 世纪后也采用了英文标点符号。</p><p>在很多人的印象中,古文是没有标点符号的,但管锡华博士研究指出,<strong>中国早在先秦时代就有标点符号了</strong>,后来融合了一些英文标点符号后,逐渐形成了现在的中文标点符号。</p><figure><img src="https://cdn.tobebetterjavaer.com/tobebetterjavaer/images/core-points/ten-05.png" alt="" tabindex="0" loading="lazy"><figcaption></figcaption></figure><h4 id="_2-unicode" tabindex="-1"><a class="header-anchor" href="#_2-unicode" aria-hidden="true">#</a> <strong>2)Unicode</strong></h4><p>这个世界上,除了英语,还有法语、葡萄牙语、西班牙语、德语、俄语、阿拉伯语、韩语、日语等等等等。ASCII 码用来表示英语是绰绰有余的,但其他这些语言就没办法了。</p><p>像我的主人二哥的母语——中文,就博大精深,与其对应的汉字数量很多很多,东汉的《说文解字》收字 9353 个,清朝《康熙字典》收字 47035 个,当代的《汉语大字典》收字 60370 个。1994 年中华书局、中国友谊出版公司出版的《中华字海》收字 85568 个。</p><blockquote><p>常用字大概 2500 个,次常用字 1000 个。</p></blockquote><p>一个字节只能表示 256 种符号,所以如果拿 ASCII 码来表示汉字的话,是远远不够用的,那就必须要用更多的字节。简体中文常见的编码方式是 GB2312,使用两个字节表示一个汉字,理论上最多可以表示 256 x 256 = 65536 个符号。</p><p>要知道,世界上存在着多种编码方式,同一个二进制数字可以被解释成不同的符号。因此,要想打开一个文本文件,就必须知道它的编码方式,否则用错误的编码方式解读,就会出现乱码。</p><ul><li>编码就是将原始数据(比如说文本、图像、视频、音频等)转换为二进制形式。</li><li>解码就是将二进制数据转换为原始数据,是一个反向的过程。</li></ul><p>如果有一种编码,将世界上所有的符号都纳入其中。每一个符号都给予一个独一无二的编码,那么乱码问题就会彻底消失。</p><p>这个艰巨的任务由谁来完成呢?<strong>Unicode</strong>,中文译作万国码、国际码、统一码、单一码,就像它的名字所表示的,这是一种所有符号的编码。</p><p>Unicode 
至今仍在不断增修,每个新版本都会加入更多新的字符。目前最新的版本为 2020 年 3 月公布的 13.0,收录了 13 万个字符。</p><figure><img src="https://cdn.tobebetterjavaer.com/tobebetterjavaer/images/core-points/ten-07.png" alt="" tabindex="0" loading="lazy"><figcaption></figcaption></figure><p>Unicode 是一个很大的集合,现在的规模可以容纳 100 多万个符号。每个符号的编码都不一样,比如,<code>U+0639</code>表示阿拉伯字母 <code>Ain</code>,<code>U+0041</code> 表示英语的大写字母 <code>A</code>,<code>U+4E25</code> 表示汉字<code>严</code>。</p><p>具体的符号对应表,可以查询:</p><blockquote><ul><li><a href="http://unicode.org" target="_blank" rel="noopener noreferrer">unicode.org<span><svg class="external-link-icon" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" focusable="false" x="0px" y="0px" viewBox="0 0 100 100" width="15" height="15"><path fill="currentColor" d="M18.8,85.1h56l0,0c2.2,0,4-1.8,4-4v-32h-8v28h-48v-48h28v-8h-32l0,0c-2.2,0-4,1.8-4,4v56C14.8,83.3,16.6,85.1,18.8,85.1z"></path><polygon fill="currentColor" points="45.7,48.7 51.3,54.3 77.2,28.5 77.2,37.2 85.2,37.2 85.2,14.9 62.8,14.9 62.8,22.9 71.5,22.9"></polygon></svg><span class="external-link-icon-sr-only">open in new window</span></span></a>:<a href="http://www.unicode.org" target="_blank" rel="noopener noreferrer">http://www.unicode.org<span><svg class="external-link-icon" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" focusable="false" x="0px" y="0px" viewBox="0 0 100 100" width="15" height="15"><path fill="currentColor" d="M18.8,85.1h56l0,0c2.2,0,4-1.8,4-4v-32h-8v28h-48v-48h28v-8h-32l0,0c-2.2,0-4,1.8-4,4v56C14.8,83.3,16.6,85.1,18.8,85.1z"></path><polygon fill="currentColor" points="45.7,48.7 51.3,54.3 77.2,28.5 77.2,37.2 85.2,37.2 85.2,14.9 62.8,14.9 62.8,22.9 71.5,22.9"></polygon></svg><span class="external-link-icon-sr-only">open in new window</span></span></a></li><li>汉字对应表:<a href="http://www.chi2ko.com/tool/CJK.htm" target="_blank" rel="noopener noreferrer">http://www.chi2ko.com/tool/CJK.htm<span><svg class="external-link-icon" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" focusable="false" x="0px" y="0px" 
viewBox="0 0 100 100" width="15" height="15"><path fill="currentColor" d="M18.8,85.1h56l0,0c2.2,0,4-1.8,4-4v-32h-8v28h-48v-48h28v-8h-32l0,0c-2.2,0-4,1.8-4,4v56C14.8,83.3,16.6,85.1,18.8,85.1z"></path><polygon fill="currentColor" points="45.7,48.7 51.3,54.3 77.2,28.5 77.2,37.2 85.2,37.2 85.2,14.9 62.8,14.9 62.8,22.9 71.5,22.9"></polygon></svg><span class="external-link-icon-sr-only">open in new window</span></span></a></li></ul></blockquote><p>曾有人这样说:</p><blockquote><p>Unicode 支持的字符上限是 65536 个,Unicode 字符必须占两个字节。</p></blockquote><p>但这是一种误解,记住,Unicode 只是一个用来映射字符和数字的标准。它支持的字符数量并不以 65536 为上限(目前的码位范围是 U+0000 到 U+10FFFF,共 1114112 个码位),也不要求字符必须占两个、三个或者其它任意数量的字节。</p><h4 id="_3-utf-8" tabindex="-1"><a class="header-anchor" href="#_3-utf-8" aria-hidden="true">#</a> 3)UTF-8</h4><p>Unicode 虽然统一了全世界字符的编码,但没有规定如何存储。如果统一规定的话,每个符号就要用 3 个或 4 个字节表示,因为 2 个字节只能表示 65536 个,根本表示不全。</p><p>那怎么办呢?</p><p>UTF(Unicode Transformation Formats,Unicode 的编码方式)来了!最常见的就是 UTF-8 和 UTF-16。</p><p>在 UTF-8 中,0-127 号的字符用 1 个字节来表示,使用和 ASCII 相同的编码。只有 128 号及以上的字符才用 2 个、3 个或者 4 个字节来表示。</p><p>如果只有一个字节,那么最高的比特位为 0;如果有多个字节,那么第一个字节从最高位开始,连续有几个比特位的值为 1,就使用几个字节编码,剩下的字节均以 10 开头。</p><p>具体的表现形式为:</p><ul><li>0xxxxxxx:一个字节;</li><li>110xxxxx 10xxxxxx:两个字节编码形式(开始两个 1);</li><li>1110xxxx 10xxxxxx 10xxxxxx:三字节编码形式(开始三个 1);</li><li>11110xxx 10xxxxxx 10xxxxxx 10xxxxxx:四字节编码形式(开始四个 1)。</li></ul><p>也就是说,UTF-8 是一种可变长度的编码方式——这是它的优势也是劣势。</p><p>怎么讲呢?优势就是它包罗万象,劣势就是浪费空间。举例来说吧,UTF-8 采用了 3 个字节(<code>256*256*256=16777216</code>)来编码常用的汉字,但常用的汉字没有这么多,这对于计算机来说,就是一种严重的资源浪费。</p><h4 id="_4-gb2312" tabindex="-1"><a class="header-anchor" href="#_4-gb2312" aria-hidden="true">#</a> 4)GB2312</h4><p>基于这样的考虑,中国国家标准总局于 1980 年发布了 GB2312 编码,即中华人民共和国国家标准简体中文字符集。GB2312 标准共收录 6763 个汉字(2 个字节就够用了),其中一级汉字 3755 个,二级汉字 3008 个;同时收录了包括拉丁字母、希腊字母、日文平假名及片假名字母、俄语西里尔字母在内的 682 个字符。</p><p>GB2312 的出现,基本满足了汉字的计算机处理需求。对于人名、古汉语等方面出现的罕用字和繁体字,GB2312 不能处理,就有了 GBK(K 为“扩展”的汉语拼音(kuòzhǎn)第一个声母)。</p><h3 id="中文乱码是如何出现的" tabindex="-1"><a class="header-anchor" href="#中文乱码是如何出现的" aria-hidden="true">#</a> 
中文乱码是如何出现的?</h3><p>Q:来一串出现中文乱码的 demo,中文为沉默王二</p><p>GPT4:下面是一个可能导致中文乱码的示例。首先使用 GBK 编码将中文字符串 "沉默王二" 转换为字节数组,然后使用 UTF-8 编码将字节数组转回字符串。这将导致字符串在解码时出现乱码。</p><div class="language-java line-numbers-mode" data-ext="java"><pre class="language-java"><code><span class="token keyword">public</span> <span class="token keyword">class</span> <span class="token class-name">EncodingDemo</span> <span class="token punctuation">{</span>
<span class="token keyword">public</span> <span class="token keyword">static</span> <span class="token keyword">void</span> <span class="token function">main</span><span class="token punctuation">(</span><span class="token class-name">String</span><span class="token punctuation">[</span><span class="token punctuation">]</span> args<span class="token punctuation">)</span> <span class="token punctuation">{</span>
<span class="token class-name">String</span> originalStr <span class="token operator">=</span> <span class="token string">"沉默王二"</span><span class="token punctuation">;</span>
<span class="token class-name">String</span> encodedStr <span class="token operator">=</span> <span class="token string">""</span><span class="token punctuation">;</span>
<span class="token keyword">try</span> <span class="token punctuation">{</span>
<span class="token keyword">byte</span><span class="token punctuation">[</span><span class="token punctuation">]</span> bytes <span class="token operator">=</span> originalStr<span class="token punctuation">.</span><span class="token function">getBytes</span><span class="token punctuation">(</span><span class="token string">"GBK"</span><span class="token punctuation">)</span><span class="token punctuation">;</span>
encodedStr <span class="token operator">=</span> <span class="token keyword">new</span> <span class="token class-name">String</span><span class="token punctuation">(</span>bytes<span class="token punctuation">,</span> <span class="token string">"UTF-8"</span><span class="token punctuation">)</span><span class="token punctuation">;</span>
<span class="token punctuation">}</span> <span class="token keyword">catch</span> <span class="token punctuation">(</span><span class="token class-name">UnsupportedEncodingException</span> e<span class="token punctuation">)</span> <span class="token punctuation">{</span>
e<span class="token punctuation">.</span><span class="token function">printStackTrace</span><span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">;</span>
<span class="token punctuation">}</span>
<span class="token class-name">System</span><span class="token punctuation">.</span>out<span class="token punctuation">.</span><span class="token function">println</span><span class="token punctuation">(</span><span class="token string">"转码前: "</span> <span class="token operator">+</span> originalStr<span class="token punctuation">)</span><span class="token punctuation">;</span>
<span class="token class-name">System</span><span class="token punctuation">.</span>out<span class="token punctuation">.</span><span class="token function">println</span><span class="token punctuation">(</span><span class="token string">"转码后: "</span> <span class="token operator">+</span> encodedStr<span class="token punctuation">)</span><span class="token punctuation">;</span>
<span class="token punctuation">}</span>
<span class="token punctuation">}</span>
</code></pre><div class="line-numbers" aria-hidden="true"><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div></div></div><p>程序会输出什么呢?</p><div class="language-text line-numbers-mode" data-ext="text"><pre class="language-text"><code>转码前: 沉默王二
转码后: ��Ĭ����
</code></pre><div class="line-numbers" aria-hidden="true"><div class="line-number"></div><div class="line-number"></div></div></div><p>嘿嘿,乱码来了!在 Unicode 中,�(U+FFFD)是一个特殊的替换字符,用来表示无法识别或无法显示的字符,它的 UTF-8 编码用十六进制表示是 <code>0xEF 0xBF 0xBD</code>。那么两个 �� 的 UTF-8 编码就是 <code>0xEF 0xBF 0xBD 0xEF 0xBF 0xBD</code>,如果把这 6 个字节用 GBK 进行解码的话,就是大名鼎鼎的“<strong>锟斤拷</strong>”。</p><p>可以通过代码来验证一下:</p><div class="language-java line-numbers-mode" data-ext="java"><pre class="language-java"><code><span class="token comment">// 输出 efbfbdefbfbd</span>
<span class="token keyword">char</span><span class="token punctuation">[</span><span class="token punctuation">]</span> kuijinkao <span class="token operator">=</span> <span class="token class-name">HexUtil</span><span class="token punctuation">.</span><span class="token function">encodeHex</span><span class="token punctuation">(</span><span class="token string">"��"</span><span class="token punctuation">,</span> <span class="token class-name">StandardCharsets</span><span class="token punctuation">.</span><span class="token constant">UTF_8</span><span class="token punctuation">)</span><span class="token punctuation">;</span>
<span class="token class-name">System</span><span class="token punctuation">.</span>out<span class="token punctuation">.</span><span class="token function">println</span><span class="token punctuation">(</span>kuijinkao<span class="token punctuation">)</span><span class="token punctuation">;</span>
<span class="token comment">// 借助 hutool 转成二进制</span>
<span class="token keyword">byte</span><span class="token punctuation">[</span><span class="token punctuation">]</span> testBytes <span class="token operator">=</span> <span class="token class-name">HexUtil</span><span class="token punctuation">.</span><span class="token function">decodeHex</span><span class="token punctuation">(</span>kuijinkao<span class="token punctuation">)</span><span class="token punctuation">;</span>
<span class="token comment">// 使用 GBK 解码</span>
<span class="token class-name">String</span> testResult <span class="token operator">=</span> <span class="token keyword">new</span> <span class="token class-name">String</span><span class="token punctuation">(</span>testBytes<span class="token punctuation">,</span> <span class="token class-name">Charset</span><span class="token punctuation">.</span><span class="token function">forName</span><span class="token punctuation">(</span><span class="token string">"GBK"</span><span class="token punctuation">)</span><span class="token punctuation">)</span><span class="token punctuation">;</span>
<span class="token comment">// 输出锟斤拷</span>
<span class="token class-name">System</span><span class="token punctuation">.</span>out<span class="token punctuation">.</span><span class="token function">println</span><span class="token punctuation">(</span>testResult<span class="token punctuation">)</span><span class="token punctuation">;</span>
</code></pre><div class="line-numbers" aria-hidden="true"><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div></div></div><p>来看一下输出结果:</p><div class="language-java line-numbers-mode" data-ext="java"><pre class="language-java"><code>efbfbdefbfbd
锟斤拷
</code></pre><div class="line-numbers" aria-hidden="true"><div class="line-number"></div><div class="line-number"></div></div></div><p>所以,以后再见到<strong>锟斤拷</strong>,第一时间想到 UTF-8 和 GBK 的转换问题准没错。</p><h3 id="如何解决中文乱码呢" tabindex="-1"><a class="header-anchor" href="#如何解决中文乱码呢" aria-hidden="true">#</a> 如何解决中文乱码呢?</h3><p>Q:如何解决中文乱码呢?</p><p>GPT4:解决中文乱码问题需要确保在编码和解码过程中使用相同的字符编码。这是一个修改过的示例,展示了如何避免中文乱码问题:</p><div class="language-java line-numbers-mode" data-ext="java"><pre class="language-java"><code><span class="token class-name">String</span> originalStr <span class="token operator">=</span> <span class="token string">"沉默王二"</span><span class="token punctuation">;</span>
<span class="token keyword">byte</span><span class="token punctuation">[</span><span class="token punctuation">]</span> bytes <span class="token operator">=</span> originalStr<span class="token punctuation">.</span><span class="token function">getBytes</span><span class="token punctuation">(</span><span class="token class-name">StandardCharsets</span><span class="token punctuation">.</span><span class="token constant">UTF_8</span><span class="token punctuation">)</span><span class="token punctuation">;</span>
<span class="token class-name">String</span> encodedStr <span class="token operator">=</span> <span class="token keyword">new</span> <span class="token class-name">String</span><span class="token punctuation">(</span>bytes<span class="token punctuation">,</span> <span class="token class-name">StandardCharsets</span><span class="token punctuation">.</span><span class="token constant">UTF_8</span><span class="token punctuation">)</span><span class="token punctuation">;</span>
<span class="token class-name">System</span><span class="token punctuation">.</span>out<span class="token punctuation">.</span><span class="token function">println</span><span class="token punctuation">(</span><span class="token string">"正确转码前: "</span> <span class="token operator">+</span> originalStr<span class="token punctuation">)</span><span class="token punctuation">;</span>
<span class="token class-name">System</span><span class="token punctuation">.</span>out<span class="token punctuation">.</span><span class="token function">println</span><span class="token punctuation">(</span><span class="token string">"正确转码后: "</span> <span class="token operator">+</span> encodedStr<span class="token punctuation">)</span><span class="token punctuation">;</span>
</code></pre><div class="line-numbers" aria-hidden="true"><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div></div></div><p>输出结果:</p><div class="language-java line-numbers-mode" data-ext="java"><pre class="language-java"><code>正确转码前<span class="token operator">:</span> 沉默王二
正确转码后<span class="token operator">:</span> 沉默王二
</code></pre><div class="line-numbers" aria-hidden="true"><div class="line-number"></div><div class="line-number"></div></div></div><p>除此之外,以下是一些避免乱码的实践:</p><ul><li>使用统一的字符编码,如 UTF-8。它包含了所有 Unicode 字符,并且广泛支持。确保在所有平台和系统上使用相同的编码。</li><li>在处理文本文件时,始终明确指定字符编码。例如,在 Java 中使用 InputStreamReader、OutputStreamWriter 时,指定编码参数。</li><li>在 HTML、XML 等文档中,指定字符编码。例如,在 HTML 页面中添加 <code>&lt;meta charset="UTF-8"&gt;</code>。</li><li>对于数据库,确保使用正确的字符集。在创建数据库和表时,明确指定字符集,如 utf8mb4。</li></ul><p>当与第三方服务通信时,确保双方使用相同的字符编码。在处理 API 请求和响应时,检查字符编码是否匹配。</p><p>Q:针对支付宝这种生产环境事故,如何快速定位并解决问题呢?</p><p>GPT4:</p><p>1、收集信息:了解问题的具体表现,例如哪些页面或功能受到影响,问题的严重程度,以及用户反馈。这将有助于快速定位问题范围。</p><p>2、检查日志:查看服务器日志和应用程序日志,检查是否存在异常信息或错误。日志信息有助于识别问题发生的时间和具体原因。</p><p>3、复现问题:在测试环境中尝试复现问题,以便更深入地了解问题细节。如果无法复现问题,可能需要进一步收集生产环境的详细信息。</p><p>4、定位问题:根据收集到的信息,确定问题可能发生的代码或系统部分。检查相关代码,特别是涉及字符编码、数据传输、数据库操作的部分。</p><p>5、解决问题:修复问题代码,确保字符编码在整个系统中保持一致。进行单元测试和集成测试,验证问题是否已解决。</p><p>6、部署修复:将修复后的代码部署到生产环境。为了避免影响正常服务,可以考虑先在一个小规模的生产环境中进行验证,然后逐步扩大部署范围。</p><p>7、监控:在修复后持续关注系统运行状况,确保问题已得到解决。收集用户反馈,确认用户满意度。</p><p>8、总结经验:针对此次问题,总结经验教训,分析问题产生的根本原因,优化开发和测试流程,避免类似问题再次发生。</p><hr><p>GitHub 上标星 9300+ 的开源知识库《<a href="https://github.com/itwanger/toBeBetterJavaer" target="_blank" rel="noopener noreferrer">二哥的 Java 进阶之路<span><svg class="external-link-icon" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" focusable="false" x="0px" y="0px" viewBox="0 0 100 100" width="15" height="15"><path fill="currentColor" d="M18.8,85.1h56l0,0c2.2,0,4-1.8,4-4v-32h-8v28h-48v-48h28v-8h-32l0,0c-2.2,0-4,1.8-4,4v56C14.8,83.3,16.6,85.1,18.8,85.1z"></path><polygon fill="currentColor" points="45.7,48.7 51.3,54.3 77.2,28.5 77.2,37.2 85.2,37.2 85.2,14.9 62.8,14.9 62.8,22.9 71.5,22.9"></polygon></svg><span class="external-link-icon-sr-only">open in new window</span></span></a>》第一版 PDF 终于来了!包括Java基础语法、数组&amp;字符串、OOP、集合框架、Java IO、异常处理、Java 新特性、网络编程、NIO、并发编程、JVM等等,共计 32 万余字,500+张手绘图,可以说是通俗易懂、风趣幽默……详情戳:<a href="https://javabetter.cn/overview/" target="_blank" rel="noopener noreferrer">太赞了,GitHub 上标星 9300+ 
的 Java 教程<span><svg class="external-link-icon" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" focusable="false" x="0px" y="0px" viewBox="0 0 100 100" width="15" height="15"><path fill="currentColor" d="M18.8,85.1h56l0,0c2.2,0,4-1.8,4-4v-32h-8v28h-48v-48h28v-8h-32l0,0c-2.2,0-4,1.8-4,4v56C14.8,83.3,16.6,85.1,18.8,85.1z"></path><polygon fill="currentColor" points="45.7,48.7 51.3,54.3 77.2,28.5 77.2,37.2 85.2,37.2 85.2,14.9 62.8,14.9 62.8,22.9 71.5,22.9"></polygon></svg><span class="external-link-icon-sr-only">open in new window</span></span></a></p><p>微信搜 <strong>沉默王二</strong> 或扫描下方二维码关注二哥的原创公众号沉默王二,回复 <strong>222</strong> 即可免费领取。</p><figure><img src="https://cdn.tobebetterjavaer.com/tobebetterjavaer/images/gongzhonghao.png" alt="" tabindex="0" loading="lazy"><figcaption></figcaption></figure></div><!----><footer class="page-meta"><div class="meta-item edit-link"><a href="https://github.com/itwanger/toBeBetterJavaer/edit/master/docs/basic-extra-meal/java-unicode.md" rel="noopener noreferrer" target="_blank" aria-label="编辑此页" class="nav-link label"><!--[--><svg xmlns="http://www.w3.org/2000/svg" class="icon edit-icon" viewBox="0 0 1024 1024" fill="currentColor" aria-label="edit icon"><path d="M430.818 653.65a60.46 60.46 0 0 1-50.96-93.281l71.69-114.012 7.773-10.365L816.038 80.138A60.46 60.46 0 0 1 859.225 62a60.46 60.46 0 0 1 43.186 18.138l43.186 43.186a60.46 60.46 0 0 1 0 86.373L588.879 565.55l-8.637 8.637-117.466 68.234a60.46 60.46 0 0 1-31.958 11.229z"></path><path d="M728.802 962H252.891A190.883 190.883 0 0 1 62.008 771.98V296.934a190.883 190.883 0 0 1 190.883-192.61h267.754a60.46 60.46 0 0 1 0 120.92H252.891a69.962 69.962 0 0 0-69.098 69.099V771.98a69.962 69.962 0 0 0 69.098 69.098h475.911A69.962 69.962 0 0 0 797.9 771.98V503.363a60.46 60.46 0 1 1 120.922 0V771.98A190.883 190.883 0 0 1 728.802 962z"></path></svg><!--]-->编辑此页<span><svg class="external-link-icon" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" focusable="false" x="0px" y="0px" 
viewBox="0 0 100 100" width="15" height="15"><path fill="currentColor" d="M18.8,85.1h56l0,0c2.2,0,4-1.8,4-4v-32h-8v28h-48v-48h28v-8h-32l0,0c-2.2,0-4,1.8-4,4v56C14.8,83.3,16.6,85.1,18.8,85.1z"></path><polygon fill="currentColor" points="45.7,48.7 51.3,54.3 77.2,28.5 77.2,37.2 85.2,37.2 85.2,14.9 62.8,14.9 62.8,22.9 71.5,22.9"></polygon></svg><span class="external-link-icon-sr-only">open in new window</span></span><!----></a></div><div class="meta-item git-info"><div class="update-time"><span class="label">上次编辑于: </span><!----></div><div class="contributors"><span class="label">贡献者: </span><!--[--><!--[--><span class="contributor" title="email: www.qing_gee@163.com">itwanger</span>,<!--]--><!--[--><span class="contributor" title="email: www.qing_gee@163.com">沉默王二</span><!--]--><!--]--></div></div></footer><nav class="vp-page-nav"><a class="vp-link nav-link prev" href="/basic-extra-meal/java-naming.html"><div class="hint"><span class="arrow start"></span>上一页</div><div class="link"><!---->Java命名规范</div></a><a class="vp-link nav-link next" href="/basic-extra-meal/box.html"><div class="hint">下一页<span class="arrow end"></span></div><div class="link">深入剖析Java中的拆箱和装箱<!----></div></a></nav><div id="comment" class="giscus-wrapper input-top" style="display:block;"><div class="loading-icon-wrapper" style="display:flex;align-items:center;justify-content:center;height:96px"><svg xmlns="http://www.w3.org/2000/svg" width="48" height="48" preserveAspectRatio="xMidYMid" viewBox="25 25 50 50"><animateTransform attributeName="transform" type="rotate" dur="2s" keyTimes="0;1" repeatCount="indefinite" values="0;360"></animateTransform><circle cx="50" cy="50" r="20" fill="none" stroke="currentColor" stroke-width="4" stroke-linecap="round"><animate attributeName="stroke-dasharray" dur="1.5s" keyTimes="0;0.5;1" repeatCount="indefinite" values="1,200;90,200;1,200"></animate><animate attributeName="stroke-dashoffset" dur="1.5s" keyTimes="0;0.5;1" repeatCount="indefinite" 
values="0;-35px;-125px"></animate></circle></svg></div></div><!----><!--]--></main><!--]--><footer class="vp-footer-wrapper"><div class="vp-footer"><a href="https://beian.miit.gov.cn/" target="_blank">豫ICP备2021038026号-4</a><img src="https://cdn.tobebetterjavaer.com/tobebetterjavaer/images/beian.png" height="15px" width="15px" /><a target="_blank" href="http://www.beian.gov.cn/portal/registerSystemInfo?recordcode=41030502000411"><span>豫公网安备 41030502000411号</span></a></div><div class="vp-copyright">Copyright © 2023 沉默王二</div></footer></div><!--]--><!----><!----><!--]--></div>
<script type="module" src="/assets/app-a425eb61.js" defer></script>
</body>
</html>