1 |
<?xml version="1.0" encoding="utf-8" ?> |
2 |
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> |
3 |
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> |
4 |
<!-- |
5 |
This HTML is auto-generated. DO NOT EDIT THIS FILE! If you are writing a new |
6 |
PEP, see http://www.python.org/peps/pep-0001.html for instructions and links |
7 |
to templates. DO NOT USE THIS HTML FILE AS YOUR TEMPLATE! |
8 |
--> |
9 |
<head> |
10 |
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> |
11 |
<meta name="generator" content="Docutils 0.4: http://docutils.sourceforge.net/" /> |
12 |
<title>GLEP 31 -- Character Sets for Portage Tree Items</title> |
13 |
<style type="text/css"> |
14 |
|
15 |
/* |
16 |
:Author: David Goodger |
17 |
:Contact: goodger@users.sourceforge.net |
18 |
:date: $Date: 2006/10/10 20:25:14 $ |
19 |
:version: $Revision: 1.5 $ |
20 |
:copyright: This stylesheet has been placed in the public domain. |
21 |
|
22 |
Default cascading style sheet for the PEP HTML output of Docutils. |
23 |
*/ |
24 |
|
25 |
.first { |
26 |
margin-top: 0 } |
27 |
|
28 |
.last { |
29 |
margin-bottom: 0 } |
30 |
|
31 |
.navigation { |
32 |
width: 100% ; |
33 |
background: #cc99ff ; |
34 |
margin-top: 0px ; |
35 |
margin-bottom: 0px } |
36 |
|
37 |
.navigation .navicon { |
38 |
width: 150px ; |
39 |
height: 35px } |
40 |
|
41 |
.navigation .textlinks { |
42 |
padding-left: 1em ; |
43 |
text-align: left } |
44 |
|
45 |
.navigation td, .navigation th { |
46 |
padding-left: 0em ; |
47 |
padding-right: 0em ; |
48 |
vertical-align: middle } |
49 |
|
50 |
.rfc2822 { |
51 |
margin-top: 0.5em ; |
52 |
margin-left: 0.5em ; |
53 |
margin-right: 0.5em ; |
54 |
margin-bottom: 0em } |
55 |
|
56 |
.rfc2822 td { |
57 |
text-align: left } |
58 |
|
59 |
.rfc2822 th.field-name { |
60 |
text-align: right ; |
61 |
font-family: sans-serif ; |
62 |
padding-right: 0.5em ; |
63 |
font-weight: bold ; |
64 |
margin-bottom: 0em } |
65 |
|
66 |
a.toc-backref { |
67 |
text-decoration: none ; |
68 |
color: black } |
69 |
|
70 |
body { |
71 |
margin: 0px ; |
72 |
margin-bottom: 1em ; |
73 |
padding: 0px } |
74 |
|
75 |
dd { |
76 |
margin-bottom: 0.5em } |
77 |
|
78 |
div.section { |
79 |
margin-left: 1em ; |
80 |
margin-right: 1em ; |
81 |
margin-bottom: 1.5em } |
82 |
|
83 |
div.section div.section { |
84 |
margin-left: 0em ; |
85 |
margin-right: 0em ; |
86 |
margin-top: 1.5em } |
87 |
|
88 |
div.abstract { |
89 |
margin: 2em 5em } |
90 |
|
91 |
div.abstract p.topic-title { |
92 |
font-weight: bold ; |
93 |
text-align: center } |
94 |
|
95 |
div.attention, div.caution, div.danger, div.error, div.hint, |
96 |
div.important, div.note, div.tip, div.warning { |
97 |
margin: 2em ; |
98 |
border: medium outset ; |
99 |
padding: 1em } |
100 |
|
101 |
div.attention p.admonition-title, div.caution p.admonition-title, |
102 |
div.danger p.admonition-title, div.error p.admonition-title, |
103 |
div.warning p.admonition-title { |
104 |
color: red ; |
105 |
font-weight: bold ; |
106 |
font-family: sans-serif } |
107 |
|
108 |
div.hint p.admonition-title, div.important p.admonition-title, |
109 |
div.note p.admonition-title, div.tip p.admonition-title { |
110 |
font-weight: bold ; |
111 |
font-family: sans-serif } |
112 |
|
113 |
div.figure { |
114 |
margin-left: 2em } |
115 |
|
116 |
div.footer, div.header { |
117 |
font-size: smaller } |
118 |
|
119 |
div.footer { |
120 |
margin-left: 1em ; |
121 |
margin-right: 1em } |
122 |
|
123 |
div.system-messages { |
124 |
margin: 5em } |
125 |
|
126 |
div.system-messages h1 { |
127 |
color: red } |
128 |
|
129 |
div.system-message { |
130 |
border: medium outset ; |
131 |
padding: 1em } |
132 |
|
133 |
div.system-message p.system-message-title { |
134 |
color: red ; |
135 |
font-weight: bold } |
136 |
|
137 |
div.topic { |
138 |
margin: 2em } |
139 |
|
140 |
h1 { |
141 |
font-family: sans-serif ; |
142 |
font-size: large } |
143 |
|
144 |
h2 { |
145 |
font-family: sans-serif ; |
146 |
font-size: medium } |
147 |
|
148 |
h3 { |
149 |
font-family: sans-serif ; |
150 |
font-size: small } |
151 |
|
152 |
h4 { |
153 |
font-family: sans-serif ; |
154 |
font-style: italic ; |
155 |
font-size: small } |
156 |
|
157 |
h5 { |
158 |
font-family: sans-serif; |
159 |
font-size: x-small } |
160 |
|
161 |
h6 { |
162 |
font-family: sans-serif; |
163 |
font-style: italic ; |
164 |
font-size: x-small } |
165 |
|
166 |
.section hr { |
167 |
width: 75% } |
168 |
|
169 |
ol.simple, ul.simple { |
170 |
margin-bottom: 1em } |
171 |
|
172 |
ol.arabic { |
173 |
list-style: decimal } |
174 |
|
175 |
ol.loweralpha { |
176 |
list-style: lower-alpha } |
177 |
|
178 |
ol.upperalpha { |
179 |
list-style: upper-alpha } |
180 |
|
181 |
ol.lowerroman { |
182 |
list-style: lower-roman } |
183 |
|
184 |
ol.upperroman { |
185 |
list-style: upper-roman } |
186 |
|
187 |
p.caption { |
188 |
font-style: italic } |
189 |
|
190 |
p.credits { |
191 |
font-style: italic ; |
192 |
font-size: smaller } |
193 |
|
194 |
p.label { |
195 |
white-space: nowrap } |
196 |
|
197 |
p.topic-title { |
198 |
font-family: sans-serif ; |
199 |
font-weight: bold } |
200 |
|
201 |
pre.line-block { |
202 |
font-family: serif ; |
203 |
font-size: 100% } |
204 |
|
205 |
pre.literal-block, pre.doctest-block { |
206 |
margin-left: 2em ; |
207 |
margin-right: 2em ; |
208 |
background-color: #eeeeee } |
209 |
|
210 |
span.classifier { |
211 |
font-family: sans-serif ; |
212 |
font-style: oblique } |
213 |
|
214 |
span.classifier-delimiter { |
215 |
font-family: sans-serif ; |
216 |
font-weight: bold } |
217 |
|
218 |
span.interpreted { |
219 |
font-family: sans-serif } |
220 |
|
221 |
span.option-argument { |
222 |
font-style: italic } |
223 |
|
224 |
span.pre { |
225 |
white-space: pre } |
226 |
|
227 |
span.problematic { |
228 |
color: red } |
229 |
|
230 |
table { |
231 |
margin-top: 0.5em ; |
232 |
margin-bottom: 0.5em } |
233 |
|
234 |
td, th { |
235 |
padding-left: 0.5em ; |
236 |
padding-right: 0.5em ; |
237 |
vertical-align: top } |
238 |
|
239 |
td.num { |
240 |
text-align: right } |
241 |
|
242 |
th.field-name { |
243 |
font-weight: bold ; |
244 |
text-align: left ; |
245 |
white-space: nowrap } |
246 |
|
247 |
h1 tt, h2 tt, h3 tt, h4 tt, h5 tt, h6 tt { |
248 |
font-size: 100% } |
249 |
|
250 |
tt { |
251 |
background-color: #eeeeee } |
252 |
|
253 |
ul.auto-toc { |
254 |
list-style-type: none } |
255 |
|
256 |
</style> |
257 |
</head> |
258 |
<body bgcolor="white"> |
259 |
<table class="navigation" cellpadding="0" cellspacing="0" |
260 |
width="100%" border="0"> |
261 |
<tr><td class="navicon" width="150" height="35"> |
262 |
<a href="http://www.gentoo.org/" title="Gentoo Linux Home Page"> |
263 |
<img src="http://www.gentoo.org/images/gentoo-new.gif" alt="[Gentoo]" |
264 |
border="0" width="150" height="35" /></a></td> |
265 |
<td class="textlinks" align="left"> |
266 |
[<b><a href="http://www.gentoo.org/">Gentoo Linux Home</a></b>] |
267 |
[<b><a href="http://www.gentoo.org/proj/en/glep/">GLEP Index</a></b>] |
268 |
[<b><a href="http://www.gentoo.org/proj/en/glep/glep-0031.txt">GLEP Source</a></b>] |
269 |
</td></tr></table> |
270 |
<div class="document"><table class="rfc2822 docutils field-list" frame="void" rules="none"> |
271 |
<col class="field-name" /> |
272 |
<col class="field-body" /> |
273 |
<tbody valign="top"> |
274 |
<tr class="field"><th class="field-name">GLEP:</th><td class="field-body">31</td> |
275 |
</tr> |
276 |
<tr class="field"><th class="field-name">Title:</th><td class="field-body">Character Sets for Portage Tree Items</td> |
277 |
</tr> |
278 |
<tr class="field"><th class="field-name">Version:</th><td class="field-body">1.5</td> |
279 |
</tr> |
280 |
<tr class="field"><th class="field-name">Author:</th><td class="field-body">Ciaran McCreesh &lt;ciaranm at gentoo.org&gt;</td> |
281 |
</tr> |
282 |
<tr class="field"><th class="field-name">Last-Modified:</th><td class="field-body"><a class="reference" href="http://www.gentoo.org/cgi-bin/viewcvs.cgi/xml/htdocs/proj/en/glep/glep-0031.txt?cvsroot=gentoo">2005/11/07 22:26:59</a></td> |
283 |
</tr> |
284 |
<tr class="field"><th class="field-name">Status:</th><td class="field-body">Approved</td> |
285 |
</tr> |
286 |
<tr class="field"><th class="field-name">Type:</th><td class="field-body">Standards Track</td> |
287 |
</tr> |
288 |
<tr class="field"><th class="field-name">Content-Type:</th><td class="field-body"><a class="reference" href="glep-0002.html">text/x-rst</a></td> |
289 |
</tr> |
290 |
<tr class="field"><th class="field-name">Created:</th><td class="field-body">27-Oct-2004</td> |
291 |
</tr> |
292 |
<tr class="field"><th class="field-name">Post-History:</th><td class="field-body">28-Oct-2004, 1-Nov-2004, 11-Nov-2004</td> |
293 |
</tr> |
294 |
</tbody> |
295 |
</table> |
296 |
<hr /> |
297 |
<div class="contents topic"> |
298 |
<p class="topic-title first"><a id="contents" name="contents">Contents</a></p> |
299 |
<ul class="simple"> |
300 |
<li><a class="reference" href="#abstract" id="id9" name="id9">Abstract</a></li> |
301 |
<li><a class="reference" href="#status" id="id10" name="id10">Status</a></li> |
302 |
<li><a class="reference" href="#motivation" id="id11" name="id11">Motivation</a></li> |
303 |
<li><a class="reference" href="#specification" id="id12" name="id12">Specification</a><ul> |
304 |
<li><a class="reference" href="#changelog-and-metadata-character-sets" id="id13" name="id13">ChangeLog and Metadata Character Sets</a></li> |
305 |
<li><a class="reference" href="#ebuild-and-eclass-character-sets" id="id14" name="id14">Ebuild and Eclass Character Sets</a></li> |
306 |
<li><a class="reference" href="#files-entries-character-sets" id="id15" name="id15">files/ Entries Character Sets</a></li> |
307 |
<li><a class="reference" href="#suitable-characters-for-file-and-directory-names" id="id16" name="id16">Suitable Characters for File and Directory Names</a></li> |
308 |
</ul> |
309 |
</li> |
310 |
<li><a class="reference" href="#backwards-compatibility" id="id17" name="id17">Backwards Compatibility</a></li> |
311 |
<li><a class="reference" href="#references" id="id18" name="id18">References</a></li> |
312 |
<li><a class="reference" href="#copyright" id="id19" name="id19">Copyright</a></li> |
313 |
</ul> |
314 |
</div> |
315 |
<div class="section"> |
316 |
<h1><a class="toc-backref" href="#id9" id="abstract" name="abstract">Abstract</a></h1> |
317 |
<p>A set of guidelines regarding what characters are permissible in the |
318 |
portage tree and how they should be encoded is required.</p> |
319 |
</div> |
320 |
<div class="section"> |
321 |
<h1><a class="toc-backref" href="#id10" id="status" name="status">Status</a></h1> |
322 |
<p>Approved on 8-Nov-2004 assuming that implementation will include |
323 |
documentation for correctly encoding files within nano.</p> |
324 |
</div> |
325 |
<div class="section"> |
326 |
<h1><a class="toc-backref" href="#id11" id="motivation" name="motivation">Motivation</a></h1> |
327 |
<p>At present we have several developers and many more users whose names |
328 |
require characters (for example, accents) which are not part of the |
329 |
standard 'safe' 0..127 ASCII range. There is no current standard on how |
330 |
these should be represented, leading to inconsistency across the tree.</p> |
331 |
<p>Although the issues involved have been discussed informally many times, no |
332 |
official decision has been made.</p> |
333 |
</div> |
334 |
<div class="section"> |
335 |
<h1><a class="toc-backref" href="#id12" id="specification" name="specification">Specification</a></h1> |
336 |
<div class="section"> |
337 |
<h2><a class="toc-backref" href="#id13" id="changelog-and-metadata-character-sets" name="changelog-and-metadata-character-sets">ChangeLog and Metadata Character Sets</a></h2> |
338 |
<p>It is proposed that UTF-8 (<a class="footnote-reference" href="#id5" id="id1" name="id1">[1]</a>) is used for encoding ChangeLog and |
339 |
metadata.xml files inside the portage tree.</p> |
340 |
<p>UTF-8 allows the full range of Unicode (<a class="footnote-reference" href="#id6" id="id2" name="id2">[2]</a>) characters to be expressed, |
341 |
which is necessary given the diversity of the Gentoo developer- and |
342 |
user-base. It is character-compatible with ASCII for the 0..127 |
343 |
characters and does not significantly increase the storage requirements |
344 |
for files which consist mainly of American English characters. It is |
345 |
widely supported, widely used and an official standard.</p> |
346 |
<p>The ISO-8859-* character sets (<a class="footnote-reference" href="#id7" id="id3" name="id3">[3]</a>) would <em>not</em> be appropriate since they |
347 |
cannot express the full range of required characters.</p> |
348 |
</div> |
349 |
<div class="section"> |
350 |
<h2><a class="toc-backref" href="#id14" id="ebuild-and-eclass-character-sets" name="ebuild-and-eclass-character-sets">Ebuild and Eclass Character Sets</a></h2> |
351 |
<p>For the same reasons as previously, it is proposed that UTF-8 is used as |
352 |
the official encoding for ebuild and eclass files.</p> |
353 |
<p>However, developers should be warned that any code which is parsed by bash |
354 |
(in other words, non-comments), and any output which is echoed to the |
355 |
screen (for example, einfo messages) or given to portage (for example any |
356 |
of the standard global variables) must not use anything outside the |
357 |
regular ASCII 0..127 range for compatibility purposes.</p> |
358 |
</div> |
359 |
<div class="section"> |
360 |
<h2><a class="toc-backref" href="#id15" id="files-entries-character-sets" name="files-entries-character-sets">files/ Entries Character Sets</a></h2> |
361 |
<p>Patches must clearly be in the same character set as the file they are |
362 |
patching. For other files/ entries (for example, GNOME desktop files), |
363 |
consistency with the upstream-recommended character set is most sensible.</p> |
364 |
</div> |
365 |
<div class="section"> |
366 |
<h2><a class="toc-backref" href="#id16" id="suitable-characters-for-file-and-directory-names" name="suitable-characters-for-file-and-directory-names">Suitable Characters for File and Directory Names</a></h2> |
367 |
<p>Characters outside the ASCII 0..127 range cannot safely be used for file |
368 |
or directory names. (Of course, not all characters inside the ASCII 0..127 |
369 |
range can be used safely either.)</p> |
370 |
</div> |
371 |
</div> |
372 |
<div class="section"> |
373 |
<h1><a class="toc-backref" href="#id17" id="backwards-compatibility" name="backwards-compatibility">Backwards Compatibility</a></h1> |
374 |
<p>The existing tree uses a mixture of encodings. It would be straightforward |
375 |
to fix existing ChangeLogs and metadata files to use UTF-8.</p> |
376 |
<p>The <tt class="docutils literal"><span class="pre">echangelog</span></tt> tool is character-set agnostic. In order to properly |
377 |
enter UTF-8, developers would have to switch to a UTF-8 shell session. |
378 |
This only applies if the developer is entering new text which uses 'fancy' |
379 |
characters -- existing characters are not mangled.</p> |
380 |
<p>Certain text editors are incapable of handling UTF-8 cleanly. However, |
381 |
since the <tt class="docutils literal"><span class="pre">echangelog</span></tt> tool is generally the correct way to generate |
382 |
ChangeLog entries, this should not be a major problem. Generating |
383 |
metadata.xml files correctly in these editors could become problematic. |
384 |
The <tt class="docutils literal"><span class="pre">vim</span></tt> and <tt class="docutils literal"><span class="pre">emacs</span></tt> editors, which appear to be most widely used, |
385 |
are both capable of handling UTF-8 cleanly -- for vim, this could be |
386 |
configured automatically via the <tt class="docutils literal"><span class="pre">gentoo-syntax</span></tt> (<a class="footnote-reference" href="#id8" id="id4" name="id4">[4]</a>) package.</p> |
387 |
</div> |
388 |
<div class="section"> |
389 |
<h1><a class="toc-backref" href="#id18" id="references" name="references">References</a></h1> |
390 |
<table class="docutils footnote" frame="void" id="id5" rules="none"> |
391 |
<colgroup><col class="label" /><col /></colgroup> |
392 |
<tbody valign="top"> |
393 |
<tr><td class="label"><a class="fn-backref" href="#id1" name="id5">[1]</a></td><td><a class="reference" href="http://www.faqs.org/rfcs/rfc3629.html">RFC 3629</a>: UTF-8, a transformation format of ISO 10646 |
394 |
<a class="reference" href="http://www.ietf.org/rfc/rfc3629.txt">http://www.ietf.org/rfc/rfc3629.txt</a></td></tr> |
395 |
</tbody> |
396 |
</table> |
397 |
<table class="docutils footnote" frame="void" id="id6" rules="none"> |
398 |
<colgroup><col class="label" /><col /></colgroup> |
399 |
<tbody valign="top"> |
400 |
<tr><td class="label"><a class="fn-backref" href="#id2" name="id6">[2]</a></td><td>ISO/IEC 10646 (Universal Multiple-Octet Coded Character Set)</td></tr> |
401 |
</tbody> |
402 |
</table> |
403 |
<table class="docutils footnote" frame="void" id="id7" rules="none"> |
404 |
<colgroup><col class="label" /><col /></colgroup> |
405 |
<tbody valign="top"> |
406 |
<tr><td class="label"><a class="fn-backref" href="#id3" name="id7">[3]</a></td><td>ISO/IEC 8859 (8-bit single-byte coded graphic character sets)</td></tr> |
407 |
</tbody> |
408 |
</table> |
409 |
<table class="docutils footnote" frame="void" id="id8" rules="none"> |
410 |
<colgroup><col class="label" /><col /></colgroup> |
411 |
<tbody valign="top"> |
412 |
<tr><td class="label"><a class="fn-backref" href="#id4" name="id8">[4]</a></td><td>The app-vim/gentoo-syntax package, |
413 |
<a class="reference" href="https://developer.berlios.de/projects/gentoo-syntax/">https://developer.berlios.de/projects/gentoo-syntax/</a></td></tr> |
414 |
</tbody> |
415 |
</table> |
416 |
</div> |
417 |
<div class="section"> |
418 |
<h1><a class="toc-backref" href="#id19" id="copyright" name="copyright">Copyright</a></h1> |
419 |
<p>This document has been placed in the public domain.</p> |
420 |
<!-- vim: set tw=74 fileencoding=utf-8 : --> |
421 |
</div> |
422 |
|
423 |
</div> |
424 |
<div class="footer"> |
425 |
<hr class="footer" /> |
426 |
<a class="reference" href="glep-0031.txt">View document source</a>. |
427 |
Generated on: 2006-10-10 20:23 UTC. |
428 |
Generated by <a class="reference" href="http://docutils.sourceforge.net/">Docutils</a> from <a class="reference" href="http://docutils.sourceforge.net/rst.html">reStructuredText</a> source. |
429 |
|
430 |
</div> |
431 |
</body> |
432 |
</html> |
433 |
|