1 |
usata |
1.1 |
xmlformat is distributed under a BSD-style license. This license |
2 |
|
|
applies to the entire xmlformat distribution, with the exception of |
3 |
|
|
the REX parser (described below). |
4 |
|
|
|
5 |
|
|
Copyright (c) 2004, Kitebird, LLC. All rights reserved. |
6 |
|
|
|
7 |
|
|
Redistribution and use in source and binary forms, with or without |
8 |
|
|
modification, are permitted provided that the following conditions |
9 |
|
|
are met: |
10 |
|
|
|
11 |
|
|
1. Redistributions of source code must retain the above copyright |
12 |
|
|
notice, this list of conditions and the following disclaimer. |
13 |
|
|
|
14 |
|
|
2. Redistributions in binary form must reproduce the above copyright |
15 |
|
|
notice, this list of conditions and the following disclaimer in the |
16 |
|
|
documentation and/or other materials provided with the distribution. |
17 |
|
|
|
18 |
|
|
3. Neither the name of Kitebird nor the names of its contributors may |
19 |
|
|
be used to endorse or promote products derived from this software |
20 |
|
|
without specific prior written permission. |
21 |
|
|
|
22 |
|
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
23 |
|
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
24 |
|
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
25 |
|
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
26 |
|
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
27 |
|
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
28 |
|
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
29 |
|
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
30 |
|
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
31 |
|
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
32 |
|
|
POSSIBILITY OF SUCH DAMAGE. |
33 |
|
|
|
34 |
|
|
---------------------------------------------------------------------- |
35 |
|
|
The REX parser |
36 |
|
|
|
37 |
|
|
xmlformat contains code based on the REX parser, which is Copyright (c) 1998, |
38 |
|
|
Robert D. Cameron. REX is described in this document: |
39 |
|
|
|
40 |
|
|
http://www.cs.sfu.ca/~cameron/REX.html |
41 |
|
|
|
42 |
|
|
The document contains a Perl implementation of REX: |
43 |
|
|
|
44 |
|
|
--- begin REX code --- |
45 |
|
|
# REX/Perl 1.0 |
46 |
|
|
# Robert D. Cameron "REX: XML Shallow Parsing with Regular Expressions", |
47 |
|
|
# Technical Report TR 1998-17, School of Computing Science, Simon Fraser |
48 |
|
|
# University, November, 1998. |
49 |
|
|
# Copyright (c) 1998, Robert D. Cameron. |
50 |
|
|
# The following code may be freely used and distributed provided that |
51 |
|
|
# this copyright and citation notice remains intact and that modifications |
52 |
|
|
# or additions are clearly identified. |
53 |
|
|
|
54 |
|
|
$TextSE = "[^<]+"; |
55 |
|
|
$UntilHyphen = "[^-]*-"; |
56 |
|
|
$Until2Hyphens = "$UntilHyphen(?:[^-]$UntilHyphen)*-"; |
57 |
|
|
$CommentCE = "$Until2Hyphens>?"; |
58 |
|
|
$UntilRSBs = "[^\\]]*](?:[^\\]]+])*]+"; |
59 |
|
|
$CDATA_CE = "$UntilRSBs(?:[^\\]>]$UntilRSBs)*>"; |
60 |
|
|
$S = "[ \\n\\t\\r]+"; |
61 |
|
|
$NameStrt = "[A-Za-z_:]|[^\\x00-\\x7F]"; |
62 |
|
|
$NameChar = "[A-Za-z0-9_:.-]|[^\\x00-\\x7F]"; |
63 |
|
|
$Name = "(?:$NameStrt)(?:$NameChar)*"; |
64 |
|
|
$QuoteSE = "\"[^\"]*\"|'[^']*'"; |
65 |
|
|
$DT_IdentSE = "$S$Name(?:$S(?:$Name|$QuoteSE))*"; |
66 |
|
|
$MarkupDeclCE = "(?:[^\\]\"'><]+|$QuoteSE)*>"; |
67 |
|
|
$S1 = "[\\n\\r\\t ]"; |
68 |
|
|
$UntilQMs = "[^?]*\\?+"; |
69 |
|
|
$PI_Tail = "\\?>|$S1$UntilQMs(?:[^>?]$UntilQMs)*>"; |
70 |
|
|
$DT_ItemSE = |
71 |
|
|
"<(?:!(?:--$Until2Hyphens>|[^-]$MarkupDeclCE)|\\?$Name(?:$PI_Tail))|%$Name;|$S"; |
72 |
|
|
$DocTypeCE = "$DT_IdentSE(?:$S)?(?:\\[(?:$DT_ItemSE)*](?:$S)?)?>?"; |
73 |
|
|
$DeclCE = |
74 |
|
|
"--(?:$CommentCE)?|\\[CDATA\\[(?:$CDATA_CE)?|DOCTYPE(?:$DocTypeCE)?"; |
75 |
|
|
$PI_CE = "$Name(?:$PI_Tail)?"; |
76 |
|
|
$EndTagCE = "$Name(?:$S)?>?"; |
77 |
|
|
$AttValSE = "\"[^<\"]*\"|'[^<']*'"; |
78 |
|
|
$ElemTagCE = "$Name(?:$S$Name(?:$S)?=(?:$S)?(?:$AttValSE))*(?:$S)?/?>?"; |
79 |
|
|
$MarkupSPE = |
80 |
|
|
"<(?:!(?:$DeclCE)?|\\?(?:$PI_CE)?|/(?:$EndTagCE)?|(?:$ElemTagCE)?)"; |
81 |
|
|
$XML_SPE = "$TextSE|$MarkupSPE"; |
82 |
|
|
|
83 |
|
|
|
84 |
|
|
sub ShallowParse { |
85 |
|
|
my($XML_document) = @_; |
86 |
|
|
return $XML_document =~ /$XML_SPE/g; |
87 |
|
|
} |
88 |
|
|
--- end REX code --- |
89 |
|
|
|
90 |
|
|
The Perl and Ruby implementations of xmlformat contain parsers that |
91 |
|
|
are based on the preceding code and are essentially the same, with the |
92 |
|
|
exception of changes to variable and function names. |
93 |
|
|
|