1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
|
module REXML
  # Regular-expression source strings describing the lexical tokens of XML.
  # These are parser internals and are not meant for general consumption.
  module XMLTokens
    # Joins character-class fragments and wraps them in a single bracket
    # expression, e.g. ["A-Z", "_"] becomes "[A-Z_]".
    bracket = lambda { |fragments| '[' + fragments.join('') + ']' }

    # NameStartChar, production [4] of
    # http://www.w3.org/TR/REC-xml/#sec-common-syn
    #
    #   ":" | [A-Z] | "_" | [a-z] |
    #   [#xC0-#xD6]     | [#xD8-#xF6]     | [#xF8-#x2FF]    |
    #   [#x370-#x37D]   | [#x37F-#x1FFF]  | [#x200C-#x200D] |
    #   [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] |
    #   [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
    #
    # Single-quoted literals keep the backslash, so every entry holds the
    # textual escape (backslash + "u....") rather than the character itself.
    start_fragments = [
      ':',
      'A-Z',
      '_',
      'a-z',
      '\u00C0-\u00D6',
      '\u00D8-\u00F6',
      '\u00F8-\u02FF',
      '\u0370-\u037D',
      '\u037F-\u1FFF',
      '\u200C-\u200D',
      '\u2070-\u218F',
      '\u2C00-\u2FEF',
      '\u3001-\uD7FF',
      '\uF900-\uFDCF',
      '\uFDF0-\uFFFD',
      '\u{10000}-\u{EFFFF}',
    ]

    # NameChar, production [4a] of
    # http://www.w3.org/TR/REC-xml/#sec-common-syn
    #
    #   NameStartChar | "-" | "." | [0-9] | #xB7 |
    #   [#x0300-#x036F] | [#x203F-#x2040]
    char_fragments = [
      *start_fragments,
      '\-',
      '\.',
      '0-9',
      '\u00B7',
      '\u0300-\u036F',
      '\u203F-\u2040',
    ]

    NAME_START_CHAR = bracket.call(start_fragments)
    NAME_CHAR = bracket.call(char_fragments)
    # Deprecated spelling kept for compatibility. Use NAME_CHAR instead.
    NAMECHAR = NAME_CHAR

    # Namespace-aware variants from
    # http://www.w3.org/TR/xml-names11/#NT-NCName
    #
    #   [6] NCNameStartChar ::= NameStartChar - ':'
    #   [5] NCNameChar      ::= NameChar - ':'
    ncname_start_fragments = start_fragments.reject { |fragment| fragment == ':' }
    ncname_char_fragments = char_fragments.reject { |fragment| fragment == ':' }

    # One NCName: a start character followed by any number of name characters.
    NCNAME_STR = bracket.call(ncname_start_fragments) +
                 bracket.call(ncname_char_fragments) + '*'
    # An NCName with an optional "NCName:" prefix in front of it.
    NAME_STR = '(?:' + NCNAME_STR + ':)?' + NCNAME_STR

    # A complete Name; note that the pattern is wrapped in a capturing group.
    NAME = '(' + NAME_START_CHAR + NAME_CHAR + '*)'
    # One or more name characters.
    NMTOKEN = '(?:' + NAME_CHAR + ')+'
    # Whitespace-separated NMTOKENs; the repeated tail is a capturing group.
    NMTOKENS = NMTOKEN + '(\s+' + NMTOKEN + ')*'
    # An entity reference, a decimal character reference or a hexadecimal
    # character reference.
    REFERENCE = '(?:&' + NAME + ';|&#\d+;|&#x[0-9a-fA-F]+;)'

    # Disabled alternative formulation of REFERENCE, kept for reference:
    #REFERENCE = "(?:#{ENTITYREF}|#{CHARREF})"
    #ENTITYREF = "&#{NAME};"
    #CHARREF = "&#\\d+;|&#x[0-9a-fA-F]+;"
  end
end
|