Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
N
news
Project
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Sartika Aritonang
news
Commits
3c77631c
Commit
3c77631c
authored
May 29, 2020
by
Sartika Aritonang
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Upload New File
parent
b9e8cca9
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
366 additions
and
0 deletions
+366
-0
etree_lxml.py
...-packages/pip/_vendor/html5lib/treebuilders/etree_lxml.py
+366
-0
No files found.
stbi/Lib/site-packages/pip/_vendor/html5lib/treebuilders/etree_lxml.py
0 → 100644
View file @
3c77631c
"""Module for supporting the lxml.etree library. The idea here is to use as much
of the native library as possible, without using fragile hacks like custom element
names that break between releases. The downside of this is that we cannot represent
all possible trees; specifically the following are known to cause problems:
Text or comments as siblings of the root element
Doctypes with no name
When any of these things occur, we emit a DataLossWarning
"""
from
__future__
import
absolute_import
,
division
,
unicode_literals
# pylint:disable=protected-access
import
warnings
import
re
import
sys
from
.
import
base
from
..constants
import
DataLossWarning
from
..
import
constants
from
.
import
etree
as
etree_builders
from
..
import
_ihatexml
import
lxml.etree
as
etree
# Module-level switch consulted by TreeBuilder.getDocument: when true the
# whole ElementTree is returned, otherwise only its root element.
fullTree = True

# Splits a name written as "{namespace}local" into (namespace, local).
tag_regexp = re.compile("{([^}]*)}(.*)")

# The .tag value of an lxml comment node, taken from a throwaway comment; the
# serializers below compare element.tag against it to recognise comments.
comment_type = etree.Comment("asd").tag
class DocumentType(object):
    """Plain record for a doctype: its name plus public and system ids."""

    def __init__(self, name, publicId, systemId):
        # The three identifiers are stored verbatim; nothing is validated here.
        self.name, self.publicId, self.systemId = name, publicId, systemId
class Document(object):
    """Stand-in for the document node that wraps an lxml ElementTree.

    ``_elementTree`` starts out as ``None`` and must be assigned before
    ``appendChild`` is used; ``_childNodes`` is the list exposed through the
    read-only ``childNodes`` property.
    """

    def __init__(self):
        self._elementTree = None
        self._childNodes = []

    def appendChild(self, element):
        """Attach ``element._element`` after the last top-level sibling.

        Nodes appended to the document live as siblings following the root
        element.  Inserting directly after the root every time would place
        each new node in front of the ones appended earlier, reversing their
        order, so walk to the last following sibling first and insert there.

        :arg element: wrapper object whose ``_element`` attribute is the lxml
            node to attach
        """
        root = self._elementTree.getroot()
        last = root
        # Exhaust the iterator; ``last`` ends up on the final sibling, or
        # stays on the root when nothing follows it yet.
        for last in root.itersiblings():
            pass
        last.addnext(element._element)

    def _getChildNodes(self):
        """Return the list of child node wrappers."""
        return self._childNodes

    childNodes = property(_getChildNodes)
def testSerializer(element):
    """Render a tree, fragment or element in the "|"-prefixed test format.

    Every node becomes one line starting with ``|`` followed by indentation;
    children are indented two spaces deeper than their parent.  Full trees
    start with ``#document`` and fragments with ``#document-fragment``.

    :arg element: an object with ``getroot`` (full tree), a string (fragment
        text), an iterable of nodes (fragment) or an lxml element/comment

    :returns: the rendered lines joined with newlines
    """
    lines = []
    emit = lines.append
    name_filter = _ihatexml.InfosetFilter(preventDoubleDashComments=True)

    def quoted(depth, text):
        # Text nodes are shown wrapped in double quotes.
        return "|%s\"%s\"" % (' ' * depth, text)

    def _serialize(node, indent=0):
        pad = ' ' * indent

        if hasattr(node, "tag"):
            if node.tag == comment_type:
                emit("|%s<!-- %s -->" % (pad, node.text))
                if hasattr(node, "tail") and node.tail:
                    emit(quoted(indent, node.tail))
                return

            assert isinstance(node, etree._Element)
            ns_match = etree_builders.tag_regexp.match(node.tag)
            if ns_match is None:
                emit("|%s<%s>" % (pad, name_filter.fromXmlName(node.tag)))
            else:
                namespace = ns_match.group(1)
                local = ns_match.group(2)
                prefix = constants.prefixes[namespace]
                emit("|%s<%s %s>" % (pad, prefix,
                                     name_filter.fromXmlName(local)))

            inner = indent + 2
            if hasattr(node, "attrib"):
                rendered = []
                for raw_name, value in node.attrib.items():
                    attr_match = tag_regexp.match(raw_name)
                    if attr_match is None:
                        label = name_filter.fromXmlName(raw_name)
                    else:
                        namespace, local = attr_match.groups()
                        local = name_filter.fromXmlName(local)
                        prefix = constants.prefixes[namespace]
                        label = "%s %s" % (prefix, local)
                    rendered.append((label, value))

                # Attributes are listed in sorted order for stable output.
                for label, value in sorted(rendered):
                    emit('|%s%s="%s"' % (' ' * inner, label, value))

            if node.text:
                emit(quoted(inner, node.text))
            for child in node:
                _serialize(child, inner)
            # The tail belongs to the parent's level, not the element's own.
            if hasattr(node, "tail") and node.tail:
                emit(quoted(indent, node.tail))
            return

        if hasattr(node, "getroot"):
            # Full tree case
            emit("#document")
            info = node.docinfo
            if info.internalDTD:
                if info.public_id or info.system_url:
                    dtd_str = """<!DOCTYPE %s "%s" "%s">""" % (
                        info.root_name, info.public_id, info.system_url)
                else:
                    dtd_str = "<!DOCTYPE %s>" % info.root_name
                emit("|%s%s" % (' ' * (indent + 2), dtd_str))

            # Rewind to the first top-level sibling, then walk forwards so
            # nodes before and after the root element are included.
            current = node.getroot()
            previous = current.getprevious()
            while previous is not None:
                current = previous
                previous = current.getprevious()
            while current is not None:
                _serialize(current, indent + 2)
                current = current.getnext()
        elif isinstance(node, (str, bytes)):
            # Text in a fragment
            assert isinstance(node, str) or sys.version_info[0] == 2
            emit(quoted(indent, node))
        else:
            # Fragment case
            emit("#document-fragment")
            for member in node:
                _serialize(member, indent + 2)

    _serialize(element, 0)

    return "\n".join(lines)
def tostring(element):
    """Serialize an element and its child nodes to a string

    Objects without a ``tag`` attribute are treated as whole trees: their
    doctype (if any) is written first, followed by the root element.  No
    escaping is applied to text or attribute values.
    """
    pieces = []
    emit = pieces.append

    def _serialize(node):
        if not hasattr(node, "tag"):
            info = node.docinfo
            if info.internalDTD:
                doctype = info.doctype
                if not doctype:
                    # Fall back to a bare doctype built from the root name.
                    doctype = "<!DOCTYPE %s>" % info.root_name
                emit(doctype)
            _serialize(node.getroot())
        elif node.tag == comment_type:
            emit("<!--%s-->" % (node.text,))
        else:
            # This is assumed to be an ordinary element
            if node.attrib:
                attr = " ".join(["%s=\"%s\"" % (key, val)
                                 for key, val in node.attrib.items()])
                emit("<%s %s>" % (node.tag, attr))
            else:
                emit("<%s>" % (node.tag,))

            if node.text:
                emit(node.text)
            for child in node:
                _serialize(child)
            emit("</%s>" % (node.tag,))

        # Trailing text after the node, when the object has any.
        if hasattr(node, "tail") and node.tail:
            emit(node.tail)

    _serialize(element)

    return "".join(pieces)
class TreeBuilder(base.TreeBuilder):
    """Tree builder that produces ``lxml.etree`` trees.

    The element and comment classes are created per instance in ``__init__``
    because they close over the instance's infoset filter and the generic
    etree builder module.
    """
    documentClass = Document
    doctypeClass = DocumentType
    # Replaced on each instance by the closure classes built in __init__.
    elementClass = None
    commentClass = None
    fragmentClass = Document
    implementation = etree

    def __init__(self, namespaceHTMLElements, fullTree=False):
        """Create the per-instance node classes and initialise the base builder.

        :arg namespaceHTMLElements: stored on the instance and passed on to
            ``base.TreeBuilder.__init__``

        :arg fullTree: forwarded to ``etree_builders.getETreeModule``.
            NOTE(review): ``getDocument`` consults the module-level
            ``fullTree`` rather than this argument -- confirm that is intended.
        """
        builder = etree_builders.getETreeModule(etree, fullTree=fullTree)
        infosetFilter = self.infosetFilter = _ihatexml.InfosetFilter(preventDoubleDashComments=True)
        self.namespaceHTMLElements = namespaceHTMLElements

        class Attributes(dict):
            """Dict of attributes that mirrors writes into the lxml element.

            Only construction and ``__setitem__`` are mirrored; other dict
            mutators are not overridden here.
            """

            def __init__(self, element, value=None):
                if value is None:
                    value = {}
                self._element = element
                dict.__init__(self, value)  # pylint:disable=non-parent-init-called
                # Copy the initial attributes onto the wrapped lxml element.
                for key, value in self.items():
                    if isinstance(key, tuple):
                        # Tuple keys become "{key[2]}<coerced key[1]>";
                        # presumably key[2] is the namespace URI -- confirm.
                        name = "{%s}%s" % (key[2], infosetFilter.coerceAttribute(key[1]))
                    else:
                        name = infosetFilter.coerceAttribute(key)
                    self._element._element.attrib[name] = value

            def __setitem__(self, key, value):
                dict.__setitem__(self, key, value)
                # Same key translation as in __init__, applied to one item.
                if isinstance(key, tuple):
                    name = "{%s}%s" % (key[2], infosetFilter.coerceAttribute(key[1]))
                else:
                    name = infosetFilter.coerceAttribute(key)
                self._element._element.attrib[name] = value

        class Element(builder.Element):
            """Element wrapper that passes names and text through the infoset filter."""

            def __init__(self, name, namespace):
                name = infosetFilter.coerceElement(name)
                builder.Element.__init__(self, name, namespace=namespace)
                self._attributes = Attributes(self)

            def _setName(self, name):
                # Store the coerced name and retag the underlying element.
                self._name = infosetFilter.coerceElement(name)
                self._element.tag = self._getETreeTag(
                    self._name, self._namespace)

            def _getName(self):
                # Reverse the coercion so callers see the original name.
                return infosetFilter.fromXmlName(self._name)

            name = property(_getName, _setName)

            def _getAttributes(self):
                return self._attributes

            def _setAttributes(self, attributes):
                # Wrapping in Attributes also writes them to the lxml element.
                self._attributes = Attributes(self, attributes)

            attributes = property(_getAttributes, _setAttributes)

            def insertText(self, data, insertBefore=None):
                data = infosetFilter.coerceCharacters(data)
                builder.Element.insertText(self, data, insertBefore)

            def appendChild(self, child):
                # Plain delegation to the generic etree implementation.
                builder.Element.appendChild(self, child)

        class Comment(builder.Comment):
            """Comment wrapper whose data is passed through ``coerceComment``."""

            def __init__(self, data):
                data = infosetFilter.coerceComment(data)
                builder.Comment.__init__(self, data)

            def _setData(self, data):
                data = infosetFilter.coerceComment(data)
                self._element.text = data

            def _getData(self):
                return self._element.text

            data = property(_getData, _setData)

        self.elementClass = Element
        self.commentClass = Comment
        # self.fragmentClass = builder.DocumentFragment
        base.TreeBuilder.__init__(self, namespaceHTMLElements)

    def reset(self):
        """Reset builder state for a new document.

        Comments are routed to ``insertCommentInitial`` until ``insertRoot``
        switches them to ``insertCommentMain``.
        """
        base.TreeBuilder.reset(self)
        self.insertComment = self.insertCommentInitial
        self.initial_comments = []
        self.doctype = None

    def testSerializer(self, element):
        """Delegate to the module-level ``testSerializer`` function."""
        return testSerializer(element)

    def getDocument(self):
        """Return the ElementTree, or only its root when ``fullTree`` is false.

        ``fullTree`` here is the module-level flag, not the constructor
        argument of the same name.
        """
        if fullTree:
            return self.document._elementTree
        else:
            return self.document._elementTree.getroot()

    def getFragment(self):
        """Return the content of the first open element as a list.

        The list holds the element's leading text (if any), its child
        elements, and its tail text (if any), in that order.
        """
        fragment = []
        element = self.openElements[0]._element
        if element.text:
            fragment.append(element.text)
        fragment.extend(list(element))
        if element.tail:
            fragment.append(element.tail)
        return fragment

    def insertDoctype(self, token):
        """Record the doctype from ``token`` for later use by ``insertRoot``.

        Emits a ``DataLossWarning`` and stores ``None`` when the doctype has
        no name; also warns when the name had to be coerced.

        :arg token: mapping with ``"name"``, ``"publicId"`` and ``"systemId"``
        """
        name = token["name"]
        publicId = token["publicId"]
        systemId = token["systemId"]

        if not name:
            warnings.warn("lxml cannot represent empty doctype", DataLossWarning)
            self.doctype = None
        else:
            coercedName = self.infosetFilter.coerceElement(name)
            if coercedName != name:
                warnings.warn("lxml cannot represent non-xml doctype", DataLossWarning)

            doctype = self.doctypeClass(coercedName, publicId, systemId)
            self.doctype = doctype

    def insertCommentInitial(self, data, parent=None):
        """Buffer a comment seen before the root element exists.

        The buffered items are turned into comment nodes by ``insertRoot``.
        """
        assert parent is None or parent is self.document
        assert self.document._elementTree is None
        self.initial_comments.append(data)

    def insertCommentMain(self, data, parent=None):
        """Insert a comment once the root element exists.

        Warns with ``DataLossWarning`` when the comment targets the document
        and the root element's last child is itself a comment, then defers to
        the base class implementation.
        """
        if (parent == self.document and
                self.document._elementTree.getroot()[-1].tag == comment_type):
            warnings.warn("lxml cannot represent adjacent comments beyond the root elements", DataLossWarning)
        super(TreeBuilder, self).insertComment(data, parent)

    def insertRoot(self, token):
        """Create the lxml tree and register the root element.

        A small document string (optional doctype plus a placeholder root
        element) is parsed with lxml, the buffered initial comments are added
        before the root, and the placeholder is renamed from ``token``.

        :arg token: mapping with ``"name"`` and optionally ``"namespace"``
        """
        # Because of the way libxml2 works, it doesn't seem to be possible to
        # alter information like the doctype after the tree has been parsed.
        # Therefore we need to use the built-in parser to create our initial
        # tree, after which we can add elements like normal
        docStr = ""
        if self.doctype:
            assert self.doctype.name
            docStr += "<!DOCTYPE %s" % self.doctype.name
            if (self.doctype.publicId is not None or
                    self.doctype.systemId is not None):
                docStr += (' PUBLIC "%s" ' %
                           (self.infosetFilter.coercePubid(self.doctype.publicId or "")))
                if self.doctype.systemId:
                    sysid = self.doctype.systemId
                    # A system id containing both quote kinds cannot be
                    # quoted; the single quotes are replaced (with a warning).
                    if sysid.find("'") >= 0 and sysid.find('"') >= 0:
                        warnings.warn("DOCTYPE system cannot contain single and double quotes", DataLossWarning)
                        sysid = sysid.replace("'", 'U00027')
                    # Pick the quote character that does not occur in sysid.
                    if sysid.find("'") >= 0:
                        docStr += '"%s"' % sysid
                    else:
                        docStr += "'%s'" % sysid
                else:
                    docStr += "''"
            docStr += ">"
            if self.doctype.name != token["name"]:
                warnings.warn("lxml cannot represent doctype with a different name to the root element", DataLossWarning)
        # Placeholder root; its tag is overwritten further down.
        docStr += "<THIS_SHOULD_NEVER_APPEAR_PUBLICLY/>"
        root = etree.fromstring(docStr)

        # Append the initial comments:
        for comment_token in self.initial_comments:
            comment = self.commentClass(comment_token["data"])
            root.addprevious(comment._element)

        # Create the root document and add the ElementTree to it
        self.document = self.documentClass()
        self.document._elementTree = root.getroottree()

        # Give the root element the right name
        name = token["name"]
        namespace = token.get("namespace", self.defaultNamespace)
        if namespace is None:
            etree_tag = name
        else:
            etree_tag = "{%s}%s" % (namespace, name)
        root.tag = etree_tag

        # Add the root element to the internal child/open data structures
        root_element = self.elementClass(name, namespace)
        # Point the wrapper at the parsed root rather than the element its
        # constructor created.
        root_element._element = root
        self.document._childNodes.append(root_element)
        self.openElements.append(root_element)

        # Reset to the default insert comment function
        self.insertComment = self.insertCommentMain
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment