master #1
@@ -0,0 +1,74 @@
|
||||
name: Deploy on merge to main

on:
  push:
    branches:
      - main

# Deployments download, migrate and re-upload the production SQLite file, so
# two overlapping runs could overwrite each other's database. Run one at a time.
concurrency:
  group: deploy-production
  cancel-in-progress: false

jobs:
  deploy:
    runs-on: ubuntu-latest

    steps:
      # ── 1. Checkout ──────────────────────────────────────────────────────────
      - name: Checkout repository
        uses: actions/checkout@v4

      # ── 2. PHP + Composer ────────────────────────────────────────────────────
      - name: Set up PHP
        uses: shivammathur/setup-php@v2
        with:
          php-version: '8.3'
          extensions: pdo, pdo_sqlite, sqlite3

      - name: Install Composer dependencies (production only)
        run: composer install --no-dev --optimize-autoloader

      # ── 3. Fetch the current SQLite database from the server ─────────────────
      # The data.sqlite3 from the repository is NOT deployed to the server; the
      # production copy is downloaded, migrated here and uploaded back.
      - name: Install lftp
        run: |
          sudo apt-get update
          sudo apt-get install -y lftp

      - name: Download production data.sqlite3 via FTP
        # Secrets are passed through the environment instead of being
        # interpolated into the script text, so special characters in a value
        # cannot change the shell command.
        env:
          FTP_HOST: ${{ secrets.FTP_HOST }}
          FTP_USER: ${{ secrets.FTP_USER }}
          FTP_PASS: ${{ secrets.FTP_PASS }}
          FTP_REMOTE_DIR: ${{ secrets.FTP_REMOTE_DIR }}
        # NOTE(review): `ftp:ssl-allow no` disables TLS, so the credentials and
        # the database travel unencrypted. Kept as-is because the server's TLS
        # support is not visible from this workflow; confirm and enable if possible.
        run: |
          lftp -u "${FTP_USER},${FTP_PASS}" \
            -e "set cmd:fail-exit yes; \
                set ftp:ssl-allow no; \
                set ssl:verify-certificate no; \
                get ${FTP_REMOTE_DIR}/data.sqlite3 -o ./data.sqlite3; \
                quit" \
            "${FTP_HOST}"
          # Abort the deployment if the download produced no usable file;
          # otherwise the migration step would run against a missing database
          # and the deploy step could overwrite production with it.
          test -s ./data.sqlite3

      # ── 4. Run Phinx migrations on the downloaded database ───────────────────
      # phinx.php: 'name' => __DIR__ . '/data'  →  the data.sqlite3 file
      - name: Run Phinx migrations
        run: ./vendor/bin/phinx migrate -e main

      # ── 5. Upload files to the server (including the migrated database) ──────
      # NOTE(review): rows written on the server between step 3 and this upload
      # are lost when data.sqlite3 is replaced.
      - name: Deploy files to server via FTP
        uses: SamKirkland/FTP-Deploy-Action@v4.3.5
        with:
          server: ${{ secrets.FTP_HOST }}
          username: ${{ secrets.FTP_USER }}
          password: ${{ secrets.FTP_PASS }}
          local-dir: ./
          server-dir: ${{ secrets.FTP_REMOTE_DIR }}/
          exclude: |
            **/.git/**
            **/.gitea/**
            **/tests/**
            **/docker/**
            **/html_template/**
            **/lib/**
            **/var/cache/**
            **/logs/**
            docker-compose*.yml
            Dockerfile
            phpunit.xml
            phpcs.xml
            phpstan.neon.dist
            README.md
            CONTRIBUTING.md
            .env*
            local.data.sqlite3
            server.data.sqlite3
|
||||
@@ -5,3 +5,4 @@
|
||||
/logs/*
|
||||
!/logs/README.md
|
||||
.phpunit.result.cache
|
||||
data.sqlite3
|
||||
@@ -1,12 +0,0 @@
|
||||
# NOTE(review): the consumer of this file is not visible here; the grouping
# below is reconstructed from the key order and should be confirmed.
github:
  repo: seba-aln/tar-pit
dashboard:
  port: 5000
workers:
  count: 3
opencode:
  url: http://localhost:5002
pipeline:
  max_retries: 5
sprint:
  tasks_per_sprint: 10
|
||||
Vendored
-11
@@ -1,11 +0,0 @@
|
||||
{
    "sqltools.connections": [
        {
            "previewLimit": 50,
            "driver": "SQLite",
            "name": "26.molenda.net",
            "database": "data.sqlite"
        }
    ],
    "sqltools.useNodeRuntime": true
}
|
||||
+27
@@ -0,0 +1,27 @@
|
||||
# PHP-FPM image with the SQLite and zip extensions plus Composer.
FROM php:8.3-fpm

WORKDIR /var/www

# Install system packages and build the PHP extensions in a single layer;
# the apt package lists are removed afterwards to keep the layer small.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        git \
        unzip \
        zip \
        libzip-dev \
        sqlite3 \
        libsqlite3-dev \
        libonig-dev \
    && docker-php-ext-configure zip \
    && docker-php-ext-install -j$(nproc) pdo pdo_sqlite zip \
    && rm -rf /var/lib/apt/lists/*

# Copy the Composer binary from the official composer:2 image.
COPY --from=composer:2 /usr/bin/composer /usr/bin/composer

# Give www-data ownership of the working directory. `|| true` makes this
# best-effort: a chown failure does not fail the build.
RUN chown -R www-data:www-data /var/www || true

EXPOSE 9000

CMD ["php-fpm"]
|
||||
@@ -35,6 +35,7 @@ return function (App $app) {
|
||||
// Protected admin area
|
||||
$app->group('/admin', function (Group $group) {
|
||||
$group->get('', \App\Application\Actions\Admin\DashboardAction::class);
|
||||
$group->get('/blog-visits', \App\Application\Actions\Admin\BlogVisitsAction::class);
|
||||
$group->get('/contents', \App\Application\Actions\Content\ContentCrudAction::class . ':list');
|
||||
$group->get('/access-logs', \App\Application\Actions\Admin\AccessLogsAction::class);
|
||||
$group->map(['GET', 'POST'], '/contents/create', \App\Application\Actions\Content\ContentCrudAction::class . ':create');
|
||||
Binary file not shown.
@@ -0,0 +1,23 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
use Phinx\Migration\AbstractMigration;
|
||||
|
||||
/**
 * Creates the `blog_visits` table when it is not already present.
 */
final class CreateBlogVisits extends AbstractMigration
{
    /**
     * Create `blog_visits` with a unique (ip, useragent) index.
     *
     * Does nothing when the table already exists, so the migration can run
     * against a database that already contains it.
     */
    public function change(): void
    {
        $table = $this->table('blog_visits');

        // Nothing to do if the table is already there.
        if ($table->exists()) {
            return;
        }

        $table->addColumn('ip', 'string', ['null' => true, 'limit' => 255])
            ->addColumn('useragent', 'text', ['null' => true])
            ->addColumn('cnt', 'integer', ['default' => 0])
            ->addColumn('first_seen', 'integer', ['null' => true])
            ->addColumn('last_seen', 'integer', ['null' => true])
            ->addIndex(['ip', 'useragent'], ['unique' => true, 'name' => 'idx_ip_useragent'])
            ->create();
    }
}
|
||||
@@ -0,0 +1,36 @@
|
||||
version: '3.8'

services:
  # PHP-FPM container built from the local Dockerfile.
  php:
    build:
      context: .
      dockerfile: Dockerfile
    working_dir: /var/www
    volumes:
      - ./:/var/www:cached
      - ./docker/php/memory.ini:/usr/local/etc/php/conf.d/memory.ini:ro
    environment:
      COMPOSER_ALLOW_SUPERUSER: "1"
    # The same 1G memory limit is declared in both supported forms.
    mem_limit: 1G
    deploy:
      resources:
        limits:
          memory: 1G
    networks:
      - appnet

  # nginx front end; published on host port 8080.
  nginx:
    image: nginx:1.25-alpine
    ports:
      - "8080:80"
    volumes:
      - ./:/var/www:ro
      - ./docker/nginx/default.conf:/etc/nginx/conf.d/default.conf:ro
    depends_on:
      - php
    networks:
      - appnet

networks:
  appnet:
    driver: bridge
|
||||
@@ -0,0 +1,25 @@
|
||||
server {
    listen 80;
    server_name _;
    root /var/www/public;
    index index.php index.html;

    access_log /var/log/nginx/access.log;
    error_log /var/log/nginx/error.log;

    # Serve existing files and directories directly; everything else is
    # routed to the front controller.
    location / {
        try_files $uri $uri/ /index.php?$query_string;
    }

    location ~ \.php$ {
        # Only pass scripts that really exist under the document root to
        # PHP-FPM. Without this check a URI such as /file.jpg/x.php would be
        # forwarded and could make PHP execute a non-PHP file.
        try_files $uri =404;

        include fastcgi_params;
        fastcgi_pass php:9000;
        fastcgi_param SCRIPT_FILENAME $document_root$fastcgi_script_name;
        fastcgi_index index.php;
    }

    # Static assets: far-future expiry, and no error-log entries for misses.
    location ~* \.(js|css|png|jpg|jpeg|gif|ico|svg)$ {
        expires max;
        log_not_found off;
    }
}
|
||||
@@ -0,0 +1 @@
|
||||
memory_limit = 1G
|
||||
@@ -1,241 +0,0 @@
|
||||
<#
|
||||
.Synopsis
|
||||
Activate a Python virtual environment for the current PowerShell session.
|
||||
|
||||
.Description
|
||||
Pushes the python executable for a virtual environment to the front of the
|
||||
$Env:PATH environment variable and sets the prompt to signify that you are
|
||||
in a Python virtual environment. Makes use of the command line switches as
|
||||
well as the `pyvenv.cfg` file values present in the virtual environment.
|
||||
|
||||
.Parameter VenvDir
|
||||
Path to the directory that contains the virtual environment to activate. The
|
||||
default value for this is the parent of the directory that the Activate.ps1
|
||||
script is located within.
|
||||
|
||||
.Parameter Prompt
|
||||
The prompt prefix to display when this virtual environment is activated. By
|
||||
default, this prompt is the name of the virtual environment folder (VenvDir)
|
||||
surrounded by parentheses and followed by a single space (ie. '(.venv) ').
|
||||
|
||||
.Example
|
||||
Activate.ps1
|
||||
Activates the Python virtual environment that contains the Activate.ps1 script.
|
||||
|
||||
.Example
|
||||
Activate.ps1 -Verbose
|
||||
Activates the Python virtual environment that contains the Activate.ps1 script,
|
||||
and shows extra information about the activation as it executes.
|
||||
|
||||
.Example
|
||||
Activate.ps1 -VenvDir C:\Users\MyUser\Common\.venv
|
||||
Activates the Python virtual environment located in the specified location.
|
||||
|
||||
.Example
|
||||
Activate.ps1 -Prompt "MyPython"
|
||||
Activates the Python virtual environment that contains the Activate.ps1 script,
|
||||
and prefixes the current prompt with the specified string (surrounded in
|
||||
parentheses) while the virtual environment is active.
|
||||
|
||||
.Notes
|
||||
On Windows, it may be required to enable this Activate.ps1 script by setting the
|
||||
execution policy for the user. You can do this by issuing the following PowerShell
|
||||
command:
|
||||
|
||||
PS C:\> Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope CurrentUser
|
||||
|
||||
For more information on Execution Policies:
|
||||
https://go.microsoft.com/fwlink/?LinkID=135170
|
||||
|
||||
#>
|
||||
Param(
|
||||
[Parameter(Mandatory = $false)]
|
||||
[String]
|
||||
$VenvDir,
|
||||
[Parameter(Mandatory = $false)]
|
||||
[String]
|
||||
$Prompt
|
||||
)
|
||||
|
||||
<# Function declarations --------------------------------------------------- #>
|
||||
|
||||
<#
.Synopsis
Remove all shell session elements added by the Activate script, including the
virtual environment's Python executable that was added to the beginning of
the PATH variable.

.Parameter NonDestructive
If present, do not remove this function from the global namespace for the
session.

#>
function global:deactivate ([switch]$NonDestructive) {
    # Revert to original values

    # The prior prompt:
    if (Test-Path -Path Function:_OLD_VIRTUAL_PROMPT) {
        Copy-Item -Path Function:_OLD_VIRTUAL_PROMPT -Destination Function:prompt
        Remove-Item -Path Function:_OLD_VIRTUAL_PROMPT
    }

    # The prior PYTHONHOME:
    if (Test-Path -Path Env:_OLD_VIRTUAL_PYTHONHOME) {
        Copy-Item -Path Env:_OLD_VIRTUAL_PYTHONHOME -Destination Env:PYTHONHOME
        Remove-Item -Path Env:_OLD_VIRTUAL_PYTHONHOME
    }

    # The prior PATH:
    if (Test-Path -Path Env:_OLD_VIRTUAL_PATH) {
        Copy-Item -Path Env:_OLD_VIRTUAL_PATH -Destination Env:PATH
        Remove-Item -Path Env:_OLD_VIRTUAL_PATH
    }

    # Just remove the VIRTUAL_ENV altogether:
    if (Test-Path -Path Env:VIRTUAL_ENV) {
        Remove-Item -Path env:VIRTUAL_ENV
    }

    # Just remove the _PYTHON_VENV_PROMPT_PREFIX altogether:
    # (-Force is required because the variable is created read-only)
    if (Get-Variable -Name "_PYTHON_VENV_PROMPT_PREFIX" -ErrorAction SilentlyContinue) {
        Remove-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Scope Global -Force
    }

    # Leave deactivate function in the global namespace if requested:
    if (-not $NonDestructive) {
        Remove-Item -Path function:deactivate
    }
}
|
||||
|
||||
<#
.Description
Get-PyVenvConfig parses the values from the pyvenv.cfg file located in the
given folder, and returns them in a map.

For each line in the pyvenv.cfg file, if that line can be parsed into exactly
two strings separated by `=` (with any amount of whitespace surrounding the =)
then it is considered a `key = value` line. The left hand string is the key,
the right hand is the value.

If the value is at least two characters long and starts with a `'` or a `"`
then the first and last character is stripped from the value before being
captured. A value consisting of a single quote character is kept unchanged.

.Parameter ConfigDir
Path to the directory that contains the `pyvenv.cfg` file.
#>
function Get-PyVenvConfig(
    [String]
    $ConfigDir
) {
    Write-Verbose "Given ConfigDir=$ConfigDir, obtain values in pyvenv.cfg"

    # Ensure the file exists, and issue a warning if it doesn't (but still allow the function to continue).
    $pyvenvConfigPath = Join-Path -Resolve -Path $ConfigDir -ChildPath 'pyvenv.cfg' -ErrorAction Continue

    # An empty map will be returned if no config file is found.
    $pyvenvConfig = @{ }

    if ($pyvenvConfigPath) {

        Write-Verbose "File exists, parse `key = value` lines"
        $pyvenvConfigContent = Get-Content -Path $pyvenvConfigPath

        $pyvenvConfigContent | ForEach-Object {
            # Split on the first `=` only, so values may themselves contain `=`.
            $keyval = $PSItem -split "\s*=\s*", 2
            if ($keyval[0] -and $keyval[1]) {
                $val = $keyval[1]

                # Remove extraneous quotations around a string value.
                # The length guard prevents Substring(1, -1) from throwing
                # when the value is a lone quote character.
                if ($val.Length -ge 2 -and "'""".Contains($val.Substring(0, 1))) {
                    $val = $val.Substring(1, $val.Length - 2)
                }

                $pyvenvConfig[$keyval[0]] = $val
                Write-Verbose "Adding Key: '$($keyval[0])'='$val'"
            }
        }
    }
    return $pyvenvConfig
}
|
||||
|
||||
|
||||
<# Begin Activate script --------------------------------------------------- #>
|
||||
|
||||
# Determine the containing directory of this script
$VenvExecPath = Split-Path -Parent $MyInvocation.MyCommand.Definition
$VenvExecDir = Get-Item -Path $VenvExecPath

Write-Verbose "Activation script is located in path: '$VenvExecPath'"
Write-Verbose "VenvExecDir Fullname: '$($VenvExecDir.FullName)'"
Write-Verbose "VenvExecDir Name: '$($VenvExecDir.Name)'"

# Set values required in priority: CmdLine, ConfigFile, Default
# First, get the location of the virtual environment, it might not be
# VenvExecDir if specified on the command line.
if ($VenvDir) {
    Write-Verbose "VenvDir given as parameter, using '$VenvDir' to determine values"
}
else {
    Write-Verbose "VenvDir not given as a parameter, using parent directory name as VenvDir."
    $VenvDir = $VenvExecDir.Parent.FullName.TrimEnd("\\/")
    Write-Verbose "VenvDir=$VenvDir"
}

# Next, read the `pyvenv.cfg` file to determine any required value such
# as `prompt`.
$pyvenvCfg = Get-PyVenvConfig -ConfigDir $VenvDir

# Next, set the prompt from the command line, or the config file, or
# just use the name of the virtual environment folder.
if ($Prompt) {
    Write-Verbose "Prompt specified as argument, using '$Prompt'"
}
else {
    Write-Verbose "Prompt not specified as argument to script, checking pyvenv.cfg value"
    if ($pyvenvCfg -and $pyvenvCfg['prompt']) {
        Write-Verbose " Setting based on value in pyvenv.cfg='$($pyvenvCfg['prompt'])'"
        $Prompt = $pyvenvCfg['prompt'];
    }
    else {
        Write-Verbose " Setting prompt based on parent's directory's name. (Is the directory name passed to venv module when creating the virtual environment)"
        Write-Verbose " Got leaf-name of $VenvDir='$(Split-Path -Path $venvDir -Leaf)'"
        $Prompt = Split-Path -Path $venvDir -Leaf
    }
}

Write-Verbose "Prompt = '$Prompt'"
Write-Verbose "VenvDir='$VenvDir'"

# Deactivate any currently active virtual environment, but leave the
# deactivate function in place.
deactivate -nondestructive

# Now set the environment variable VIRTUAL_ENV, used by many tools to determine
# that there is an activated venv.
$env:VIRTUAL_ENV = $VenvDir

if (-not $Env:VIRTUAL_ENV_DISABLE_PROMPT) {

    Write-Verbose "Setting prompt to '$Prompt'"

    # Set the prompt to include the env name
    # Make sure _OLD_VIRTUAL_PROMPT is global
    function global:_OLD_VIRTUAL_PROMPT { "" }
    Copy-Item -Path function:prompt -Destination function:_OLD_VIRTUAL_PROMPT
    New-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Description "Python virtual environment prompt prefix" -Scope Global -Option ReadOnly -Visibility Public -Value $Prompt

    # The new prompt prints the prefix in green, then delegates to the saved prompt.
    function global:prompt {
        Write-Host -NoNewline -ForegroundColor Green "($_PYTHON_VENV_PROMPT_PREFIX) "
        _OLD_VIRTUAL_PROMPT
    }
}

# Clear PYTHONHOME
if (Test-Path -Path Env:PYTHONHOME) {
    Copy-Item -Path Env:PYTHONHOME -Destination Env:_OLD_VIRTUAL_PYTHONHOME
    Remove-Item -Path Env:PYTHONHOME
}

# Add the venv to the PATH
Copy-Item -Path Env:PATH -Destination Env:_OLD_VIRTUAL_PATH
$Env:PATH = "$VenvExecDir$([System.IO.Path]::PathSeparator)$Env:PATH"
|
||||
@@ -1,66 +0,0 @@
|
||||
# This file must be used with "source bin/activate" *from bash*
|
||||
# you cannot run it directly
|
||||
|
||||
# Undo the changes made by activation: restore PATH, PYTHONHOME and PS1 from
# their saved copies and unset VIRTUAL_ENV. Unless called with the single
# argument "nondestructive", the function also removes itself.
deactivate () {
    # reset old environment variables
    if [ -n "${_OLD_VIRTUAL_PATH:-}" ] ; then
        PATH="${_OLD_VIRTUAL_PATH:-}"
        export PATH
        unset _OLD_VIRTUAL_PATH
    fi
    if [ -n "${_OLD_VIRTUAL_PYTHONHOME:-}" ] ; then
        PYTHONHOME="${_OLD_VIRTUAL_PYTHONHOME:-}"
        export PYTHONHOME
        unset _OLD_VIRTUAL_PYTHONHOME
    fi

    # This should detect bash and zsh, which have a hash command that must
    # be called to get it to forget past commands.  Without forgetting
    # past commands the $PATH changes we made may not be respected
    if [ -n "${BASH:-}" ] || [ -n "${ZSH_VERSION:-}" ] ; then
        hash -r 2> /dev/null
    fi

    if [ -n "${_OLD_VIRTUAL_PS1:-}" ] ; then
        PS1="${_OLD_VIRTUAL_PS1:-}"
        export PS1
        unset _OLD_VIRTUAL_PS1
    fi

    unset VIRTUAL_ENV
    if [ ! "${1:-}" = "nondestructive" ] ; then
        # Self destruct!
        unset -f deactivate
    fi
}
|
||||
|
||||
# unset irrelevant variables
deactivate nondestructive

VIRTUAL_ENV="/Users/aln/Work/26.molenda.net/eeeeee/.venv"
export VIRTUAL_ENV

# Save the current PATH, then put the environment's bin directory first.
_OLD_VIRTUAL_PATH="$PATH"
PATH="$VIRTUAL_ENV/bin:$PATH"
export PATH

# unset PYTHONHOME if set
# this will fail if PYTHONHOME is set to the empty string (which is bad anyway)
# could use `if (set -u; : $PYTHONHOME) ;` in bash
if [ -n "${PYTHONHOME:-}" ] ; then
    _OLD_VIRTUAL_PYTHONHOME="${PYTHONHOME:-}"
    unset PYTHONHOME
fi

# Prefix the prompt with the environment name unless the user opted out.
if [ -z "${VIRTUAL_ENV_DISABLE_PROMPT:-}" ] ; then
    _OLD_VIRTUAL_PS1="${PS1:-}"
    PS1="(.venv) ${PS1:-}"
    export PS1
fi

# This should detect bash and zsh, which have a hash command that must
# be called to get it to forget past commands.  Without forgetting
# past commands the $PATH changes we made may not be respected
if [ -n "${BASH:-}" ] || [ -n "${ZSH_VERSION:-}" ] ; then
    hash -r 2> /dev/null
fi
|
||||
@@ -1,25 +0,0 @@
|
||||
# This file must be used with "source bin/activate.csh" *from csh*.
|
||||
# You cannot run it directly.
|
||||
# Created by Davide Di Blasi <davidedb@gmail.com>.
|
||||
# Ported to Python 3.3 venv by Andrew Svetlov <andrew.svetlov@gmail.com>
|
||||
|
||||
# `deactivate` restores PATH and the prompt from their saved copies, unsets
# VIRTUAL_ENV and, unless called with "nondestructive", removes this alias.
alias deactivate 'test $?_OLD_VIRTUAL_PATH != 0 && setenv PATH "$_OLD_VIRTUAL_PATH" && unset _OLD_VIRTUAL_PATH; rehash; test $?_OLD_VIRTUAL_PROMPT != 0 && set prompt="$_OLD_VIRTUAL_PROMPT" && unset _OLD_VIRTUAL_PROMPT; unsetenv VIRTUAL_ENV; test "\!:*" != "nondestructive" && unalias deactivate'

# Unset irrelevant variables.
deactivate nondestructive

setenv VIRTUAL_ENV "/Users/aln/Work/26.molenda.net/eeeeee/.venv"

# Save the current PATH, then put the environment's bin directory first.
set _OLD_VIRTUAL_PATH="$PATH"
setenv PATH "$VIRTUAL_ENV/bin:$PATH"


set _OLD_VIRTUAL_PROMPT="$prompt"

# Prefix the prompt with the environment name unless the user opted out.
if (! "$?VIRTUAL_ENV_DISABLE_PROMPT") then
    set prompt = "(.venv) $prompt"
endif

alias pydoc python -m pydoc

rehash
|
||||
@@ -1,64 +0,0 @@
|
||||
# This file must be used with "source <venv>/bin/activate.fish" *from fish*
|
||||
# (https://fishshell.com/); you cannot run it directly.
|
||||
|
||||
function deactivate -d "Exit virtual environment and return to normal shell environment"
    # reset old environment variables
    if test -n "$_OLD_VIRTUAL_PATH"
        set -gx PATH $_OLD_VIRTUAL_PATH
        set -e _OLD_VIRTUAL_PATH
    end
    if test -n "$_OLD_VIRTUAL_PYTHONHOME"
        set -gx PYTHONHOME $_OLD_VIRTUAL_PYTHONHOME
        set -e _OLD_VIRTUAL_PYTHONHOME
    end

    # Put the saved prompt function back in place of the overriding one.
    if test -n "$_OLD_FISH_PROMPT_OVERRIDE"
        functions -e fish_prompt
        set -e _OLD_FISH_PROMPT_OVERRIDE
        functions -c _old_fish_prompt fish_prompt
        functions -e _old_fish_prompt
    end

    set -e VIRTUAL_ENV
    if test "$argv[1]" != "nondestructive"
        # Self-destruct!
        functions -e deactivate
    end
end
|
||||
|
||||
# Unset irrelevant variables.
deactivate nondestructive

set -gx VIRTUAL_ENV "/Users/aln/Work/26.molenda.net/eeeeee/.venv"

# Save the current PATH, then put the environment's bin directory first.
set -gx _OLD_VIRTUAL_PATH $PATH
set -gx PATH "$VIRTUAL_ENV/bin" $PATH

# Unset PYTHONHOME if set.
if set -q PYTHONHOME
    set -gx _OLD_VIRTUAL_PYTHONHOME $PYTHONHOME
    set -e PYTHONHOME
end

if test -z "$VIRTUAL_ENV_DISABLE_PROMPT"
    # fish uses a function instead of an env var to generate the prompt.

    # Save the current fish_prompt function as the function _old_fish_prompt.
    functions -c fish_prompt _old_fish_prompt

    # With the original prompt function renamed, we can override with our own.
    function fish_prompt
        # Save the return status of the last command.
        set -l old_status $status

        # Output the venv prompt; color taken from the blue of the Python logo.
        printf "%s%s%s" (set_color 4B8BBE) "(.venv) " (set_color normal)

        # Restore the return status of the previous command.
        echo "exit $old_status" | .
        # Output the original/"old" prompt.
        _old_fish_prompt
    end

    set -gx _OLD_FISH_PROMPT_OVERRIDE "$VIRTUAL_ENV"
end
|
||||
@@ -1,8 +0,0 @@
|
||||
#!/Users/aln/Work/26.molenda.net/eeeeee/.venv/bin/python3
# -*- coding: utf-8 -*-
# Console-script entry point: runs pip's command-line `main` and exits with
# its return value.
import re
import sys
from pip._internal.cli.main import main
if __name__ == '__main__':
    # Drop a trailing "-script.pyw" or ".exe" from the program name.
    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    sys.exit(main())
|
||||
@@ -1,8 +0,0 @@
|
||||
#!/Users/aln/Work/26.molenda.net/eeeeee/.venv/bin/python3
# -*- coding: utf-8 -*-
# Console-script entry point: runs pip's command-line `main` and exits with
# its return value.
import re
import sys
from pip._internal.cli.main import main
if __name__ == '__main__':
    # Drop a trailing "-script.pyw" or ".exe" from the program name.
    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    sys.exit(main())
|
||||
@@ -1,8 +0,0 @@
|
||||
#!/Users/aln/Work/26.molenda.net/eeeeee/.venv/bin/python3
# -*- coding: utf-8 -*-
# Console-script entry point: runs pip's command-line `main` and exits with
# its return value.
import re
import sys
from pip._internal.cli.main import main
if __name__ == '__main__':
    # Drop a trailing "-script.pyw" or ".exe" from the program name.
    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    sys.exit(main())
|
||||
@@ -1 +0,0 @@
|
||||
python3
|
||||
@@ -1 +0,0 @@
|
||||
/Library/Developer/CommandLineTools/usr/bin/python3
|
||||
@@ -1 +0,0 @@
|
||||
python3
|
||||
@@ -1,128 +0,0 @@
|
||||
import sys
|
||||
import os
|
||||
import re
|
||||
import importlib
|
||||
import warnings
|
||||
|
||||
|
||||
# True when running on PyPy (detected through its built-in `__pypy__` module).
is_pypy = '__pypy__' in sys.builtin_module_names


# Silence DeprecationWarnings whose message matches the distutils
# deprecation pattern below.
warnings.filterwarnings('ignore',
                        r'.+ distutils\b.+ deprecated',
                        DeprecationWarning)
|
||||
|
||||
|
||||
def warn_distutils_present():
    """Warn if ``distutils`` was already imported before this hook ran.

    Returns without warning when ``distutils`` is not in ``sys.modules``, or
    on PyPy for Python versions below 3.7.
    """
    if 'distutils' not in sys.modules:
        return
    if is_pypy and sys.version_info < (3, 7):
        # PyPy for 3.6 unconditionally imports distutils, so bypass the warning
        # https://foss.heptapod.net/pypy/pypy/-/blob/be829135bc0d758997b3566062999ee8b23872b4/lib-python/3/site.py#L250
        return
    warnings.warn(
        "Distutils was imported before Setuptools, but importing Setuptools "
        "also replaces the `distutils` module in `sys.modules`. This may lead "
        "to undesirable behaviors or errors. To avoid these issues, avoid "
        "using distutils directly, ensure that setuptools is installed in the "
        "traditional way (e.g. not an editable install), and/or make sure "
        "that setuptools is always imported before distutils.")
|
||||
|
||||
|
||||
def clear_distutils():
    """Remove ``distutils`` and its submodules from ``sys.modules``.

    Does nothing when ``distutils`` is not loaded; otherwise emits a warning
    before removing the entries.
    """
    if 'distutils' not in sys.modules:
        return
    warnings.warn("Setuptools is replacing distutils.")
    # Collect the names first: sys.modules must not change size while iterated.
    mods = [name for name in sys.modules if re.match(r'distutils\b', name)]
    for name in mods:
        del sys.modules[name]
|
||||
|
||||
|
||||
def enabled():
    """
    Allow selection of distutils by environment variable.

    Returns True only when ``SETUPTOOLS_USE_DISTUTILS`` is set to ``local``;
    an unset variable is treated as ``stdlib``.
    """
    which = os.environ.get('SETUPTOOLS_USE_DISTUTILS', 'stdlib')
    return which == 'local'
|
||||
|
||||
|
||||
def ensure_local_distutils():
    """Register ``setuptools._distutils`` in ``sys.modules`` as ``distutils``."""
    clear_distutils()
    distutils = importlib.import_module('setuptools._distutils')
    distutils.__name__ = 'distutils'
    sys.modules['distutils'] = distutils

    # sanity check that submodules load as expected
    core = importlib.import_module('distutils.core')
    assert '_distutils' in core.__file__, core.__file__
|
||||
|
||||
|
||||
def do_override():
    """
    Ensure that the local copy of distutils is preferred over stdlib.

    Only acts when `enabled()` returns True.

    See https://github.com/pypa/setuptools/issues/417#issuecomment-392298401
    for more motivation.
    """
    if enabled():
        warn_distutils_present()
        ensure_local_distutils()
|
||||
|
||||
|
||||
class DistutilsMetaFinder:
    """Meta-path finder that dispatches to a ``spec_for_<name>`` method."""

    def find_spec(self, fullname, path, target=None):
        """Return the spec from ``spec_for_<fullname>``, or None.

        Imports with a non-None ``path`` are ignored, as are names for which
        no ``spec_for_<fullname>`` method exists.
        """
        if path is not None:
            return

        method_name = 'spec_for_{}'.format(fullname)
        method = getattr(self, method_name, lambda: None)
        return method()

    def spec_for_distutils(self):
        """Build a spec whose loader returns ``setuptools._distutils``."""
        import importlib.abc
        import importlib.util

        class DistutilsLoader(importlib.abc.Loader):

            def create_module(self, spec):
                return importlib.import_module('setuptools._distutils')

            def exec_module(self, module):
                # The module is already executed by the import above.
                pass

        return importlib.util.spec_from_loader('distutils', DistutilsLoader())

    def spec_for_pip(self):
        """
        Ensure stdlib distutils when running under pip.
        See pypa/pip#8761 for rationale.
        """
        if self.pip_imported_during_build():
            return
        clear_distutils()
        # From now on this finder instance no longer redirects `distutils`.
        self.spec_for_distutils = lambda: None

    @staticmethod
    def pip_imported_during_build():
        """
        Detect if pip is being imported in a build script. Ref #2355.

        Returns True when any frame on the current stack belongs to a file
        whose name ends with ``setup.py``.
        """
        import traceback
        return any(
            # Some frames have no `__file__` in their globals (for example
            # code run through exec); treat those as "not setup.py" instead
            # of raising KeyError.
            (frame.f_globals.get('__file__') or '').endswith('setup.py')
            for frame, line in traceback.walk_stack(None)
        )
|
||||
|
||||
|
||||
DISTUTILS_FINDER = DistutilsMetaFinder()
|
||||
|
||||
|
||||
def add_shim():
    """Insert DISTUTILS_FINDER at the front of ``sys.meta_path``."""
    sys.meta_path.insert(0, DISTUTILS_FINDER)
|
||||
|
||||
|
||||
def remove_shim():
    """Remove DISTUTILS_FINDER from ``sys.meta_path`` if it is present."""
    try:
        sys.meta_path.remove(DISTUTILS_FINDER)
    except ValueError:
        # The finder was not installed; nothing to remove.
        pass
|
||||
@@ -1 +0,0 @@
|
||||
__import__('_distutils_hack').do_override()
|
||||
@@ -1 +0,0 @@
|
||||
import os; var = 'SETUPTOOLS_USE_DISTUTILS'; enabled = os.environ.get(var, 'stdlib') == 'local'; enabled and __import__('_distutils_hack').add_shim();
|
||||
@@ -1 +0,0 @@
|
||||
pip
|
||||
@@ -1,103 +0,0 @@
|
||||
Metadata-Version: 2.4
|
||||
Name: lxml
|
||||
Version: 6.0.2
|
||||
Summary: Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API.
|
||||
Home-page: https://lxml.de/
|
||||
Author: lxml dev team
|
||||
Author-email: lxml@lxml.de
|
||||
Maintainer: lxml dev team
|
||||
Maintainer-email: lxml@lxml.de
|
||||
License: BSD-3-Clause
|
||||
Project-URL: Source, https://github.com/lxml/lxml
|
||||
Project-URL: Bug Tracker, https://bugs.launchpad.net/lxml
|
||||
Classifier: Development Status :: 5 - Production/Stable
|
||||
Classifier: Intended Audience :: Developers
|
||||
Classifier: Intended Audience :: Information Technology
|
||||
Classifier: Programming Language :: Cython
|
||||
Classifier: Programming Language :: Python :: 3
|
||||
Classifier: Programming Language :: Python :: 3.8
|
||||
Classifier: Programming Language :: Python :: 3.9
|
||||
Classifier: Programming Language :: Python :: 3.10
|
||||
Classifier: Programming Language :: Python :: 3.11
|
||||
Classifier: Programming Language :: Python :: 3.12
|
||||
Classifier: Programming Language :: Python :: 3.13
|
||||
Classifier: Programming Language :: Python :: 3.14
|
||||
Classifier: Programming Language :: C
|
||||
Classifier: Operating System :: OS Independent
|
||||
Classifier: Topic :: Text Processing :: Markup :: HTML
|
||||
Classifier: Topic :: Text Processing :: Markup :: XML
|
||||
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
||||
Requires-Python: >=3.8
|
||||
License-File: LICENSE.txt
|
||||
License-File: LICENSES.txt
|
||||
Provides-Extra: source
|
||||
Provides-Extra: cssselect
|
||||
Requires-Dist: cssselect>=0.7; extra == "cssselect"
|
||||
Provides-Extra: html5
|
||||
Requires-Dist: html5lib; extra == "html5"
|
||||
Provides-Extra: htmlsoup
|
||||
Requires-Dist: BeautifulSoup4; extra == "htmlsoup"
|
||||
Provides-Extra: html-clean
|
||||
Requires-Dist: lxml_html_clean; extra == "html-clean"
|
||||
Dynamic: author
|
||||
Dynamic: author-email
|
||||
Dynamic: classifier
|
||||
Dynamic: description
|
||||
Dynamic: home-page
|
||||
Dynamic: license
|
||||
Dynamic: license-file
|
||||
Dynamic: maintainer
|
||||
Dynamic: maintainer-email
|
||||
Dynamic: project-url
|
||||
Dynamic: provides-extra
|
||||
Dynamic: requires-python
|
||||
Dynamic: summary
|
||||
|
||||
lxml is a Pythonic, mature binding for the libxml2 and libxslt libraries.
|
||||
It provides safe and convenient access to these libraries using the
|
||||
ElementTree API.
|
||||
|
||||
It extends the ElementTree API significantly to offer support for XPath,
|
||||
RelaxNG, XML Schema, XSLT, C14N and much more.
|
||||
|
||||
To contact the project, go to the `project home page <https://lxml.de/>`_
|
||||
or see our bug tracker at https://launchpad.net/lxml
|
||||
|
||||
In case you want to use the current in-development version of lxml,
|
||||
you can get it from the github repository at
|
||||
https://github.com/lxml/lxml . Note that this requires Cython to
|
||||
build the sources, see the build instructions on the project home page.
|
||||
|
||||
|
||||
After an official release of a new stable series, bug fixes may become available at
|
||||
https://github.com/lxml/lxml/tree/lxml-6.0 .
|
||||
Running ``pip install https://github.com/lxml/lxml/archive/refs/heads/lxml-6.0.tar.gz``
|
||||
will install the unreleased branch state as soon as a maintenance branch has been established.
|
||||
Note that this requires Cython to be installed at an appropriate version for the build.
|
||||
|
||||
6.0.2 (2025-09-21)
|
||||
==================
|
||||
|
||||
Bugs fixed
|
||||
----------
|
||||
|
||||
* LP#2125278: Compilation with libxml2 2.15.0 failed.
|
||||
Original patch by Xi Ruoyao.
|
||||
|
||||
* Setting ``decompress=True`` in the parser had no effect in libxml2 2.15.
|
||||
|
||||
* Binary wheels on Linux and macOS use the library version libxml2 2.14.6.
|
||||
See https://gitlab.gnome.org/GNOME/libxml2/-/releases/v2.14.6
|
||||
|
||||
* Test failures in libxml2 2.15.0 were fixed.
|
||||
|
||||
Other changes
|
||||
-------------
|
||||
|
||||
* Binary wheels for Py3.9-3.11 on the ``riscv64`` architecture were added.
|
||||
|
||||
* Error constants were updated to match libxml2 2.15.0.
|
||||
|
||||
* Built using Cython 3.1.4.
|
||||
|
||||
|
||||
@@ -1,205 +0,0 @@
|
||||
../../../../../../../Library/Caches/com.apple.python/Users/aln/Work/26.molenda.net/eeeeee/.venv/lib/python3.9/site-packages/lxml/ElementInclude.cpython-39.pyc,,
|
||||
../../../../../../../Library/Caches/com.apple.python/Users/aln/Work/26.molenda.net/eeeeee/.venv/lib/python3.9/site-packages/lxml/__init__.cpython-39.pyc,,
|
||||
../../../../../../../Library/Caches/com.apple.python/Users/aln/Work/26.molenda.net/eeeeee/.venv/lib/python3.9/site-packages/lxml/_elementpath.cpython-39.pyc,,
|
||||
../../../../../../../Library/Caches/com.apple.python/Users/aln/Work/26.molenda.net/eeeeee/.venv/lib/python3.9/site-packages/lxml/builder.cpython-39.pyc,,
|
||||
../../../../../../../Library/Caches/com.apple.python/Users/aln/Work/26.molenda.net/eeeeee/.venv/lib/python3.9/site-packages/lxml/cssselect.cpython-39.pyc,,
|
||||
../../../../../../../Library/Caches/com.apple.python/Users/aln/Work/26.molenda.net/eeeeee/.venv/lib/python3.9/site-packages/lxml/doctestcompare.cpython-39.pyc,,
|
||||
../../../../../../../Library/Caches/com.apple.python/Users/aln/Work/26.molenda.net/eeeeee/.venv/lib/python3.9/site-packages/lxml/html/ElementSoup.cpython-39.pyc,,
|
||||
../../../../../../../Library/Caches/com.apple.python/Users/aln/Work/26.molenda.net/eeeeee/.venv/lib/python3.9/site-packages/lxml/html/__init__.cpython-39.pyc,,
|
||||
../../../../../../../Library/Caches/com.apple.python/Users/aln/Work/26.molenda.net/eeeeee/.venv/lib/python3.9/site-packages/lxml/html/_diffcommand.cpython-39.pyc,,
|
||||
../../../../../../../Library/Caches/com.apple.python/Users/aln/Work/26.molenda.net/eeeeee/.venv/lib/python3.9/site-packages/lxml/html/_difflib.cpython-39.pyc,,
|
||||
../../../../../../../Library/Caches/com.apple.python/Users/aln/Work/26.molenda.net/eeeeee/.venv/lib/python3.9/site-packages/lxml/html/_html5builder.cpython-39.pyc,,
|
||||
../../../../../../../Library/Caches/com.apple.python/Users/aln/Work/26.molenda.net/eeeeee/.venv/lib/python3.9/site-packages/lxml/html/_setmixin.cpython-39.pyc,,
|
||||
../../../../../../../Library/Caches/com.apple.python/Users/aln/Work/26.molenda.net/eeeeee/.venv/lib/python3.9/site-packages/lxml/html/builder.cpython-39.pyc,,
|
||||
../../../../../../../Library/Caches/com.apple.python/Users/aln/Work/26.molenda.net/eeeeee/.venv/lib/python3.9/site-packages/lxml/html/clean.cpython-39.pyc,,
|
||||
../../../../../../../Library/Caches/com.apple.python/Users/aln/Work/26.molenda.net/eeeeee/.venv/lib/python3.9/site-packages/lxml/html/defs.cpython-39.pyc,,
|
||||
../../../../../../../Library/Caches/com.apple.python/Users/aln/Work/26.molenda.net/eeeeee/.venv/lib/python3.9/site-packages/lxml/html/diff.cpython-39.pyc,,
|
||||
../../../../../../../Library/Caches/com.apple.python/Users/aln/Work/26.molenda.net/eeeeee/.venv/lib/python3.9/site-packages/lxml/html/formfill.cpython-39.pyc,,
|
||||
../../../../../../../Library/Caches/com.apple.python/Users/aln/Work/26.molenda.net/eeeeee/.venv/lib/python3.9/site-packages/lxml/html/html5parser.cpython-39.pyc,,
|
||||
../../../../../../../Library/Caches/com.apple.python/Users/aln/Work/26.molenda.net/eeeeee/.venv/lib/python3.9/site-packages/lxml/html/soupparser.cpython-39.pyc,,
|
||||
../../../../../../../Library/Caches/com.apple.python/Users/aln/Work/26.molenda.net/eeeeee/.venv/lib/python3.9/site-packages/lxml/html/usedoctest.cpython-39.pyc,,
|
||||
../../../../../../../Library/Caches/com.apple.python/Users/aln/Work/26.molenda.net/eeeeee/.venv/lib/python3.9/site-packages/lxml/includes/__init__.cpython-39.pyc,,
|
||||
../../../../../../../Library/Caches/com.apple.python/Users/aln/Work/26.molenda.net/eeeeee/.venv/lib/python3.9/site-packages/lxml/includes/extlibs/__init__.cpython-39.pyc,,
|
||||
../../../../../../../Library/Caches/com.apple.python/Users/aln/Work/26.molenda.net/eeeeee/.venv/lib/python3.9/site-packages/lxml/includes/libexslt/__init__.cpython-39.pyc,,
|
||||
../../../../../../../Library/Caches/com.apple.python/Users/aln/Work/26.molenda.net/eeeeee/.venv/lib/python3.9/site-packages/lxml/includes/libxml/__init__.cpython-39.pyc,,
|
||||
../../../../../../../Library/Caches/com.apple.python/Users/aln/Work/26.molenda.net/eeeeee/.venv/lib/python3.9/site-packages/lxml/includes/libxslt/__init__.cpython-39.pyc,,
|
||||
../../../../../../../Library/Caches/com.apple.python/Users/aln/Work/26.molenda.net/eeeeee/.venv/lib/python3.9/site-packages/lxml/isoschematron/__init__.cpython-39.pyc,,
|
||||
../../../../../../../Library/Caches/com.apple.python/Users/aln/Work/26.molenda.net/eeeeee/.venv/lib/python3.9/site-packages/lxml/pyclasslookup.cpython-39.pyc,,
|
||||
../../../../../../../Library/Caches/com.apple.python/Users/aln/Work/26.molenda.net/eeeeee/.venv/lib/python3.9/site-packages/lxml/sax.cpython-39.pyc,,
|
||||
../../../../../../../Library/Caches/com.apple.python/Users/aln/Work/26.molenda.net/eeeeee/.venv/lib/python3.9/site-packages/lxml/usedoctest.cpython-39.pyc,,
|
||||
lxml-6.0.2.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
|
||||
lxml-6.0.2.dist-info/METADATA,sha256=0qIHkwlNTTMz4-c5e8ZnbbGgt_vpYZHCEoqXyckR95Q,3622
|
||||
lxml-6.0.2.dist-info/RECORD,,
|
||||
lxml-6.0.2.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
||||
lxml-6.0.2.dist-info/WHEEL,sha256=Sli93J_eWejDC8SYRdplFsirRhz71iXfsqaGlL-WXt4,139
|
||||
lxml-6.0.2.dist-info/licenses/LICENSE.txt,sha256=j8K1aBM1FuRoRdIUeRet7uFkjnCumrXtbFQXr-9M6FU,1507
|
||||
lxml-6.0.2.dist-info/licenses/LICENSES.txt,sha256=QdSd1AaqDhVIptXyGjDWv2OLPNlutyid00jYPtLkA5I,1514
|
||||
lxml-6.0.2.dist-info/top_level.txt,sha256=NjD988wqaKq512nshNdLt-uDxsjkp4Bh51m6N-dhUrk,5
|
||||
lxml/ElementInclude.py,sha256=PSLeZFvCa76WHJulPLxcZXJtCI2-4dK2CtqPRiYOAQg,8560
|
||||
lxml/__init__.py,sha256=rgOcPyZUNBFL30ylxIxd8fHHWi6TwyIUCi8Av84XWwo,574
|
||||
lxml/_elementpath.cpython-39-darwin.so,sha256=bLMQjJKxDD_TYBnSjkj8Ulaz_x-OGZyBBANoroBP9Rs,497776
|
||||
lxml/_elementpath.py,sha256=b80hM3ndAkTtRX6v54za3LkkAqCcd0700BbMPZHnTBU,10959
|
||||
lxml/apihelpers.pxi,sha256=9S6bzp-VKCUPZv0f6-el5PsbPFN4FJqSnMCIYilS0eU,63881
|
||||
lxml/builder.cpython-39-darwin.so,sha256=FMGelyMN0skDjhPYG_2wHGrFn60_atPRt0pF9XNjYhg,291248
|
||||
lxml/builder.py,sha256=KI1HxHTd4wJqqVfmTRtSbXBQdl2T-P36ih4hT-J3MNw,8485
|
||||
lxml/classlookup.pxi,sha256=Tax8Vhbm5C6UCjgmRFsYjW0pFHxIuTthH1MOgASDLgc,22435
|
||||
lxml/cleanup.pxi,sha256=ZNEpbv7qx_ICPzsxhCaMUHCOfiznOoZ_u3jlYXHAuh4,8454
|
||||
lxml/cssselect.py,sha256=_wZdX-B9p5MeIYABmENIYRWEkwXwX-7jO8Dkf-1rUZU,3306
|
||||
lxml/debug.pxi,sha256=KTcpR8-slUYvmIPbE35GoHDNTb-gjTEvD7bw6LltM4c,1125
|
||||
lxml/docloader.pxi,sha256=bYSZAxxbBEfVzfLXTUWFRfOyUTfV23L7i9hR2dgtSNY,5772
|
||||
lxml/doctestcompare.py,sha256=40EDnkwpcvW86qNa86990OXF42xdHaosSZoiBsEjkzU,17731
|
||||
lxml/dtd.pxi,sha256=IAKkmA4ZoC68sqAWcTqoS8jEGYcPQrVMCZgn4iLBYko,15281
|
||||
lxml/etree.cpython-39-darwin.so,sha256=1dxECBWNkHaqx3kW2B_rdGJJOGqhAwifHiwpIM9fJPc,9667432
|
||||
lxml/etree.h,sha256=_NkGkD3C_jpE4UegvQ6Y32_ycTbUCLyOBz9xfWRPkug,9792
|
||||
lxml/etree.pyx,sha256=2qCb8ZNjsdoB0fUELYwAM4ldLQZWS5_gt-OxKEUM-vs,138014
|
||||
lxml/etree_api.h,sha256=dNCm28ubaVS8SbhLuxs9JvYWg41NoR_yD3qTRr7hliA,17372
|
||||
lxml/extensions.pxi,sha256=xKLad35EQgpsDhs07tw31aKJBBMWIK9rMc0JTXETAUA,32022
|
||||
lxml/html/ElementSoup.py,sha256=s_dLobLMuKn2DhexR-iDXdZrMFg1RjLy1feHsIeZMpw,320
|
||||
lxml/html/__init__.py,sha256=CC5WdsvSptZhr9MZya1qsL6JKVbviYdrHOhXrGhmORg,64425
|
||||
lxml/html/_diffcommand.py,sha256=kz_7EP9PmYWuczlZcGiw74_rG0eTKvQ2lrO0rkiwlYE,2081
|
||||
lxml/html/_difflib.cpython-39-darwin.so,sha256=dMVHZht3-JzseMx38JS4F4PleEoGgocS-DeNmK0wlr4,1121904
|
||||
lxml/html/_difflib.py,sha256=GgH_jVrZQC8tI8WV_lFZQsXFJ3mOTAPup1zjBJNvkPo,84954
|
||||
lxml/html/_html5builder.py,sha256=NLaT-Ev-aBgJpeQl-6ZbJChLZK5GV-znDkHOJD5VQC4,3230
|
||||
lxml/html/_setmixin.py,sha256=8IFIOLmVz0G-XzsD2tCEkSFWO-dgPBHgvHufC8ni67s,1188
|
||||
lxml/html/builder.py,sha256=Uz3r5uiuCdoN0UPa7ngoLMwAadVIhslzGvlRPGigY_M,6187
|
||||
lxml/html/clean.py,sha256=FghSJy4jt2RaBy6dgusowkU18hxpZ4XLE5ceCK9qxyA,503
|
||||
lxml/html/defs.py,sha256=l_6nh4DHvrsVyWVqWCUUx14QiahRyZv4Melqy_thf6Q,4250
|
||||
lxml/html/diff.cpython-39-darwin.so,sha256=JRjIXbSLwiLMZgYR6HCADsQvm885MQ6IpNCYT0PlAyI,732312
|
||||
lxml/html/diff.py,sha256=Za0By-yeYlQEjUu7m7xKB288kKiy8VBS5gT0RPOaFY0,32989
|
||||
lxml/html/formfill.py,sha256=umgk0BbkAI1W6q9musFbL-cDnI_aap2NsLBJqk0UmVI,9681
|
||||
lxml/html/html5parser.py,sha256=dnyC4cqHxywjZSzk0mu2L7THTZjxhg4yF4pncjusa_w,8634
|
||||
lxml/html/soupparser.py,sha256=xo8VvNeOEb-SChuXLKCRECh8J7HBiJLE9sAbEskoUUQ,10197
|
||||
lxml/html/usedoctest.py,sha256=tPlmVz4KK1GRKV5DJLrdVECeqsT9PlDzSqqTodVi5s0,249
|
||||
lxml/includes/__init__.pxd,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
||||
lxml/includes/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
||||
lxml/includes/c14n.pxd,sha256=DBQcOJ0c_YS245ohMb8fmuEC1kFyv1LrNY_8Mf-syZg,1110
|
||||
lxml/includes/config.pxd,sha256=H6Mrl8It21hzRI2hzMId9W48QqkYYkoLT4dniLNmdTw,96
|
||||
lxml/includes/dtdvalid.pxd,sha256=Nv0OykjYehv2lO-Zj--q6jS3TAC_dvQVPSgPMuse1NM,689
|
||||
lxml/includes/etree_defs.h,sha256=h_UjJTmNUqPyKNNrWB9hxmt6v4CF7_83XVY8dOfxqW0,14524
|
||||
lxml/includes/etreepublic.pxd,sha256=Bn4d3JkWPqXputXqI-eJ0xmPrwNFPTfDCa7axgjB7FM,10184
|
||||
lxml/includes/extlibs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
||||
lxml/includes/extlibs/libcharset.h,sha256=GA0FumrbNI4VDGlzq3lf5CLaCwXgn4unw2l0btGQFwI,1510
|
||||
lxml/includes/extlibs/localcharset.h,sha256=Z_AagaQeq0aDE7NPsVOqEf4nO4KcUp46ggo4d0ONIOQ,6338
|
||||
lxml/includes/extlibs/zconf.h,sha256=ROVD_0UUx6mgHWSAGcLJqB0RBcv6PHfx-vbNhur6ir0,16464
|
||||
lxml/includes/extlibs/zlib.h,sha256=ilV5r3LqT0J_8ApBUPDMs_xcHkN59ybhARM7Grn8YAw,96829
|
||||
lxml/includes/htmlparser.pxd,sha256=9uASkP5dU7OE2lCOLT-z2e01qSbFlp4ehgwdostF_qk,2802
|
||||
lxml/includes/libexslt/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
||||
lxml/includes/libexslt/exslt.h,sha256=eSW5tMJAewSUANLqk7AGEiU8b2BbCNRyauHnez7nKSU,3114
|
||||
lxml/includes/libexslt/exsltconfig.h,sha256=QHxzEbRlv_h0USBvpr0Zrl0Muzlc71VCrvgR6lqnLEY,1172
|
||||
lxml/includes/libexslt/exsltexports.h,sha256=1Jm9KTXm2FUUJIZ6V6-Uw55yG0BMULX3_goyxDd2LL8,1077
|
||||
lxml/includes/libxml/HTMLparser.h,sha256=sU4xGqj-vBtEvzlxA3hBPWJboifvkc4F1hynKXmsl3k,9569
|
||||
lxml/includes/libxml/HTMLtree.h,sha256=Q7UBKFbQ8fx4d_dMnmR6ay8JmfOhopFkDp2B63YkLDU,3517
|
||||
lxml/includes/libxml/SAX.h,sha256=SFnG27EFrYGUB9HDL_xSIGBwEns5pl07rApXWThFZFM,386
|
||||
lxml/includes/libxml/SAX2.h,sha256=RfFP5o3le-Rg8bnA2GW7L7L9_pfXCs3TieODcv1DTWY,4240
|
||||
lxml/includes/libxml/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
||||
lxml/includes/libxml/c14n.h,sha256=BSBXw6nIZutC8mWvbRrLLmoWjw3wRt-nM93vjXGMCm8,2742
|
||||
lxml/includes/libxml/catalog.h,sha256=H9ssTCaBjtDqc-AZqCk1R7h8F2iD9szqLjJyHpaczXg,4633
|
||||
lxml/includes/libxml/chvalid.h,sha256=TZcceNp6Cw0QlYwIqK9GxyYqL5UiAjpQyjt_yrZGTQE,5087
|
||||
lxml/includes/libxml/debugXML.h,sha256=XXRNI39gJW7bGRC4SzE4ad-SJ906BsUGz3AwOtkKuS4,1667
|
||||
lxml/includes/libxml/dict.h,sha256=SweaPGMtTTf4je6dNTIoEzcfEvpsAT9_PhR7FC0K-rQ,1770
|
||||
lxml/includes/libxml/encoding.h,sha256=haL7ratww2wkIERGmtwUqU2BbTVe52FZFU7MmrOpsPk,9623
|
||||
lxml/includes/libxml/entities.h,sha256=LEOCA826-0f8dhRJzC_2hvUVsSH7lKQjrea9hSTdBbo,4419
|
||||
lxml/includes/libxml/globals.h,sha256=NH8zyRI5cXJJGp5k2aLxOm-reJEGOFX6LYP82GBXRlY,583
|
||||
lxml/includes/libxml/hash.h,sha256=KIIpAYKBfGUU3ydWhGehUyfuauZz_Ps0gyambzQo_rc,7017
|
||||
lxml/includes/libxml/list.h,sha256=oh7iJNQajRA_cHsNk9CcFPYkaW2smf4J_MpedPPjC4k,3128
|
||||
lxml/includes/libxml/nanoftp.h,sha256=22PBtWhJueYLFvwukt4oFooRct_xJA83hbluHRBNXUM,302
|
||||
lxml/includes/libxml/nanohttp.h,sha256=bLbzYjAyAKmP3ComMOPH6XaUImu6bNAESF1HrVtRve0,2124
|
||||
lxml/includes/libxml/parser.h,sha256=Uq7-ce55UUAsvo4n6CiBlNQpmowewvWhOsQtgGM1UQ8,48498
|
||||
lxml/includes/libxml/parserInternals.h,sha256=8_Wr6UgRzm8BRn1RPLxyBkw6BagAdDvVqMA_e181_EI,14539
|
||||
lxml/includes/libxml/relaxng.h,sha256=VXZ74r5Yja06KqypdBHc8neDwPxQ2aMrsWHSdRt5oi4,5991
|
||||
lxml/includes/libxml/schemasInternals.h,sha256=V8M4In3zf24EX55Yt4dcfxwp7NpHGYViKnLKwtyrPJ4,26233
|
||||
lxml/includes/libxml/schematron.h,sha256=8EhPDhvtlMxl9e0C5rSbEruOvzJS5BC_OOFbq9RXZnY,4255
|
||||
lxml/includes/libxml/threads.h,sha256=mT3CgK4lXK7-NDnUOFXqYuCK6fyY70S3BsHF-TnT45k,1619
|
||||
lxml/includes/libxml/tree.h,sha256=zTRLt6h5x6ApyeXgs90CKQZSAl2hKm7b5NxtPKUQFAE,36106
|
||||
lxml/includes/libxml/uri.h,sha256=J9teJHme5z883c4twF5oImEYY-E3xSvhdSGpyRVtvIg,2855
|
||||
lxml/includes/libxml/valid.h,sha256=By61IbPvk_eLux7a8x0mOaly7oclFaSGaFE8b2xZcUE,13226
|
||||
lxml/includes/libxml/xinclude.h,sha256=K3I5jhw2zAMj26LuRNZc15Bwv2JE2hWxwVn4TCqv2b4,3258
|
||||
lxml/includes/libxml/xlink.h,sha256=TVLOkISrcKDelo9n_XIUyPiStDYa8NxuF2dz70aBFCI,5062
|
||||
lxml/includes/libxml/xmlIO.h,sha256=FvbuMYTy1-S5PScabE03wz0oWKf626pmXvOPZNuLm-w,11948
|
||||
lxml/includes/libxml/xmlautomata.h,sha256=7Sc3YgPz1ZIBKCHPSxs5oAwJEZWQ1RT2kyUw85pUtmU,4004
|
||||
lxml/includes/libxml/xmlerror.h,sha256=mMfltMxUza6kiSBfP2QfnY3UlMP_rEXKfX0wruBLl4A,37561
|
||||
lxml/includes/libxml/xmlexports.h,sha256=IyV3AMeQVbOl0wkjlnNX4B8WUZ-5GNKQmxZc6-maWUU,2025
|
||||
lxml/includes/libxml/xmlmemory.h,sha256=m7wGvVMxNzZiuOAo3vkjxaVWstc8aQLzb6obbjPsebE,4658
|
||||
lxml/includes/libxml/xmlmodule.h,sha256=ERUHUmDdZRmh6NjLYWUpse51rLWR8qNjPHOtdgmlLF0,1198
|
||||
lxml/includes/libxml/xmlreader.h,sha256=BAHinlSOTXX3DEax9BniaIIPAXJyLGfzym9R-27LCcU,12387
|
||||
lxml/includes/libxml/xmlregexp.h,sha256=_q6C1XRy8DS3kSmLbEKpvkKQciTgjTJgGc_zUQ6m22M,2632
|
||||
lxml/includes/libxml/xmlsave.h,sha256=zcEQr9sO5CsFrnoOLshhdsqMEr8k4AeFhbkYyNfO9Fs,2934
|
||||
lxml/includes/libxml/xmlschemas.h,sha256=5AfLnYUcfmxHRzg0dVpdHig--4ui1-XDwDgpKGDKCiU,7067
|
||||
lxml/includes/libxml/xmlschemastypes.h,sha256=MYwlGmoKAo3lHRaaKgnCXiLmPT9KRjdxyCJ7TEyZ6jM,4583
|
||||
lxml/includes/libxml/xmlstring.h,sha256=d5PpqxP1I1sfmCUHvVJtjoC9h7hLHcAAQ5ok_Rtf50I,5271
|
||||
lxml/includes/libxml/xmlunicode.h,sha256=8sq3wEW2AiyTCuc3ZceOEkce7lfrI7VnkRfwEQgc6pU,278
|
||||
lxml/includes/libxml/xmlversion.h,sha256=oVpaE_xbttaeZNFKSuSfcLOceWz7LQgKP71Z1msXZNo,5112
|
||||
lxml/includes/libxml/xmlwriter.h,sha256=BEUwYNKx3xymDE9vepksEK7yVq9SXYm1d2pQnzlPy90,20688
|
||||
lxml/includes/libxml/xpath.h,sha256=CQv6X_pRhuXoCVpqoDXYB7FfusLK7AuPxCNigwhNYAA,16156
|
||||
lxml/includes/libxml/xpathInternals.h,sha256=mc9B5tdpfssyz_NPUzww6dKuWCtBybBiBRJkTe4AE4U,18504
|
||||
lxml/includes/libxml/xpointer.h,sha256=DAxMsfPp2SSZgXFrPbxBA84RwTMRf35Qg_LBbUzPQhA,1026
|
||||
lxml/includes/libxslt/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
||||
lxml/includes/libxslt/attributes.h,sha256=qKwzfGf7r89esLC65s96iYJWRA-s-Ezss2_V6Mmo1hk,957
|
||||
lxml/includes/libxslt/documents.h,sha256=kBihgH5pqRvFalhm_fOFHtJTFhTpBcm681yT5dxgwfw,2704
|
||||
lxml/includes/libxslt/extensions.h,sha256=W5UMyJqUP_1zt6sXZ0mgc0gAIwDJrZ8gjByhyrWqvd8,6899
|
||||
lxml/includes/libxslt/extra.h,sha256=6X3Wu3NdPtrlqz-Koo7dB-rccnnszi6j3zg599gTByg,1640
|
||||
lxml/includes/libxslt/functions.h,sha256=fc4CZj-9KeBHzO9-WWU_bNqmaEZAz3n7NNwClIBXk14,1972
|
||||
lxml/includes/libxslt/imports.h,sha256=18kIjoGqdFXR63Ce3ZtzxsTiYV3XGKpchYakMUPDuUI,1840
|
||||
lxml/includes/libxslt/keys.h,sha256=16v25VEluS7jYhgg6gYFwVxgGMn-1ctnlhhWWT4RcBY,1155
|
||||
lxml/includes/libxslt/namespaces.h,sha256=VofSn2Kkn-a5JyRKCmY3jPp7amQy3n09vzy0KUQt4q0,1666
|
||||
lxml/includes/libxslt/numbersInternals.h,sha256=Eg5gYZ5p3h0_e5wyI61S-0E6_ArVJzv0yr63j6BU2fc,2019
|
||||
lxml/includes/libxslt/pattern.h,sha256=tJ-BPfs9UYgiZMMoQZbhij3g7xVppYq7TrrOu25eR7Q,2110
|
||||
lxml/includes/libxslt/preproc.h,sha256=D_LjEdHhsdyBnEAvflnwFgoR4hGUb72kgEhXkkmPRsw,896
|
||||
lxml/includes/libxslt/security.h,sha256=fUD1cy_WxFCTvTNAF0WOQIU4p5CNWn1LHFyZJd-Fx5U,2652
|
||||
lxml/includes/libxslt/templates.h,sha256=bnt6Jqui6KU5pNUdMNPbQZkZ5d-VTWqC0TMGkOlVoIo,2268
|
||||
lxml/includes/libxslt/transform.h,sha256=ICT7meUV0OTAx27WaKVrKj-aUmR9LSpTNaOAJd2UStg,6311
|
||||
lxml/includes/libxslt/variables.h,sha256=cQAgPe4QCcK2uKbWg7Iz-9peM9xWGm7m3M6jQm0sjIA,3143
|
||||
lxml/includes/libxslt/xslt.h,sha256=wmFx2Q31Pd8Iq2phAQpY9J3QQatb8lWg3gABtqKFgEw,1964
|
||||
lxml/includes/libxslt/xsltInternals.h,sha256=2EbEKYmnYZq0HjGnUMAlpqnqZJurRXzjlgk5Js1WYaY,57949
|
||||
lxml/includes/libxslt/xsltconfig.h,sha256=cV5scdRK6xmOHeOg3OCw6hBfcQ_nrtNs_tKefX67304,2910
|
||||
lxml/includes/libxslt/xsltexports.h,sha256=1-luH-0bCIgBAlKAXhV-dqHBfwOAQNDamiYbxIlTf0k,1124
|
||||
lxml/includes/libxslt/xsltlocale.h,sha256=ppxGEmJfZIJgwRQzCM0_77p9WNekEWq1NrdYZrQl4IE,942
|
||||
lxml/includes/libxslt/xsltutils.h,sha256=1eguYgR9-jeNOVlBUktHboaq-VLX6JXraO80TfbARKM,9085
|
||||
lxml/includes/lxml-version.h,sha256=KZfk_lJnXSnxkyRdUV5taHsWJe4xbC6UEYfYldlfouI,71
|
||||
lxml/includes/relaxng.pxd,sha256=HzHlQ6mCcf_tj_JZ9NAVJTVAv8ScCkE8Ifq15y3bS0c,2615
|
||||
lxml/includes/schematron.pxd,sha256=Hob7xh-K-MKqp7WiG8thMagf5EkQzmgfi4ds0EF91JA,1604
|
||||
lxml/includes/tree.pxd,sha256=XApzMRy_LSqCtQ-OTS-vNSW7CT_OWstybfIT2H84LsA,20179
|
||||
lxml/includes/uri.pxd,sha256=3vOXw6AbSPxAM9uo71T1qnfx-wd9ezXLDQtWsb2zX0I,145
|
||||
lxml/includes/xinclude.pxd,sha256=CuO_XZNB6E2JK1qXXWn11APrjFQV5kA6SMyb77WZn0A,804
|
||||
lxml/includes/xmlerror.pxd,sha256=OQqayytkV0NigAPbsQCCcvmy7luRe0XhVzpTdzJjP3g,58837
|
||||
lxml/includes/xmlparser.pxd,sha256=eDGyU5kZyNVksK0dUhMIi7rnE-LSevXsqyl72v99Ess,13730
|
||||
lxml/includes/xmlschema.pxd,sha256=OLZPd2WDJyopiXJJyo-dAyyYHaeSYFiMAI4tqIiv-Ik,1702
|
||||
lxml/includes/xpath.pxd,sha256=e8-ZYUbRG7N1mHETAlknJ_QqAteOosrYLRgpH-OsTkg,5603
|
||||
lxml/includes/xslt.pxd,sha256=4yl3pOu7pAvsx5Tc-W4IWCoB8wgtSSR62HI1jqu6jko,8241
|
||||
lxml/isoschematron/__init__.py,sha256=uauerYeKTlWFCJSqieIHhF5l6rYV2myeEJ0Imd1LzRc,13274
|
||||
lxml/isoschematron/resources/rng/iso-schematron.rng,sha256=VsWxPyi3iViJDDbjJJw0wWkEHkLrz9zoCA8zJLor9N4,18337
|
||||
lxml/isoschematron/resources/xsl/RNG2Schtrn.xsl,sha256=ObebsB8Wt-d3uIA_U5NU85TpnQ3PxPX38TdOAqosMac,3172
|
||||
lxml/isoschematron/resources/xsl/XSD2Schtrn.xsl,sha256=QweRrIIM-zFcgg98GXA2CaWfIbgVE0XKEeYSfvv67A0,4563
|
||||
lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_abstract_expand.xsl,sha256=xSZ_Ekq_I-62ZpiE5AqYYHwFW_qh855zt9V4_s7rbkY,11703
|
||||
lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_dsdl_include.xsl,sha256=x42QJ-dxQ1waPzydsCoQnp2Xj15y53nW43O7BuoDRHk,39957
|
||||
lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_schematron_message.xsl,sha256=Tr9BnO6pzjVWwhqJfm10UlvAy95EgfSCz2iMlrVGT6Q,2015
|
||||
lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_schematron_skeleton_for_xslt1.xsl,sha256=ue8q_88X4e_jsJizo31GRNBxNhdxkEE9fY20oq0Iqwk,71764
|
||||
lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_svrl_for_xslt1.xsl,sha256=BBAdsVSi5zAzeGepuN6gS1saQINDqITXKplmmj4dTWg,20382
|
||||
lxml/isoschematron/resources/xsl/iso-schematron-xslt1/readme.txt,sha256=OGLiFswuLJEW5EPYKOeoauuCJFEtVa6jyzBE1OcJI98,3310
|
||||
lxml/iterparse.pxi,sha256=JXvYhSOCaRjT_hYbRGMlJt2rlqx0TiRpN4FE1jQc63w,16521
|
||||
lxml/lxml.etree.h,sha256=_NkGkD3C_jpE4UegvQ6Y32_ycTbUCLyOBz9xfWRPkug,9792
|
||||
lxml/lxml.etree_api.h,sha256=dAbJPd53D_9CIGzePAUB3otgyhG4o2cSdA4-6apdzRA,17377
|
||||
lxml/nsclasses.pxi,sha256=5pzNBhBtlqObPdThL9QIGRs1Dxj1qnr0PyXuTCURqTg,9129
|
||||
lxml/objectify.cpython-39-darwin.so,sha256=ViaJLjaF61LbuAfG1W_JQjiw94eI0t4W6Fo39Vr7qiA,5235008
|
||||
lxml/objectify.pyx,sha256=I4bQQXmQssBtk5bTrid-eVURBLKRTM5iQZiviugIrts,75823
|
||||
lxml/objectpath.pxi,sha256=s5TNG2-EbaWWKLFAiX303B95zK_Ui8ausB__3QvFFGw,11450
|
||||
lxml/parser.pxi,sha256=VZfychEJ3-XPE3x6oGOEzn6HVAr74R7lXfDSVF-hq-U,85411
|
||||
lxml/parsertarget.pxi,sha256=v1PidxRaG5giwXcTDkpBI7PDFmsZuOcK0y9LdkQaY8M,6326
|
||||
lxml/proxy.pxi,sha256=8IVvYF2KTuzl7Hb3XGHEmcxfSLbUZkA2Q1Y50hLsyzE,23929
|
||||
lxml/public-api.pxi,sha256=XoP6_cJOEoQIItvE1RiYCKYD1ry4AobaOr4XLo0KSE4,6666
|
||||
lxml/pyclasslookup.py,sha256=gLD1HM2HtITYYiGzjEOewSwbB7XkVx_NZv_quCt79Oc,92
|
||||
lxml/readonlytree.pxi,sha256=ddRYczhHieJ4XUvWvTPW9N9oQ8vuKtv7lC1mtE1qvH8,18976
|
||||
lxml/relaxng.pxi,sha256=3OQ-fZMzP-KF5vM6HTozT_9ee3J0DJnpj9RcHC8LoMw,6339
|
||||
lxml/sax.cpython-39-darwin.so,sha256=S7bAW0Qog3b2gbfTY143QChGQgF_F64jRRQbBZafJIM,416008
|
||||
lxml/sax.py,sha256=yrNvKD6rlon48jrR-1qpFXER8j4psYC2R5yt0u6TWLs,9706
|
||||
lxml/saxparser.pxi,sha256=TmkdM5h9xII9iKRaBk_1NGk2KTfeesl5Ha8bpFQGqLc,33529
|
||||
lxml/schematron.pxi,sha256=F2OHKZUl57-byBk_wWtPTnHZ1fwlj0FtwG3VuGtG-UY,6064
|
||||
lxml/serializer.pxi,sha256=iIXfechFHfvFs2sTk7wMIy3sDJxmaMPbNO33mkLLBUE,68063
|
||||
lxml/usedoctest.py,sha256=qRgZKQVcAZcl-zN0AIXVJnOsETUXz2nPXkxuzs1lGgk,230
|
||||
lxml/xinclude.pxi,sha256=7eBrI_OK47mmrHQ0ixbixRI8pKqQ1nwkMV-OmKUVlD4,2456
|
||||
lxml/xmlerror.pxi,sha256=i1kR42WB2BAxtrmh7m2ADlH-jffVQ-blW3pW0Ps4s-g,50061
|
||||
lxml/xmlid.pxi,sha256=5zf9oR6bsCtavGiOmilNyHqYwgG_bnrIabSd2SURtm0,6073
|
||||
lxml/xmlschema.pxi,sha256=mumNoHni5S3BQPtcmOHRd61KRaVWu4eOie2wQeB0e6E,8490
|
||||
lxml/xpath.pxi,sha256=aqW24V817dUxps4Gnc8h7Tm3QVlITKvxU5_9WgJUIFg,19132
|
||||
lxml/xslt.pxi,sha256=wxdbuvNFVA8eP57tHmBYWER__ceFhf6HGdsbBHbx_0A,36315
|
||||
lxml/xsltext.pxi,sha256=TImDiAPlAezC07P7RY1N9YChA7AuKFH-G53hXdel9yc,11088
|
||||
@@ -1,6 +0,0 @@
|
||||
Wheel-Version: 1.0
|
||||
Generator: setuptools (80.9.0)
|
||||
Root-Is-Purelib: false
|
||||
Tag: cp39-cp39-macosx_10_9_universal2
|
||||
Generator: delocate 0.13.0
|
||||
|
||||
@@ -1,31 +0,0 @@
|
||||
BSD 3-Clause License
|
||||
|
||||
Copyright (c) 2004 Infrae. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
|
||||
3. Neither the name of Infrae nor the names of its contributors may
|
||||
be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INFRAE OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
@@ -1,29 +0,0 @@
|
||||
lxml is copyright Infrae and distributed under the BSD license (see
|
||||
doc/licenses/BSD.txt), with the following exceptions:
|
||||
|
||||
Some code, such as selftest.py, selftest2.py and
|
||||
src/lxml/_elementpath.py are derived from ElementTree and
|
||||
cElementTree. See doc/licenses/elementtree.txt for the license text.
|
||||
|
||||
lxml.cssselect and lxml.html are copyright Ian Bicking and distributed
|
||||
under the BSD license (see doc/licenses/BSD.txt).
|
||||
|
||||
test.py, the test-runner script, is GPL and copyright Shuttleworth
|
||||
Foundation. See doc/licenses/GPL.txt. It is believed the unchanged
|
||||
inclusion of test.py to run the unit test suite falls under the
|
||||
"aggregation" clause of the GPL and thus does not affect the license
|
||||
of the rest of the package.
|
||||
|
||||
The isoschematron implementation uses several XSL and RelaxNG resources:
|
||||
* The (XML syntax) RelaxNG schema for schematron, copyright International
|
||||
Organization for Standardization (see
|
||||
src/lxml/isoschematron/resources/rng/iso-schematron.rng for the license
|
||||
text)
|
||||
* The skeleton iso-schematron-xslt1 pure-xslt schematron implementation
|
||||
xsl stylesheets, copyright Rick Jelliffe and Academia Sinica Computing
|
||||
Center, Taiwan (see the xsl files here for the license text:
|
||||
src/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/)
|
||||
* The xsd/rng schema schematron extraction xsl transformations are unlicensed
|
||||
and copyright the respective authors as noted (see
|
||||
src/lxml/isoschematron/resources/xsl/RNG2Schtrn.xsl and
|
||||
src/lxml/isoschematron/resources/xsl/XSD2Schtrn.xsl)
|
||||
@@ -1 +0,0 @@
|
||||
lxml
|
||||
@@ -1,244 +0,0 @@
|
||||
#
|
||||
# ElementTree
|
||||
# $Id: ElementInclude.py 1862 2004-06-18 07:31:02Z Fredrik $
|
||||
#
|
||||
# limited xinclude support for element trees
|
||||
#
|
||||
# history:
|
||||
# 2003-08-15 fl created
|
||||
# 2003-11-14 fl fixed default loader
|
||||
#
|
||||
# Copyright (c) 2003-2004 by Fredrik Lundh. All rights reserved.
|
||||
#
|
||||
# fredrik@pythonware.com
|
||||
# http://www.pythonware.com
|
||||
#
|
||||
# --------------------------------------------------------------------
|
||||
# The ElementTree toolkit is
|
||||
#
|
||||
# Copyright (c) 1999-2004 by Fredrik Lundh
|
||||
#
|
||||
# By obtaining, using, and/or copying this software and/or its
|
||||
# associated documentation, you agree that you have read, understood,
|
||||
# and will comply with the following terms and conditions:
|
||||
#
|
||||
# Permission to use, copy, modify, and distribute this software and
|
||||
# its associated documentation for any purpose and without fee is
|
||||
# hereby granted, provided that the above copyright notice appears in
|
||||
# all copies, and that both that copyright notice and this permission
|
||||
# notice appear in supporting documentation, and that the name of
|
||||
# Secret Labs AB or the author not be used in advertising or publicity
|
||||
# pertaining to distribution of the software without specific, written
|
||||
# prior permission.
|
||||
#
|
||||
# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
|
||||
# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
|
||||
# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
|
||||
# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
|
||||
# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||
# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||
# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
||||
# OF THIS SOFTWARE.
|
||||
# --------------------------------------------------------------------
|
||||
|
||||
"""
|
||||
Limited XInclude support for the ElementTree package.
|
||||
|
||||
While lxml.etree has full support for XInclude (see
|
||||
`etree.ElementTree.xinclude()`), this module provides a simpler, pure
|
||||
Python, ElementTree compatible implementation that supports a simple
|
||||
form of custom URL resolvers.
|
||||
"""
|
||||
|
||||
from lxml import etree
|
||||
try:
|
||||
from urlparse import urljoin
|
||||
from urllib2 import urlopen
|
||||
except ImportError:
|
||||
# Python 3
|
||||
from urllib.parse import urljoin
|
||||
from urllib.request import urlopen
|
||||
|
||||
XINCLUDE = "{http://www.w3.org/2001/XInclude}"
|
||||
|
||||
XINCLUDE_INCLUDE = XINCLUDE + "include"
|
||||
XINCLUDE_FALLBACK = XINCLUDE + "fallback"
|
||||
XINCLUDE_ITER_TAG = XINCLUDE + "*"
|
||||
|
||||
# For security reasons, the inclusion depth is limited to this read-only value by default.
|
||||
DEFAULT_MAX_INCLUSION_DEPTH = 6
|
||||
|
||||
|
||||
##
|
||||
# Fatal include error.
|
||||
|
||||
class FatalIncludeError(etree.LxmlSyntaxError):
    """Fatal include error.

    Raised by the XInclude expansion code in this module when an include
    directive cannot be processed (for example: a recursive include, a
    resource that cannot be loaded, or an unknown ``parse`` type).
    """
|
||||
|
||||
|
||||
class LimitedRecursiveIncludeError(FatalIncludeError):
    """Raised when the maximum XInclude nesting depth has been reached."""
|
||||
|
||||
|
||||
##
|
||||
# ET compatible default loader.
|
||||
# This loader reads an included resource from disk.
|
||||
#
|
||||
# @param href Resource reference.
|
||||
# @param parse Parse mode. Either "xml" or "text".
|
||||
# @param encoding Optional text encoding.
|
||||
# @return The expanded resource. If the parse mode is "xml", this
|
||||
# is an ElementTree instance. If the parse mode is "text", this
|
||||
# is a Unicode string. If the loader fails, it can return None
|
||||
# or raise an IOError exception.
|
||||
# @throws IOError If the loader fails to load the resource.
|
||||
|
||||
def default_loader(href, parse, encoding=None):
    """Load an included resource from disk (ElementTree-compatible loader).

    :param href: Resource reference; passed to ``open()`` as a file path.
    :param parse: Parse mode.  ``"xml"`` parses the file with
        ``etree.parse`` and returns the root element; any other value is
        handled as text.
    :param encoding: Optional text encoding used in text mode.  Falls back
        to UTF-8 when empty or ``None``.
    :return: The root element in ``"xml"`` mode, otherwise the decoded
        content of the file as a string.
    :raises IOError: If the file cannot be opened or read.
    """
    # The context manager guarantees the handle is closed even when parsing
    # or reading raises (the previous explicit close() ran only on success).
    with open(href, 'rb') as file:
        if parse == "xml":
            return etree.parse(file).getroot()
        data = file.read()
    return data.decode(encoding or 'utf-8')
|
||||
|
||||
|
||||
##
|
||||
# Default loader used by lxml.etree - handles custom resolvers properly
|
||||
#
|
||||
|
||||
def _lxml_default_loader(href, parse, encoding=None, parser=None):
|
||||
if parse == "xml":
|
||||
data = etree.parse(href, parser).getroot()
|
||||
else:
|
||||
if "://" in href:
|
||||
f = urlopen(href)
|
||||
else:
|
||||
f = open(href, 'rb')
|
||||
data = f.read()
|
||||
f.close()
|
||||
if not encoding:
|
||||
encoding = 'utf-8'
|
||||
data = data.decode(encoding)
|
||||
return data
|
||||
|
||||
|
||||
##
|
||||
# Wrapper for ET compatibility - drops the parser
|
||||
|
||||
def _wrap_et_loader(loader):
|
||||
def load(href, parse, encoding=None, parser=None):
|
||||
return loader(href, parse, encoding)
|
||||
return load
|
||||
|
||||
|
||||
##
|
||||
# Expand XInclude directives.
|
||||
#
|
||||
# @param elem Root element.
|
||||
# @param loader Optional resource loader. If omitted, it defaults
|
||||
# to {@link default_loader}. If given, it should be a callable
|
||||
# that implements the same interface as <b>default_loader</b>.
|
||||
# @param base_url The base URL of the original file, to resolve
|
||||
# relative include file references.
|
||||
# @param max_depth The maximum number of recursive inclusions.
|
||||
# Limited to reduce the risk of malicious content explosion.
|
||||
# Pass None to disable the limitation.
|
||||
# @throws LimitedRecursiveIncludeError If the {@link max_depth} was exceeded.
|
||||
# @throws FatalIncludeError If the function fails to include a given
|
||||
# resource, or if the tree contains malformed XInclude elements.
|
||||
# @throws IOError If the function fails to load a given resource.
|
||||
# @returns the node or its replacement if it was an XInclude node
|
||||
|
||||
def include(elem, loader=None, base_url=None,
            max_depth=DEFAULT_MAX_INCLUSION_DEPTH):
    """Expand XInclude directives below ``elem``.

    :param elem: Root element, or a tree object exposing ``getroot()``
        (in which case its root element is processed).
    :param loader: Optional resource loader with the ``default_loader``
        interface ``(href, parse, encoding=None)``.
    :param base_url: Base URL used to resolve relative include references.
        When ``None``, it is taken from ``tree.docinfo.URL`` if the
        surrounding tree has a ``docinfo`` attribute.
    :param max_depth: Maximum number of recursive inclusions.  ``None``
        disables the limit (it is mapped to ``-1`` internally).
    :raises ValueError: If ``max_depth`` is negative.
    :return: ``None``; the result of the internal ``_include`` call is
        not propagated to the caller.
    """
    if max_depth is None:
        max_depth = -1
    elif max_depth < 0:
        raise ValueError("expected non-negative depth or None for 'max_depth', got %r" % max_depth)

    if base_url is None:
        # Work out the owning tree so its document URL can serve as base.
        if hasattr(elem, 'getroot'):
            tree = elem
            elem = elem.getroot()
        else:
            tree = elem.getroottree()
        if hasattr(tree, 'docinfo'):
            base_url = tree.docinfo.URL
    elif hasattr(elem, 'getroot'):
        elem = elem.getroot()
    _include(elem, loader, base_url, max_depth)
|
||||
|
||||
|
||||
def _include(elem, loader=None, base_url=None,
             max_depth=DEFAULT_MAX_INCLUSION_DEPTH, _parent_hrefs=None):
    """Recursively expand XInclude elements found under ``elem``.

    ``loader`` is an optional ElementTree-style loader; it is wrapped so the
    ``parser`` keyword is dropped.  Without it, ``_lxml_default_loader`` is
    used.  ``base_url`` is joined with each ``href``.  ``max_depth`` is
    decremented for every nested XML inclusion and triggers
    LimitedRecursiveIncludeError when it is exactly 0.  ``_parent_hrefs`` is
    the set of hrefs currently being included, used to detect cycles.

    Returns ``elem``, or the replacement node/text if ``elem`` itself was an
    xi:include element without a parent.

    Raises FatalIncludeError for recursive includes, unloadable resources,
    unknown ``parse`` values, misplaced xi:fallback elements, and any other
    element in the XInclude namespace.
    """
    if loader is not None:
        load_include = _wrap_et_loader(loader)
    else:
        load_include = _lxml_default_loader

    if _parent_hrefs is None:
        _parent_hrefs = set()

    parser = elem.getroottree().parser

    # Materialize the matches up front: the loop below replaces and removes
    # elements, so it must not iterate over the live tree.
    include_elements = list(
        elem.iter(XINCLUDE_ITER_TAG))

    for e in include_elements:
        if e.tag == XINCLUDE_INCLUDE:
            # process xinclude directive
            href = urljoin(base_url, e.get("href"))
            parse = e.get("parse", "xml")
            parent = e.getparent()
            if parse == "xml":
                if href in _parent_hrefs:
                    raise FatalIncludeError(
                        "recursive include of %r detected" % href
                        )
                if max_depth == 0:
                    raise LimitedRecursiveIncludeError(
                        "maximum xinclude depth reached when including file %s" % href)
                node = load_include(href, parse, parser=parser)
                if node is None:
                    raise FatalIncludeError(
                        "cannot load %r as %r" % (href, parse)
                        )
                # Expand includes inside the loaded document, relative to its
                # own href, with one level less of remaining depth.
                node = _include(node, loader, href, max_depth - 1, {href} | _parent_hrefs)
                if e.tail:
                    # carry the directive's tail text over to its replacement
                    node.tail = (node.tail or "") + e.tail
                if parent is None:
                    return node # replaced the root node!
                parent.replace(e, node)
            elif parse == "text":
                text = load_include(href, parse, encoding=e.get("encoding"))
                if text is None:
                    raise FatalIncludeError(
                        "cannot load %r as %r" % (href, parse)
                        )
                predecessor = e.getprevious()
                if predecessor is not None:
                    # NOTE(review): unlike the parent.text branch below, e.tail
                    # is not appended here; confirm that remove() keeps it.
                    predecessor.tail = (predecessor.tail or "") + text
                elif parent is None:
                    return text # replaced the root node!
                else:
                    parent.text = (parent.text or "") + text + (e.tail or "")
                parent.remove(e)
            else:
                raise FatalIncludeError(
                    "unknown parse type in xi:include tag (%r)" % parse
                )
        elif e.tag == XINCLUDE_FALLBACK:
            # A fallback is only validated for placement; its content is not
            # processed here.
            parent = e.getparent()
            if parent is not None and parent.tag != XINCLUDE_INCLUDE:
                raise FatalIncludeError(
                    "xi:fallback tag must be child of xi:include (%r)" % e.tag
                    )
        else:
            raise FatalIncludeError(
                "Invalid element found in XInclude namespace (%r)" % e.tag
                )
    return elem
|
||||
@@ -1,22 +0,0 @@
|
||||
# this is a package
|
||||
|
||||
__version__ = "6.0.2"
|
||||
|
||||
|
||||
def get_include():
    """
    Returns a list of header include paths (for lxml itself, libxml2
    and libxslt) needed to compile C code against lxml if it was built
    with statically linked libraries.

    The list starts with the package's ``includes`` directory and the
    package directory itself, followed by every sub-directory found
    directly inside ``includes`` (in ``os.listdir`` order).
    """
    import os

    package_dir = __path__[0]
    headers_dir = os.path.join(package_dir, 'includes')

    candidates = (os.path.join(headers_dir, entry)
                  for entry in os.listdir(headers_dir))
    return [headers_dir, package_dir] + [
        candidate for candidate in candidates if os.path.isdir(candidate)]
|
||||
Binary file not shown.
@@ -1,343 +0,0 @@
|
||||
# cython: language_level=3
|
||||
|
||||
#
|
||||
# ElementTree
|
||||
# $Id: ElementPath.py 3375 2008-02-13 08:05:08Z fredrik $
|
||||
#
|
||||
# limited xpath support for element trees
|
||||
#
|
||||
# history:
|
||||
# 2003-05-23 fl created
|
||||
# 2003-05-28 fl added support for // etc
|
||||
# 2003-08-27 fl fixed parsing of periods in element names
|
||||
# 2007-09-10 fl new selection engine
|
||||
# 2007-09-12 fl fixed parent selector
|
||||
# 2007-09-13 fl added iterfind; changed findall to return a list
|
||||
# 2007-11-30 fl added namespaces support
|
||||
# 2009-10-30 fl added child element value filter
|
||||
#
|
||||
# Copyright (c) 2003-2009 by Fredrik Lundh. All rights reserved.
|
||||
#
|
||||
# fredrik@pythonware.com
|
||||
# http://www.pythonware.com
|
||||
#
|
||||
# --------------------------------------------------------------------
|
||||
# The ElementTree toolkit is
|
||||
#
|
||||
# Copyright (c) 1999-2009 by Fredrik Lundh
|
||||
#
|
||||
# By obtaining, using, and/or copying this software and/or its
|
||||
# associated documentation, you agree that you have read, understood,
|
||||
# and will comply with the following terms and conditions:
|
||||
#
|
||||
# Permission to use, copy, modify, and distribute this software and
|
||||
# its associated documentation for any purpose and without fee is
|
||||
# hereby granted, provided that the above copyright notice appears in
|
||||
# all copies, and that both that copyright notice and this permission
|
||||
# notice appear in supporting documentation, and that the name of
|
||||
# Secret Labs AB or the author not be used in advertising or publicity
|
||||
# pertaining to distribution of the software without specific, written
|
||||
# prior permission.
|
||||
#
|
||||
# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
|
||||
# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
|
||||
# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
|
||||
# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
|
||||
# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||
# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||
# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
||||
# OF THIS SOFTWARE.
|
||||
# --------------------------------------------------------------------
|
||||
|
||||
##
|
||||
# Implementation module for XPath support. There's usually no reason
|
||||
# to import this module directly; the <b>ElementTree</b> does this for
|
||||
# you, if needed.
|
||||
##
|
||||
|
||||
|
||||
import re
|
||||
|
||||
# Each match yields a (token type, tag) pair: group 1 captures operators and
# quoted strings, group 2 captures names (optionally in "{uri}name" form).
# Whitespace matches neither group and therefore shows up as ('', '').
xpath_tokenizer_re = re.compile(
    "("
    "'[^']*'|\"[^\"]*\"|"
    "::|"
    "//?|"
    r"\.\.|"
    r"\(\)|"
    r"[/.*:\[\]\(\)@=])|"
    r"((?:\{[^}]+\})?[^/\[\]\(\)@=\s]+)|"
    r"\s+"
    )


def xpath_tokenizer(pattern, namespaces=None, with_prefixes=True):
    """Yield ``(type, tag)`` tokens for ``pattern``, resolving namespaces.

    Names of the form ``prefix:local`` are expanded to ``{uri}local`` through
    ``namespaces`` when ``with_prefixes`` is true; an unknown prefix (or a
    missing/empty map) raises SyntaxError.  Other unqualified, non-numeric
    names get the default namespace, unless they directly follow an ``@``
    token.  Everything else is passed through unchanged.
    """
    # ElementTree uses '', lxml used None originally.
    default_namespace = None
    if namespaces:
        default_namespace = namespaces.get(None) or namespaces.get('')

    after_at_sign = False
    for token in xpath_tokenizer_re.findall(pattern):
        ttype, tag = token

        if not tag or tag[0] == "{":
            # operator, whitespace, or an already qualified name
            yield token
            after_at_sign = (ttype == '@')
            continue

        if with_prefixes and ":" in tag:
            prefix, local_name = tag.split(":", 1)
            try:
                if not namespaces:
                    raise KeyError
                namespace_uri = namespaces[prefix]
            except KeyError:
                raise SyntaxError("prefix %r not found in prefix map" % prefix)
            yield ttype, "{%s}%s" % (namespace_uri, local_name)
        elif tag.isdecimal():
            # index
            yield token
        elif default_namespace and not after_at_sign:
            yield ttype, "{%s}%s" % (default_namespace, tag)
        else:
            yield token
        after_at_sign = False
|
||||
|
||||
|
||||
def prepare_child(next, token):
    """Build a selector yielding the children named ``token[1]``.

    ``next`` is unused.  The returned generator function walks the incoming
    elements in order and yields ``elem.iterchildren(tag)`` for each.
    """
    tag = token[1]

    def select(result):
        for parent in result:
            for child in parent.iterchildren(tag):
                yield child

    return select
|
||||
|
||||
def prepare_star(next, token):
    """Build a selector yielding all children matched by the ``'*'`` tag.

    Neither ``next`` nor ``token`` is used.
    """
    def select(result):
        for parent in result:
            for child in parent.iterchildren('*'):
                yield child

    return select
|
||||
|
||||
def prepare_self(next, token):
    """Build the selector for ``.``: it returns its input unchanged.

    Neither ``next`` nor ``token`` is used.
    """
    def select(result):
        # identity step: hand back the very same iterable
        return result

    return select
|
||||
|
||||
def prepare_descendant(next, token):
    """Build a selector for ``//``, yielding matching descendants.

    The tag is taken from the token that follows: ``*`` selects every
    descendant, a name token (empty type) selects by that name, and any other
    token raises SyntaxError("invalid descendant").
    """
    following = next()
    kind = following[0]
    if kind == "*":
        tag = "*"
    elif kind:
        raise SyntaxError("invalid descendant")
    else:
        tag = following[1]

    def select(result):
        for ancestor in result:
            for descendant in ancestor.iterdescendants(tag):
                yield descendant

    return select
|
||||
|
||||
def prepare_parent(next, token):
    """Build the selector for ``..``: yield each element's parent.

    Elements whose ``getparent()`` returns None are skipped.  Neither
    ``next`` nor ``token`` is used.
    """
    def select(result):
        for node in result:
            owner = node.getparent()
            if owner is None:
                continue
            yield owner

    return select
|
||||
|
||||
def prepare_predicate(next, token):
    """Compile a bracketed predicate (``[...]``) into a selector function.

    Tokens are pulled from ``next()`` until the closing ``]``.  Their types
    are concatenated into a "signature" string (``-`` stands for a token with
    an empty type, ``'`` for a quoted string) and their values are collected
    in ``predicate``.  The signature then picks one of the supported forms:
    ``[@attr]``, ``[@attr='v']``, ``[tag]``, ``[.='v']`` / ``[tag='v']``,
    ``[index]``, ``[last()]`` and ``[last()-index]``.

    Raises SyntaxError for anything else, for a zero or negative index, for
    functions other than ``last``, and for a non-integer offset.
    """
    # FIXME: replace with real parser!!! refs:
    # http://effbot.org/zone/simple-iterator-parser.htm
    # http://javascript.crockford.com/tdop/tdop.html
    signature = ''
    predicate = []
    while 1:
        token = next()
        if token[0] == "]":
            break
        if token == ('', ''):
            # ignore whitespace
            continue
        if token[0] and token[0][:1] in "'\"":
            # quoted string: normalize the type to "'" and strip the quotes
            token = "'", token[0][1:-1]
        signature += token[0] or "-"
        predicate.append(token[1])

    # use signature to determine predicate type
    if signature == "@-":
        # [@attribute] predicate
        key = predicate[1]
        def select(result):
            for elem in result:
                if elem.get(key) is not None:
                    yield elem
        return select
    if signature == "@-='":
        # [@attribute='value']
        key = predicate[1]
        value = predicate[-1]
        def select(result):
            for elem in result:
                if elem.get(key) == value:
                    yield elem
        return select
    if signature == "-" and not re.match(r"-?\d+$", predicate[0]):
        # [tag]
        tag = predicate[0]
        def select(result):
            for elem in result:
                # yield the element once if it has at least one such child
                for _ in elem.iterchildren(tag):
                    yield elem
                    break
        return select
    if signature == ".='" or (signature == "-='" and not re.match(r"-?\d+$", predicate[0])):
        # [.='value'] or [tag='value']
        # For the '.' form the collected value is the empty string, which
        # selects the else-branch below.
        tag = predicate[0]
        value = predicate[-1]
        if tag:
            def select(result):
                for elem in result:
                    for e in elem.iterchildren(tag):
                        if "".join(e.itertext()) == value:
                            yield elem
                            break
        else:
            def select(result):
                for elem in result:
                    if "".join(elem.itertext()) == value:
                        yield elem
        return select
    if signature == "-" or signature == "-()" or signature == "-()-":
        # [index] or [last()] or [last()-index]
        if signature == "-":
            # [index]
            # convert the 1-based path index to a 0-based list index
            index = int(predicate[0]) - 1
            if index < 0:
                if index == -1:
                    raise SyntaxError(
                        "indices in path predicates are 1-based, not 0-based")
                else:
                    raise SyntaxError("path index >= 1 expected")
        else:
            if predicate[0] != "last":
                raise SyntaxError("unsupported function")
            if signature == "-()-":
                try:
                    # the offset token carries its own sign (e.g. "-1"), so
                    # this produces a negative index counted from the end
                    index = int(predicate[2]) - 1
                except ValueError:
                    raise SyntaxError("unsupported expression")
            else:
                index = -1
        def select(result):
            for elem in result:
                parent = elem.getparent()
                if parent is None:
                    continue
                try:
                    # FIXME: what if the selector is "*" ?
                    # position is counted among siblings with the same tag
                    elems = list(parent.iterchildren(elem.tag))
                    if elems[index] is elem:
                        yield elem
                except IndexError:
                    pass
        return select
    raise SyntaxError("invalid predicate")
|
||||
|
||||
# Dispatch table: maps the type of the token that starts a path step to the
# prepare_* function that compiles the step.  The empty type is a plain name.
ops = {
    "": prepare_child,
    "*": prepare_star,
    ".": prepare_self,
    "..": prepare_parent,
    "//": prepare_descendant,
    "[": prepare_predicate,
}


# --------------------------------------------------------------------

# Compiled selector lists, keyed by the cache key that
# _build_path_iterator() derives from its arguments.
_cache = {}
|
||||
|
||||
|
||||
def _build_path_iterator(path, namespaces, with_prefixes=True):
    """Compile a selector pattern into a list of selector callables.

    A trailing ``/`` is treated as ``/*``.  Compiled selectors are cached in
    ``_cache``; the key combines the path, the ``with_prefixes`` flag and the
    sorted namespace mapping.  The cache is emptied once it holds more than
    100 entries.

    Raises ValueError if ``namespaces`` maps both ``None`` and ``''`` to
    different default namespaces, and SyntaxError for absolute, empty or
    truncated path expressions.
    """
    if path[-1:] == "/":
        path += "*"  # implicit all (FIXME: keep this?)

    # The tokenizer expands "prefix:name" only when with_prefixes is true, so
    # the flag has to be part of the key.  Otherwise a selector compiled in
    # one mode would be served for the other.
    cache_key = (path, bool(with_prefixes))
    if namespaces:
        # lxml originally used None for the default namespace but ElementTree uses the
        # more convenient (all-strings-dict) empty string, so we support both here,
        # preferring the more convenient '', as long as they aren't ambiguous.
        if None in namespaces:
            if '' in namespaces and namespaces[None] != namespaces['']:
                raise ValueError("Ambiguous default namespace provided: %r versus %r" % (
                    namespaces[None], namespaces['']))
            # None cannot be sorted against string keys, so it is pulled out
            # and placed in front of the sorted remainder.
            cache_key += (namespaces[None],) + tuple(sorted(
                item for item in namespaces.items() if item[0] is not None))
        else:
            cache_key += tuple(sorted(namespaces.items()))

    try:
        return _cache[cache_key]
    except KeyError:
        pass
    if len(_cache) > 100:
        _cache.clear()

    if path[:1] == "/":
        raise SyntaxError("cannot use absolute path on element")
    stream = iter(xpath_tokenizer(path, namespaces, with_prefixes=with_prefixes))
    _next = stream.__next__
    try:
        token = _next()
    except StopIteration:
        raise SyntaxError("empty path expression")
    selector = []
    while 1:
        try:
            # a prepare_* function may consume further tokens; running out of
            # them in the middle of a step means the path is incomplete
            selector.append(ops[token[0]](_next, token))
        except StopIteration:
            raise SyntaxError("invalid path")
        try:
            token = _next()
            if token[0] == "/":
                # skip the step separator
                token = _next()
        except StopIteration:
            break
    _cache[cache_key] = selector
    return selector
|
||||
|
||||
|
||||
##
|
||||
# Iterate over the matching nodes
|
||||
|
||||
def iterfind(elem, path, namespaces=None, with_prefixes=True):
    """Return an iterator over the nodes below ``elem`` that match ``path``.

    The path is compiled with ``_build_path_iterator()`` and the resulting
    selector steps are chained, starting from an iterator over ``elem`` alone.
    """
    steps = _build_path_iterator(path, namespaces, with_prefixes=with_prefixes)
    matches = iter((elem,))
    for step in steps:
        matches = step(matches)
    return matches
|
||||
|
||||
|
||||
##
|
||||
# Find first matching object.
|
||||
|
||||
def find(elem, path, namespaces=None, with_prefixes=True):
    """Return the first node matching ``path``, or None if nothing matches."""
    matches = iterfind(elem, path, namespaces, with_prefixes=with_prefixes)
    return next(matches, None)
|
||||
|
||||
|
||||
##
|
||||
# Find all matching objects.
|
||||
|
||||
def findall(elem, path, namespaces=None, with_prefixes=True):
    """Return a list of all nodes below ``elem`` that match ``path``.

    ``with_prefixes`` is forwarded to ``iterfind()`` so that prefix handling
    matches ``find()``, ``iterfind()`` and ``findtext()``.
    """
    return list(iterfind(elem, path, namespaces, with_prefixes=with_prefixes))
|
||||
|
||||
|
||||
##
|
||||
# Find text for first matching object.
|
||||
|
||||
def findtext(elem, path, default=None, namespaces=None, with_prefixes=True):
    """Return the text of the first node matching ``path``.

    Returns ``default`` when nothing matches, and the empty string when the
    match has no (or empty) text.
    """
    match = find(elem, path, namespaces, with_prefixes=with_prefixes)
    if match is not None:
        return match.text or ''
    return default
|
||||
File diff suppressed because it is too large
Load Diff
Binary file not shown.
@@ -1,243 +0,0 @@
|
||||
# cython: language_level=2
|
||||
|
||||
#
|
||||
# Element generator factory by Fredrik Lundh.
|
||||
#
|
||||
# Source:
|
||||
# http://online.effbot.org/2006_11_01_archive.htm#et-builder
|
||||
# http://effbot.python-hosting.com/file/stuff/sandbox/elementlib/builder.py
|
||||
#
|
||||
# --------------------------------------------------------------------
|
||||
# The ElementTree toolkit is
|
||||
#
|
||||
# Copyright (c) 1999-2004 by Fredrik Lundh
|
||||
#
|
||||
# By obtaining, using, and/or copying this software and/or its
|
||||
# associated documentation, you agree that you have read, understood,
|
||||
# and will comply with the following terms and conditions:
|
||||
#
|
||||
# Permission to use, copy, modify, and distribute this software and
|
||||
# its associated documentation for any purpose and without fee is
|
||||
# hereby granted, provided that the above copyright notice appears in
|
||||
# all copies, and that both that copyright notice and this permission
|
||||
# notice appear in supporting documentation, and that the name of
|
||||
# Secret Labs AB or the author not be used in advertising or publicity
|
||||
# pertaining to distribution of the software without specific, written
|
||||
# prior permission.
|
||||
#
|
||||
# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
|
||||
# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
|
||||
# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
|
||||
# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
|
||||
# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||
# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||
# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
||||
# OF THIS SOFTWARE.
|
||||
# --------------------------------------------------------------------
|
||||
|
||||
"""
|
||||
The ``E`` Element factory for generating XML documents.
|
||||
"""
|
||||
|
||||
|
||||
import lxml.etree as ET
|
||||
_QName = ET.QName
|
||||
|
||||
from functools import partial
|
||||
|
||||
try:
|
||||
from types import GenericAlias as _GenericAlias
|
||||
except ImportError:
|
||||
# Python 3.8 - we only need this as return value from "__class_getitem__"
|
||||
def _GenericAlias(cls, item):
|
||||
return f"{cls.__name__}[{item.__name__}]"
|
||||
|
||||
try:
|
||||
basestring
|
||||
except NameError:
|
||||
basestring = str
|
||||
|
||||
try:
|
||||
unicode
|
||||
except NameError:
|
||||
unicode = str
|
||||
|
||||
|
||||
class ElementMaker:
    """Element generator factory.

    Unlike the ordinary Element factory, the E factory allows you to pass in
    more than just a tag and some optional attributes; you can also pass in
    text and other elements.  The text is added as either text or tail
    attributes, and elements are inserted at the right spot.  Some small
    examples::

        >>> from lxml import etree as ET
        >>> from lxml.builder import E

        >>> ET.tostring(E("tag"))
        '<tag/>'
        >>> ET.tostring(E("tag", "text"))
        '<tag>text</tag>'
        >>> ET.tostring(E("tag", "text", key="value"))
        '<tag key="value">text</tag>'
        >>> ET.tostring(E("tag", E("subtag", "text"), "tail"))
        '<tag><subtag>text</subtag>tail</tag>'

    For simple tags, the factory also allows you to write ``E.tag(...)`` instead
    of ``E('tag', ...)``::

        >>> ET.tostring(E.tag())
        '<tag/>'
        >>> ET.tostring(E.tag("text"))
        '<tag>text</tag>'
        >>> ET.tostring(E.tag(E.subtag("text"), "tail"))
        '<tag><subtag>text</subtag>tail</tag>'

    Here's a somewhat larger example; this shows how to generate HTML
    documents, using a mix of prepared factory functions for inline elements,
    nested ``E.tag`` calls, and embedded XHTML fragments::

        # some common inline elements
        A = E.a
        I = E.i
        B = E.b

        def CLASS(v):
            # helper function, 'class' is a reserved word
            return {'class': v}

        page = (
            E.html(
                E.head(
                    E.title("This is a sample document")
                ),
                E.body(
                    E.h1("Hello!", CLASS("title")),
                    E.p("This is a paragraph with ", B("bold"), " text in it!"),
                    E.p("This is another paragraph, with a ",
                        A("link", href="http://www.python.org"), "."),
                    E.p("Here are some reserved characters: <spam&egg>."),
                    ET.XML("<p>And finally, here is an embedded XHTML fragment.</p>"),
                )
            )
        )

        print(ET.tostring(page))

    Here's a prettyprinted version of the output from the above script::

        <html>
          <head>
            <title>This is a sample document</title>
          </head>
          <body>
            <h1 class="title">Hello!</h1>
            <p>This is a paragraph with <b>bold</b> text in it!</p>
            <p>This is another paragraph, with a <a href="http://www.python.org">link</a>.</p>
            <p>Here are some reserved characters: &lt;spam&amp;egg&gt;.</p>
            <p>And finally, here is an embedded XHTML fragment.</p>
          </body>
        </html>

    For namespace support, you can pass a namespace map (``nsmap``)
    and/or a specific target ``namespace`` to the ElementMaker class::

        >>> E = ElementMaker(namespace="http://my.ns/")
        >>> print(ET.tostring( E.test ))
        <test xmlns="http://my.ns/"/>

        >>> E = ElementMaker(namespace="http://my.ns/", nsmap={'p':'http://my.ns/'})
        >>> print(ET.tostring( E.test ))
        <p:test xmlns:p="http://my.ns/"/>
    """

    def __init__(self, typemap=None,
                 namespace=None, nsmap=None, makeelement=None):
        """Set up the factory.

        ``typemap`` maps child types to handler callables ``f(elem, item)``;
        it is copied and then completed with default handlers for ``str``,
        ``ET.CDATA`` and ``dict`` where the caller supplied none.
        ``namespace`` is stored in ``{uri}`` form and prepended to
        unqualified tags.  ``nsmap`` is copied and passed to every created
        element.  ``makeelement`` replaces ``ET.Element`` as the element
        constructor.
        """
        self._namespace = '{' + namespace + '}' if namespace is not None else None
        self._nsmap = dict(nsmap) if nsmap else None

        assert makeelement is None or callable(makeelement)
        self._makeelement = makeelement if makeelement is not None else ET.Element

        # initialize the default type map functions for this element factory
        typemap = dict(typemap) if typemap else {}

        def add_text(elem, item):
            # Text goes into the tail of the last child if there is one,
            # otherwise into the element's own text.
            try:
                last_child = elem[-1]
            except IndexError:
                elem.text = (elem.text or "") + item
            else:
                last_child.tail = (last_child.tail or "") + item

        def add_cdata(elem, cdata):
            if elem.text:
                raise ValueError("Can't add a CDATA section. Element already has some text: %r" % elem.text)
            elem.text = cdata

        if str not in typemap:
            typemap[str] = add_text
        if unicode not in typemap:
            typemap[unicode] = add_text
        if ET.CDATA not in typemap:
            typemap[ET.CDATA] = add_cdata

        def add_dict(elem, item):
            # Each dict entry becomes an attribute; non-string values are
            # converted by the typemap handler registered for their exact
            # type, called with None in place of the element.
            attrib = elem.attrib
            for k, v in item.items():
                if isinstance(v, basestring):
                    attrib[k] = v
                else:
                    attrib[k] = typemap[type(v)](None, v)

        if dict not in typemap:
            typemap[dict] = add_dict

        self._typemap = typemap

    def __call__(self, tag, *children, **attrib):
        """Create an element ``tag`` with the given children and attributes.

        Each child is dispatched through the typemap by its exact type, then
        (unless it is an element, which is appended directly) by the types in
        its MRO.  Callable children are called first and their result is used.
        Raises TypeError for a child that no handler accepts.
        """
        typemap = self._typemap

        # We'll usually get a 'str', and the compiled type check is very fast.
        if not isinstance(tag, str) and isinstance(tag, _QName):
            # A QName is explicitly qualified, do not look at self._namespace.
            tag = tag.text
        elif self._namespace is not None and tag[0] != '{':
            tag = self._namespace + tag
        elem = self._makeelement(tag, nsmap=self._nsmap)
        if attrib:
            typemap[dict](elem, attrib)

        for item in children:
            if callable(item):
                item = item()
            t = typemap.get(type(item))
            if t is None:
                if ET.iselement(item):
                    elem.append(item)
                    continue
                for basetype in type(item).__mro__:
                    # See if the typemap knows of any of this type's bases.
                    t = typemap.get(basetype)
                    if t is not None:
                        break
                else:
                    raise TypeError("bad argument type: %s(%r)" %
                                    (type(item).__name__, item))
            v = t(elem, item)
            if v:
                # A handler may return a converted value, which is then
                # dispatched once more by its exact type.
                typemap.get(type(v))(elem, v)

        return elem

    def __getattr__(self, tag):
        """Support ``E.tag(...)`` by binding ``tag`` as the first argument."""
        return partial(self, tag)

    # Allow subscripting ElementMaker in type annotations (PEP 560)
    def __class_getitem__(cls, item):
        return _GenericAlias(cls, item)


# create factory object
E = ElementMaker()
|
||||
@@ -1,580 +0,0 @@
|
||||
# Configurable Element class lookup
|
||||
|
||||
################################################################################
|
||||
# Custom Element classes
|
||||
|
||||
cdef public class ElementBase(_Element) [ type LxmlElementBaseType,
                                          object LxmlElementBase ]:
    """ElementBase(*children, attrib=None, nsmap=None, **_extra)

    The public Element class.  All custom Element classes must inherit
    from this one.  To create an Element, use the `Element()` factory.

    BIG FAT WARNING: Subclasses *must not* override __init__ or
    __new__ as it is absolutely undefined when these objects will be
    created or destroyed.  All persistent state of Elements must be
    stored in the underlying XML.  If you really need to initialize
    the object after creation, you can implement an ``_init(self)``
    method that will be called directly after object creation.

    Subclasses of this class can be instantiated to create a new
    Element.  By default, the tag name will be the class name and the
    namespace will be empty.  You can modify this with the following
    class attributes:

    * TAG - the tag name, possibly containing a namespace in Clark
      notation

    * NAMESPACE - the default namespace URI, unless provided as part
      of the TAG attribute.

    * HTML - flag if the class is an HTML tag, as opposed to an XML
      tag.  This only applies to un-namespaced tags and defaults to
      false (i.e. XML).

    * PARSER - the parser that provides the configuration for the
      newly created document.  Providing an HTML parser here will
      default to creating an HTML element.

    In user code, the latter three are commonly inherited in class
    hierarchies that implement a common namespace.
    """
    def __init__(self, *children, attrib=None, nsmap=None, **_extra):
        """ElementBase(*children, attrib=None, nsmap=None, **_extra)
        """
        cdef bint is_html = 0
        cdef _BaseParser parser
        cdef _Element last_child
        # don't use normal attribute access as it might be overridden
        _getattr = object.__getattribute__
        try:
            namespace = _utf8(_getattr(self, 'NAMESPACE'))
        except AttributeError:
            namespace = None
        try:
            ns, tag = _getNsTag(_getattr(self, 'TAG'))
            # a namespace given as part of TAG wins over NAMESPACE
            if ns is not None:
                namespace = ns
        except AttributeError:
            # no TAG attribute: fall back to the last dotted part of the
            # class name
            tag = _utf8(_getattr(_getattr(self, '__class__'), '__name__'))
            if b'.' in tag:
                tag = tag.split(b'.')[-1]
        try:
            parser = _getattr(self, 'PARSER')
        except AttributeError:
            # no PARSER attribute: take the parser of the first Element child
            parser = None
            for child in children:
                if isinstance(child, _Element):
                    parser = (<_Element>child)._doc._parser
                    break
        if isinstance(parser, HTMLParser):
            is_html = 1
        if namespace is None:
            # the HTML flag is only consulted for un-namespaced tags
            try:
                is_html = _getattr(self, 'HTML')
            except AttributeError:
                pass
        _initNewElement(self, is_html, tag, namespace, parser,
                        attrib, nsmap, _extra)
        last_child = None
        for child in children:
            if _isString(child):
                # strings extend the element text until the first child
                # element was added, and that child's tail afterwards
                if last_child is None:
                    _setNodeText(self._c_node,
                                 (_collectText(self._c_node.children) or '') + child)
                else:
                    _setTailText(last_child._c_node,
                                 (_collectText(last_child._c_node.next) or '') + child)
            elif isinstance(child, _Element):
                last_child = child
                _appendChild(self, last_child)
            elif isinstance(child, type) and issubclass(child, ElementBase):
                # an ElementBase subclass is instantiated without arguments
                last_child = child()
                _appendChild(self, last_child)
            else:
                raise TypeError, f"Invalid child type: {type(child)!r}"
|
||||
|
||||
cdef class CommentBase(_Comment):
    """All custom Comment classes must inherit from this one.

    To create an XML Comment instance, use the ``Comment()`` factory.

    Subclasses *must not* override __init__ or __new__ as it is
    absolutely undefined when these objects will be created or
    destroyed.  All persistent state of Comments must be stored in the
    underlying XML.  If you really need to initialize the object after
    creation, you can implement an ``_init(self)`` method that will be
    called after object creation.
    """
    def __init__(self, text):
        # copied from Comment() factory
        cdef _Document doc
        cdef xmlDoc* c_doc
        # None is treated as an empty comment
        if text is None:
            text = b''
        else:
            text = _utf8(text)
        # create a fresh document, then attach the new comment node to it
        c_doc = _newXMLDoc()
        doc = _documentFactory(c_doc, None)
        self._c_node = _createComment(c_doc, _xcstr(text))
        if self._c_node is NULL:
            raise MemoryError()
        tree.xmlAddChild(<xmlNode*>c_doc, self._c_node)
        _registerProxy(self, doc, self._c_node)
        # user-level initialisation hook (see class docstring)
        self._init()
|
||||
|
||||
cdef class PIBase(_ProcessingInstruction):
    """All custom Processing Instruction classes must inherit from this one.

    To create an XML ProcessingInstruction instance, use the ``PI()``
    factory.

    Subclasses *must not* override __init__ or __new__ as it is
    absolutely undefined when these objects will be created or
    destroyed.  All persistent state of PIs must be stored in the
    underlying XML.  If you really need to initialize the object after
    creation, you can implement an ``_init(self)`` method that will be
    called after object creation.
    """
    def __init__(self, target, text=None):
        # copied from PI() factory
        cdef _Document doc
        cdef xmlDoc* c_doc
        target = _utf8(target)
        # None is treated as empty content
        if text is None:
            text = b''
        else:
            text = _utf8(text)
        # create a fresh document, then attach the new PI node to it
        c_doc = _newXMLDoc()
        doc = _documentFactory(c_doc, None)
        self._c_node = _createPI(c_doc, _xcstr(target), _xcstr(text))
        if self._c_node is NULL:
            raise MemoryError()
        tree.xmlAddChild(<xmlNode*>c_doc, self._c_node)
        _registerProxy(self, doc, self._c_node)
        # user-level initialisation hook (see class docstring)
        self._init()
|
||||
|
||||
cdef class EntityBase(_Entity):
    """All custom Entity classes must inherit from this one.

    To create an XML Entity instance, use the ``Entity()`` factory.

    Subclasses *must not* override __init__ or __new__ as it is
    absolutely undefined when these objects will be created or
    destroyed.  All persistent state of Entities must be stored in the
    underlying XML.  If you really need to initialize the object after
    creation, you can implement an ``_init(self)`` method that will be
    called after object creation.
    """
    def __init__(self, name):
        cdef _Document doc
        cdef xmlDoc* c_doc
        name_utf = _utf8(name)
        c_name = _xcstr(name_utf)
        # a leading '#' marks a character reference, which is validated
        # without the '#'; anything else must be a valid XML name
        if c_name[0] == c'#':
            if not _characterReferenceIsValid(c_name + 1):
                raise ValueError, f"Invalid character reference: '{name}'"
        elif not _xmlNameIsValid(c_name):
            raise ValueError, f"Invalid entity reference: '{name}'"
        # create a fresh document, then attach the new entity node to it
        c_doc = _newXMLDoc()
        doc = _documentFactory(c_doc, None)
        self._c_node = _createEntity(c_doc, c_name)
        if self._c_node is NULL:
            raise MemoryError()
        tree.xmlAddChild(<xmlNode*>c_doc, self._c_node)
        _registerProxy(self, doc, self._c_node)
        # user-level initialisation hook (see class docstring)
        self._init()
|
||||
|
||||
|
||||
cdef int _validateNodeClass(xmlNode* c_node, cls) except -1:
    # Check that 'cls', the result of a class lookup, is a class derived from
    # the base class that matches the type of 'c_node' (element, comment,
    # entity reference or processing instruction).
    # Returns 0 on success and raises TypeError otherwise.
    if c_node.type == tree.XML_ELEMENT_NODE:
        expected = ElementBase
    elif c_node.type == tree.XML_COMMENT_NODE:
        expected = CommentBase
    elif c_node.type == tree.XML_ENTITY_REF_NODE:
        expected = EntityBase
    elif c_node.type == tree.XML_PI_NODE:
        expected = PIBase
    else:
        assert False, f"Unknown node type: {c_node.type}"

    if not (isinstance(cls, type) and issubclass(cls, expected)):
        # Report the expected base class and the offending object themselves;
        # formatting type(...) of a class only shows its metaclass.
        raise TypeError(
            f"result of class lookup must be subclass of {expected!r}, got {cls!r}")
    return 0
|
||||
|
||||
|
||||
################################################################################
|
||||
# Element class lookup
|
||||
|
||||
ctypedef public object (*_element_class_lookup_function)(object, _Document, xmlNode*)
|
||||
|
||||
# class to store element class lookup functions
|
||||
cdef public class ElementClassLookup [ type LxmlElementClassLookupType,
                                       object LxmlElementClassLookup ]:
    """ElementClassLookup(self)
    Superclass of Element class lookups.
    """
    # C function pointer that performs the lookup for this scheme; it is set
    # by subclasses and may be left NULL (FallbackElementClassLookup checks
    # for that).
    cdef _element_class_lookup_function _lookup_function
|
||||
|
||||
|
||||
cdef public class FallbackElementClassLookup(ElementClassLookup) \
         [ type LxmlFallbackElementClassLookupType,
           object LxmlFallbackElementClassLookup ]:
    """FallbackElementClassLookup(self, fallback=None)

    Superclass of Element class lookups with additional fallback.
    """
    # the fallback lookup object, or None while the default lookup is used
    cdef readonly ElementClassLookup fallback
    # lookup function that _callLookupFallback() invokes with 'fallback'
    cdef _element_class_lookup_function _fallback_function
    def __cinit__(self):
        # fall back to default lookup
        self._fallback_function = _lookupDefaultElementClass

    def __init__(self, ElementClassLookup fallback=None):
        if fallback is not None:
            self._setFallback(fallback)
        else:
            self._fallback_function = _lookupDefaultElementClass

    cdef void _setFallback(self, ElementClassLookup lookup):
        """Sets the fallback scheme for this lookup method.
        """
        self.fallback = lookup
        self._fallback_function = lookup._lookup_function
        # a lookup without its own function falls back to the default one
        if self._fallback_function is NULL:
            self._fallback_function = _lookupDefaultElementClass

    def set_fallback(self, ElementClassLookup lookup not None):
        """set_fallback(self, lookup)

        Sets the fallback scheme for this lookup method.
        """
        self._setFallback(lookup)
|
||||
|
||||
cdef inline object _callLookupFallback(FallbackElementClassLookup lookup,
                                       _Document doc, xmlNode* c_node):
    """Delegate the class lookup for ``c_node`` to the fallback of ``lookup``.

    The fallback function receives the configured fallback object as its
    state argument.
    """
    fallback_state = lookup.fallback
    return lookup._fallback_function(fallback_state, doc, c_node)
|
||||
|
||||
|
||||
################################################################################
|
||||
# default lookup scheme
|
||||
|
||||
cdef class ElementDefaultClassLookup(ElementClassLookup):
    """ElementDefaultClassLookup(self, element=None, comment=None, pi=None, entity=None)

    Element class lookup scheme that always returns the default Element
    class.

    The keyword arguments ``element``, ``comment``, ``pi`` and ``entity``
    accept the respective Element classes.

    Raises TypeError if an argument is given that is not a subclass of
    the matching base class (ElementBase, CommentBase, PIBase, EntityBase).
    """
    cdef readonly object element_class
    cdef readonly object comment_class
    cdef readonly object pi_class
    cdef readonly object entity_class

    def __cinit__(self):
        self._lookup_function = _lookupDefaultElementClass

    def __init__(self, element=None, comment=None, pi=None, entity=None):
        # The isinstance() guards make non-class arguments fail with the
        # messages below rather than with the generic issubclass() error.
        if element is None:
            self.element_class = _Element
        elif isinstance(element, type) and issubclass(element, ElementBase):
            self.element_class = element
        else:
            raise TypeError("element class must be subclass of ElementBase")

        if comment is None:
            self.comment_class = _Comment
        elif isinstance(comment, type) and issubclass(comment, CommentBase):
            self.comment_class = comment
        else:
            raise TypeError("comment class must be subclass of CommentBase")

        if entity is None:
            self.entity_class = _Entity
        elif isinstance(entity, type) and issubclass(entity, EntityBase):
            self.entity_class = entity
        else:
            raise TypeError("Entity class must be subclass of EntityBase")

        if pi is None:
            # special case: None lets _lookupDefaultElementClass choose
            # between the plain and the XSLT processing instruction class
            self.pi_class = None
        elif isinstance(pi, type) and issubclass(pi, PIBase):
            self.pi_class = pi
        else:
            raise TypeError("PI class must be subclass of PIBase")
|
||||
|
||||
cdef object _lookupDefaultElementClass(state, _Document _doc, xmlNode* c_node):
    """Trivial class lookup function that always returns the default class.

    ``state`` is either None or an ElementDefaultClassLookup whose
    configured classes take the place of the built-in ones.  Unknown node
    types fail an assertion.
    """
    cdef bint use_builtin = state is None

    if c_node.type == tree.XML_ELEMENT_NODE:
        if use_builtin:
            return _Element
        return (<ElementDefaultClassLookup>state).element_class

    if c_node.type == tree.XML_COMMENT_NODE:
        if use_builtin:
            return _Comment
        return (<ElementDefaultClassLookup>state).comment_class

    if c_node.type == tree.XML_ENTITY_REF_NODE:
        if use_builtin:
            return _Entity
        return (<ElementDefaultClassLookup>state).entity_class

    if c_node.type == tree.XML_PI_NODE:
        if not use_builtin and (<ElementDefaultClassLookup>state).pi_class is not None:
            return (<ElementDefaultClassLookup>state).pi_class
        # special case XSLT-PI: an "xml-stylesheet" PI whose content
        # mentions text/xsl or text/xml gets the XSLT PI class
        if (c_node.name is not NULL and c_node.content is not NULL
                and tree.xmlStrcmp(c_node.name, <unsigned char*>"xml-stylesheet") == 0
                and (tree.xmlStrstr(c_node.content, <unsigned char*>"text/xsl") is not NULL or
                     tree.xmlStrstr(c_node.content, <unsigned char*>"text/xml") is not NULL)):
            return _XSLTProcessingInstruction
        return _ProcessingInstruction

    assert False, f"Unknown node type: {c_node.type}"
|
||||
|
||||
|
||||
################################################################################
|
||||
# attribute based lookup scheme
|
||||
|
||||
cdef class AttributeBasedElementClassLookup(FallbackElementClassLookup):
    """AttributeBasedElementClassLookup(self, attribute_name, class_mapping, fallback=None)

    Checks an attribute of an Element and looks up the value in a
    class dictionary.

    Arguments:
      - attribute name - '{ns}name' style string
      - class mapping  - Python dict mapping attribute values to Element classes
      - fallback       - optional fallback lookup mechanism

    A None key in the class mapping will be checked if the attribute is
    missing.
    """
    # private copy of the user supplied mapping
    cdef object _class_mapping
    # (namespace, name) pair as returned by _getNsTag()
    cdef tuple _pytag
    # C strings derived from the entries of _pytag
    # NOTE(review): presumably borrowed from the objects in _pytag, which
    # would be why the tuple is stored - confirm against _xcstr()
    cdef const_xmlChar* _c_ns
    cdef const_xmlChar* _c_name

    def __cinit__(self):
        self._lookup_function = _attribute_class_lookup

    def __init__(self, attribute_name, class_mapping,
                 ElementClassLookup fallback=None):
        self._pytag = _getNsTag(attribute_name)
        ns = self._pytag[0]
        name = self._pytag[1]
        if ns is not None:
            self._c_ns = _xcstr(ns)
        else:
            self._c_ns = NULL
        self._c_name = _xcstr(name)
        # copy the mapping so that later changes by the caller have no effect
        self._class_mapping = dict(class_mapping)

        FallbackElementClassLookup.__init__(self, fallback)
|
||||
|
||||
cdef object _attribute_class_lookup(state, _Document doc, xmlNode* c_node):
    """Lookup function of AttributeBasedElementClassLookup.

    For element nodes, the value of the configured attribute is looked up
    in the class mapping.  A hit is validated against the node type and
    returned.  Everything else is delegated to the fallback.
    """
    cdef AttributeBasedElementClassLookup lookup = <AttributeBasedElementClassLookup>state
    cdef python.PyObject* c_entry

    if c_node.type != tree.XML_ELEMENT_NODE:
        return _callLookupFallback(lookup, doc, c_node)

    value = _attributeValueFromNsName(
        c_node, lookup._c_ns, lookup._c_name)
    c_entry = python.PyDict_GetItem(lookup._class_mapping, value)
    if c_entry is NULL:
        # no class registered for this attribute value
        return _callLookupFallback(lookup, doc, c_node)

    cls = <object>c_entry
    _validateNodeClass(c_node, cls)
    return cls
|
||||
|
||||
|
||||
################################################################################
|
||||
# per-parser lookup scheme
|
||||
|
||||
cdef class ParserBasedElementClassLookup(FallbackElementClassLookup):
    """ParserBasedElementClassLookup(self, fallback=None)

    Element class lookup based on the XML parser.
    """
    def __cinit__(self):
        # delegate to the lookup configured on the document's parser
        self._lookup_function = _parser_class_lookup
|
||||
|
||||
cdef object _parser_class_lookup(state, _Document doc, xmlNode* c_node):
    """Lookup function of ParserBasedElementClassLookup.

    Uses the class lookup set on the parser of ``doc`` if there is one,
    otherwise the fallback of the lookup object passed as ``state``.
    """
    cdef ElementClassLookup parser_lookup = doc._parser._class_lookup
    if parser_lookup is None:
        return _callLookupFallback(<FallbackElementClassLookup>state, doc, c_node)
    return parser_lookup._lookup_function(parser_lookup, doc, c_node)
|
||||
|
||||
|
||||
################################################################################
|
||||
# custom class lookup based on node type, namespace, name
|
||||
|
||||
cdef class CustomElementClassLookup(FallbackElementClassLookup):
    """CustomElementClassLookup(self, fallback=None)

    Element class lookup based on a subclass method.

    You can inherit from this class and override the method::

        lookup(self, type, doc, namespace, name)

    to lookup the element class for a node. Arguments of the method:
    * type:      one of 'element', 'comment', 'PI', 'entity'
    * doc:       document that the node is in
    * namespace: namespace URI of the node (or None for comments/PIs/entities)
    * name:      name of the element/entity, None for comments, target for PIs

    If you return None from this method, the fallback will be called.
    """
    def __cinit__(self):
        self._lookup_function = _custom_class_lookup

    def lookup(self, type, doc, namespace, name):
        "lookup(self, type, doc, namespace, name)"
        # The base implementation selects nothing, so the fallback decides.
        return None
|
||||
|
||||
cdef object _custom_class_lookup(state, _Document doc, xmlNode* c_node):
    """Lookup function of CustomElementClassLookup.

    Translates the C node into the (type, doc, namespace, name) arguments
    of the user's ``lookup()`` method.  A non-None result is validated
    against the node type and returned; None delegates to the fallback.
    """
    cdef CustomElementClassLookup lookup = <CustomElementClassLookup>state

    # every node type without a name of its own is reported as "element"
    element_type = "element"
    if c_node.type == tree.XML_COMMENT_NODE:
        element_type = "comment"
    elif c_node.type == tree.XML_PI_NODE:
        element_type = "PI"
    elif c_node.type == tree.XML_ENTITY_REF_NODE:
        element_type = "entity"

    name = funicode(c_node.name) if c_node.name is not NULL else None
    c_str = tree._getNs(c_node)
    if c_str is NULL:
        ns = None
    else:
        ns = funicode(c_str)

    cls = lookup.lookup(element_type, doc, ns, name)
    if cls is None:
        return _callLookupFallback(lookup, doc, c_node)
    _validateNodeClass(c_node, cls)
    return cls
|
||||
|
||||
|
||||
################################################################################
|
||||
# read-only tree based class lookup
|
||||
|
||||
cdef class PythonElementClassLookup(FallbackElementClassLookup):
    """PythonElementClassLookup(self, fallback=None)

    Element class lookup based on a subclass method.

    This class lookup scheme allows access to the entire XML tree in
    read-only mode.  To use it, re-implement the ``lookup(self, doc,
    root)`` method in a subclass::

        from lxml import etree, pyclasslookup

        class MyElementClass(etree.ElementBase):
            honkey = True

        class MyLookup(pyclasslookup.PythonElementClassLookup):
            def lookup(self, doc, root):
                if root.tag == "sometag":
                    return MyElementClass
                else:
                    for child in root:
                        if child.tag == "someothertag":
                            return MyElementClass
                # delegate to default
                return None

    If you return None from this method, the fallback will be called.

    The first argument is the opaque document instance that contains
    the Element.  The second argument is a lightweight Element proxy
    implementation that is only valid during the lookup.  Do not try
    to keep a reference to it.  Once the lookup is done, the proxy
    will be invalid.

    Also, you cannot wrap such a read-only Element in an ElementTree,
    and you must take care not to keep a reference to them outside of
    the `lookup()` method.

    Note that the API of the Element objects is not complete.  It is
    purely read-only and does not support all features of the normal
    `lxml.etree` API (such as XPath, extended slicing or some
    iteration methods).

    See https://lxml.de/element_classes.html
    """
    def __cinit__(self):
        self._lookup_function = _python_class_lookup

    def lookup(self, doc, element):
        """lookup(self, doc, element)

        Override this method to implement your own lookup scheme.
        """
        # The base implementation selects nothing, so the fallback decides.
        return None
|
||||
|
||||
cdef object _python_class_lookup(state, _Document doc, tree.xmlNode* c_node):
    """Lookup function of PythonElementClassLookup.

    Wraps ``c_node`` in a read-only proxy, passes it to the user's
    ``lookup()`` method and releases the proxy afterwards.  A non-None
    result is validated against the node type and returned; None
    delegates to the fallback.  Exceptions raised by ``lookup()``
    propagate to the caller.
    """
    cdef PythonElementClassLookup lookup
    cdef _ReadOnlyProxy proxy
    lookup = <PythonElementClassLookup>state

    proxy = _newReadOnlyProxy(None, c_node)
    try:
        cls = lookup.lookup(doc, proxy)
    finally:
        # The proxy is documented as valid only during the lookup, so
        # release it even when the user code raises.
        _freeReadOnlyProxies(proxy)

    if cls is not None:
        _validateNodeClass(c_node, cls)
        return cls
    return _callLookupFallback(lookup, doc, c_node)
|
||||
|
||||
################################################################################
|
||||
# Global setup
|
||||
|
||||
# Globally active lookup function and the state object that is passed to it.
cdef _element_class_lookup_function LOOKUP_ELEMENT_CLASS
cdef object ELEMENT_CLASS_LOOKUP_STATE

cdef void _setElementClassLookupFunction(
        _element_class_lookup_function function, object state):
    """Install ``function`` and ``state`` as the global element class lookup.

    A NULL function selects the default lookup (and its own state)
    instead of the values passed in.
    """
    global LOOKUP_ELEMENT_CLASS, ELEMENT_CLASS_LOOKUP_STATE
    if function is NULL:
        function = DEFAULT_ELEMENT_CLASS_LOOKUP._lookup_function
        state = DEFAULT_ELEMENT_CLASS_LOOKUP

    ELEMENT_CLASS_LOOKUP_STATE = state
    LOOKUP_ELEMENT_CLASS = function
|
||||
|
||||
def set_element_class_lookup(ElementClassLookup lookup = None):
    """set_element_class_lookup(lookup = None)

    Set the global element class lookup method.

    This defines the main entry point for looking up element implementations.
    The standard implementation uses the :class:`ParserBasedElementClassLookup`
    to delegate to different lookup schemes for each parser.

    Passing None (or a lookup without a lookup function) restores the
    default lookup.

    .. warning::

        This should only be changed by applications, not by library packages.
        In most cases, parser specific lookups should be preferred,
        which can be configured via
        :meth:`~lxml.etree.XMLParser.set_element_class_lookup`
        (and the same for HTML parsers).

        Globally replacing the element class lookup by something other than a
        :class:`ParserBasedElementClassLookup` will prevent parser specific lookup
        schemes from working. Several tools rely on parser specific lookups,
        including :mod:`lxml.html` and :mod:`lxml.objectify`.
    """
    if lookup is not None and lookup._lookup_function is not NULL:
        _setElementClassLookupFunction(lookup._lookup_function, lookup)
    else:
        _setElementClassLookupFunction(NULL, None)
|
||||
|
||||
# default setup: parser delegation
# The module-wide default is a parser based lookup; it is installed as the
# global lookup right away.
cdef ParserBasedElementClassLookup DEFAULT_ELEMENT_CLASS_LOOKUP
DEFAULT_ELEMENT_CLASS_LOOKUP = ParserBasedElementClassLookup()

set_element_class_lookup(DEFAULT_ELEMENT_CLASS_LOOKUP)
|
||||
@@ -1,215 +0,0 @@
|
||||
# functions for tree cleanup and removing elements from subtrees
|
||||
|
||||
def cleanup_namespaces(tree_or_element, top_nsmap=None, keep_ns_prefixes=None):
    """cleanup_namespaces(tree_or_element, top_nsmap=None, keep_ns_prefixes=None)

    Remove all namespace declarations from a subtree that are not used
    by any of the elements or attributes in that tree.

    If a 'top_nsmap' is provided, it must be a mapping from prefixes
    to namespace URIs.  These namespaces will be declared on the top
    element of the subtree before running the cleanup, which allows
    moving namespace declarations to the top of the tree.

    If a 'keep_ns_prefixes' is provided, it must be a list of prefixes.
    These prefixes will not be removed as part of the cleanup.
    """
    element = _rootNodeOrRaise(tree_or_element)
    c_element = element._c_node

    if top_nsmap:
        doc = element._doc
        # declare namespaces from nsmap, then apply them to the subtree
        _setNodeNamespaces(c_element, doc, None, top_nsmap)
        moveNodeToDocument(doc, c_element.doc, c_element)

    # convert the prefixes with _utf8() for the C level cleanup;
    # an empty or missing list means "keep nothing"
    if keep_ns_prefixes:
        keep_ns_prefixes = {_utf8(prefix) for prefix in keep_ns_prefixes}
    else:
        keep_ns_prefixes = None

    _removeUnusedNamespaceDeclarations(c_element, keep_ns_prefixes)
|
||||
|
||||
|
||||
def strip_attributes(tree_or_element, *attribute_names):
    """strip_attributes(tree_or_element, *attribute_names)

    Delete all attributes with the provided attribute names from an
    Element (or ElementTree) and its descendants.

    Attribute names can contain wildcards as in `_Element.iter`.

    Example usage::

        strip_attributes(root_element,
                         'simpleattr',
                         '{http://some/ns}attrname',
                         '{http://other/ns}*')
    """
    cdef _MultiTagMatcher matcher
    # resolve (and thereby validate) the input even if no names were given
    element = _rootNodeOrRaise(tree_or_element)
    if not attribute_names:
        return

    matcher = _MultiTagMatcher.__new__(_MultiTagMatcher, attribute_names)
    matcher.cacheTags(element._doc)
    if not matcher.rejectsAllAttributes():
        _strip_attributes(element._c_node, matcher)
|
||||
|
||||
|
||||
cdef _strip_attributes(xmlNode* c_node, _MultiTagMatcher matcher):
    """Remove all attributes accepted by ``matcher`` from the elements
    visited by the tree walk that starts at ``c_node``.
    """
    cdef xmlAttr* c_current
    cdef xmlAttr* c_following
    # NOTE(review): the BEGIN/END macro pair presumably runs the enclosed
    # statements once per visited node - confirm against the macro definition
    tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node, c_node, 1)
    if c_node.type == tree.XML_ELEMENT_NODE:
        c_current = c_node.properties
        while c_current is not NULL:
            # remember the successor before the attribute may be removed
            c_following = c_current.next
            if matcher.matchesAttribute(c_current):
                tree.xmlRemoveProp(c_current)
            c_current = c_following
    tree.END_FOR_EACH_ELEMENT_FROM(c_node)
|
||||
|
||||
|
||||
def strip_elements(tree_or_element, *tag_names, bint with_tail=True):
    """strip_elements(tree_or_element, *tag_names, with_tail=True)

    Delete all elements with the provided tag names from a tree or
    subtree.  This will remove the elements and their entire subtree,
    including all their attributes, text content and descendants.  It
    will also remove the tail text of the element unless you
    explicitly set the ``with_tail`` keyword argument option to False.

    Tag names can contain wildcards as in `_Element.iter`.

    Note that this will not delete the element (or ElementTree root
    element) that you passed even if it matches.  It will only treat
    its descendants.  If you want to include the root element, check
    its tag name directly before even calling this function.

    Example usage::

        strip_elements(some_element,
            'simpletagname',             # non-namespaced tag
            '{http://some/ns}tagname',   # namespaced tag
            '{http://some/other/ns}*',   # any tag from a namespace
            lxml.etree.Comment           # comments
            )
    """
    cdef _MultiTagMatcher matcher
    # resolve (and thereby validate) the input even if no tags were given
    doc = _documentOrRaise(tree_or_element)
    element = _rootNodeOrRaise(tree_or_element)
    if not tag_names:
        return

    matcher = _MultiTagMatcher.__new__(_MultiTagMatcher, tag_names)
    matcher.cacheTags(doc)
    if matcher.rejectsAll():
        return

    if isinstance(tree_or_element, _ElementTree):
        # include PIs and comments next to the root node
        if matcher.matchesType(tree.XML_COMMENT_NODE):
            _removeSiblings(element._c_node, tree.XML_COMMENT_NODE, with_tail)
        if matcher.matchesType(tree.XML_PI_NODE):
            _removeSiblings(element._c_node, tree.XML_PI_NODE, with_tail)
    _strip_elements(doc, element._c_node, matcher, with_tail)
|
||||
|
||||
cdef _strip_elements(_Document doc, xmlNode* c_node, _MultiTagMatcher matcher,
                     bint with_tail):
    """Remove the matching children of every element visited by the tree
    walk that starts at ``c_node``.

    Matching elements are removed through ``_removeNode()``; other
    matching nodes are unlinked and handed to ``attemptDeallocation()``.
    ``with_tail`` decides whether the tail text goes away as well.
    """
    cdef xmlNode* c_current
    cdef xmlNode* c_following

    tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node, c_node, 1)
    if c_node.type == tree.XML_ELEMENT_NODE:
        # we run through the children here to prevent any problems
        # with the tree iteration which would occur if we unlinked the
        # c_node itself
        c_current = _findChildForwards(c_node, 0)
        while c_current is not NULL:
            # remember the successor before the node may be removed
            c_following = _nextElement(c_current)
            if matcher.matches(c_current):
                if c_current.type != tree.XML_ELEMENT_NODE:
                    if with_tail:
                        _removeText(c_current.next)
                    tree.xmlUnlinkNode(c_current)
                    attemptDeallocation(c_current)
                else:
                    if not with_tail:
                        tree.xmlUnlinkNode(c_current)
                    _removeNode(doc, c_current)
            c_current = c_following
    tree.END_FOR_EACH_ELEMENT_FROM(c_node)
|
||||
|
||||
|
||||
def strip_tags(tree_or_element, *tag_names):
    """strip_tags(tree_or_element, *tag_names)

    Delete all elements with the provided tag names from a tree or
    subtree.  This will remove the elements and their attributes, but
    *not* their text/tail content or descendants.  Instead, it will
    merge the text content and children of the element into its
    parent.

    Tag names can contain wildcards as in `_Element.iter`.

    Note that this will not delete the element (or ElementTree root
    element) that you passed even if it matches.  It will only treat
    its descendants.

    Example usage::

        strip_tags(some_element,
            'simpletagname',             # non-namespaced tag
            '{http://some/ns}tagname',   # namespaced tag
            '{http://some/other/ns}*',   # any tag from a namespace
            Comment                      # comments (including their text!)
            )
    """
    cdef _MultiTagMatcher matcher
    # resolve (and thereby validate) the input even if no tags were given
    doc = _documentOrRaise(tree_or_element)
    element = _rootNodeOrRaise(tree_or_element)
    if not tag_names:
        return

    matcher = _MultiTagMatcher.__new__(_MultiTagMatcher, tag_names)
    matcher.cacheTags(doc)
    if matcher.rejectsAll():
        return

    if isinstance(tree_or_element, _ElementTree):
        # include PIs and comments next to the root node
        if matcher.matchesType(tree.XML_COMMENT_NODE):
            _removeSiblings(element._c_node, tree.XML_COMMENT_NODE, 0)
        if matcher.matchesType(tree.XML_PI_NODE):
            _removeSiblings(element._c_node, tree.XML_PI_NODE, 0)
    _strip_tags(doc, element._c_node, matcher)
|
||||
|
||||
cdef _strip_tags(_Document doc, xmlNode* c_node, _MultiTagMatcher matcher):
    """Strip the matching children of every element visited by the tree
    walk that starts at ``c_node``.

    Matching elements are replaced by their own children; other matching
    nodes are unlinked and handed to ``attemptDeallocation()``.
    """
    cdef xmlNode* c_current
    cdef xmlNode* c_following

    tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node, c_node, 1)
    if c_node.type == tree.XML_ELEMENT_NODE:
        # we run through the children here to prevent any problems
        # with the tree iteration which would occur if we unlinked the
        # c_node itself
        c_current = _findChildForwards(c_node, 0)
        while c_current is not NULL:
            if matcher.matches(c_current):
                if c_current.type == tree.XML_ELEMENT_NODE:
                    # continue with the first child of the stripped element
                    # (if any), since the children take its place
                    c_following = _findChildForwards(c_current, 0) or _nextElement(c_current)
                    _replaceNodeByChildren(doc, c_current)
                    if not attemptDeallocation(c_current):
                        if c_current.nsDef is not NULL:
                            # make namespaces absolute
                            moveNodeToDocument(doc, doc._c_doc, c_current)
                else:
                    c_following = _nextElement(c_current)
                    tree.xmlUnlinkNode(c_current)
                    attemptDeallocation(c_current)
                c_current = c_following
            else:
                c_current = _nextElement(c_current)
    tree.END_FOR_EACH_ELEMENT_FROM(c_node)
|
||||
@@ -1,101 +0,0 @@
|
||||
"""CSS Selectors based on XPath.
|
||||
|
||||
This module supports selecting XML/HTML tags based on CSS selectors.
|
||||
See the `CSSSelector` class for details.
|
||||
|
||||
This is a thin wrapper around cssselect 0.7 or later.
|
||||
"""
|
||||
|
||||
|
||||
from . import etree
|
||||
try:
|
||||
import cssselect as external_cssselect
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
'cssselect does not seem to be installed. '
|
||||
'See https://pypi.org/project/cssselect/')
|
||||
|
||||
|
||||
# Re-export the cssselect exception classes under this module's namespace.
SelectorSyntaxError = external_cssselect.SelectorSyntaxError
ExpressionError = external_cssselect.ExpressionError
SelectorError = external_cssselect.SelectorError


__all__ = [
    'SelectorSyntaxError',
    'ExpressionError',
    'SelectorError',
    'CSSSelector',
]
|
||||
|
||||
|
||||
class LxmlTranslator(external_cssselect.GenericTranslator):
    """
    A custom CSS selector to XPath translator with lxml-specific extensions.
    """
    def xpath_contains_function(self, xpath, function):
        """Translate the ``:contains()`` pseudo-class.

        The match is case-insensitive: both the element's string value and
        the argument are lower-cased.  Raises ExpressionError unless the
        function has exactly one string or ident argument.
        """
        # Defined there, removed in later drafts:
        # http://www.w3.org/TR/2001/CR-css3-selectors-20011113/#content-selectors
        accepted_signatures = (['STRING'], ['IDENT'])
        if function.argument_types() not in accepted_signatures:
            raise ExpressionError(
                "Expected a single string or ident for :contains(), got %r"
                % function.arguments)
        needle = function.arguments[0].value.lower()
        condition = (
            'contains(__lxml_internal_css:lower-case(string(.)), %s)'
            % self.xpath_literal(needle))
        return xpath.add_condition(condition)
|
||||
|
||||
|
||||
class LxmlHTMLTranslator(LxmlTranslator, external_cssselect.HTMLTranslator):
    """
    lxml extensions + HTML support.

    Combines LxmlTranslator with cssselect's HTMLTranslator and adds
    nothing of its own.
    """
|
||||
|
||||
|
||||
def _make_lower_case(context, s):
|
||||
return s.lower()
|
||||
|
||||
# Register _make_lower_case() as the XPath function "lower-case" under the
# prefix that LxmlTranslator.xpath_contains_function() puts into its XPath.
ns = etree.FunctionNamespace('http://codespeak.net/lxml/css/')
ns.prefix = '__lxml_internal_css'
ns['lower-case'] = _make_lower_case
|
||||
|
||||
|
||||
class CSSSelector(etree.XPath):
    """A CSS selector.

    Usage::

        >>> from lxml import etree, cssselect
        >>> select = cssselect.CSSSelector("a tag > child")

        >>> root = etree.XML("<a><b><c/><tag><child>TEXT</child></tag></b></a>")
        >>> [ el.tag for el in select(root) ]
        ['child']

    To use CSS namespaces, you need to pass a prefix-to-namespace
    mapping as ``namespaces`` keyword argument::

        >>> rdfns = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
        >>> select_ns = cssselect.CSSSelector('root > rdf|Description',
        ...                                   namespaces={'rdf': rdfns})

        >>> rdf = etree.XML((
        ...     '<root xmlns:rdf="%s">'
        ...       '<rdf:Description>blah</rdf:Description>'
        ...     '</root>') % rdfns)
        >>> [(el.tag, el.text) for el in select_ns(rdf)]
        [('{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description', 'blah')]

    The ``translator`` argument accepts ``'xml'`` (the default), ``'html'``
    or ``'xhtml'``.  Any other value is used as the translator object
    itself and must provide a ``css_to_xpath()`` method.

    """
    def __init__(self, css, namespaces=None, translator='xml'):
        if translator == 'xml':
            css_translator = LxmlTranslator()
        elif translator == 'html':
            css_translator = LxmlHTMLTranslator()
        elif translator == 'xhtml':
            css_translator = LxmlHTMLTranslator(xhtml=True)
        else:
            # not one of the known names: use the value as the translator
            css_translator = translator
        super().__init__(css_translator.css_to_xpath(css), namespaces=namespaces)
        # keep the original selector text for __repr__()
        self.css = css

    def __repr__(self):
        class_name = self.__class__.__name__
        return f'<{class_name} {abs(id(self)):x} for {self.css!r}>'
|
||||
@@ -1,36 +0,0 @@
|
||||
@cython.final
@cython.internal
cdef class _MemDebug:
    """Debugging support for the memory allocation in libxml2.
    """
    def bytes_used(self):
        """bytes_used(self)

        Returns the total amount of memory (in bytes) currently used by libxml2.
        Note that libxml2 constrains this value to a C int, which limits
        the accuracy on 64 bit systems.
        """
        used = tree.xmlMemUsed()
        return used

    def blocks_used(self):
        """blocks_used(self)

        Returns the total number of memory blocks currently allocated by libxml2.
        Note that libxml2 constrains this value to a C int, which limits
        the accuracy on 64 bit systems.
        """
        blocks = tree.xmlMemBlocks()
        return blocks

    def dict_size(self):
        """dict_size(self)

        Returns the current size of the global name dictionary used by libxml2
        for the current thread.  Each thread has its own dictionary.

        Raises MemoryError if no dictionary could be obtained.
        """
        c_dict = __GLOBAL_PARSER_CONTEXT._getThreadDict(NULL)
        if c_dict is not NULL:
            return tree.xmlDictSize(c_dict)
        raise MemoryError()


# module level instance of the debugging helper
memory_debugger = _MemDebug()
|
||||
@@ -1,178 +0,0 @@
|
||||
# Custom resolver API
|
||||
|
||||
# Kinds of input that an _InputDocument can describe.
ctypedef enum _InputDocumentDataType:
    PARSER_DATA_INVALID
    PARSER_DATA_EMPTY
    PARSER_DATA_STRING
    PARSER_DATA_FILENAME
    PARSER_DATA_FILE

@cython.final
@cython.internal
cdef class _InputDocument:
    """Result object of the ``Resolver.resolve_*()`` methods.

    ``_type`` tells which kind of input was resolved; the other fields
    are filled in by the ``resolve_*()`` method that created the object.
    """
    cdef _InputDocumentDataType _type
    # data of a PARSER_DATA_STRING document
    cdef bytes _data_bytes
    # file name or base URL of the input, if any
    cdef object _filename
    # file-like object of a PARSER_DATA_FILE document
    cdef object _file
    # value of the ``close`` flag passed to resolve_file()
    cdef bint _close_file

    def __cinit__(self):
        # invalid until a resolve_*() method sets the actual type
        self._type = PARSER_DATA_INVALID
|
||||
|
||||
|
||||
cdef class Resolver:
    "This is the base class of all resolvers."
    def resolve(self, system_url, public_id, context):
        """resolve(self, system_url, public_id, context)

        Override this method to resolve an external source by
        ``system_url`` and ``public_id``.  The third argument is an
        opaque context object.

        Return the result of one of the ``resolve_*()`` methods.
        """
        # The base implementation resolves nothing.
        return None

    def resolve_empty(self, context):
        """resolve_empty(self, context)

        Return an empty input document.

        Pass context as parameter.
        """
        cdef _InputDocument empty_doc = _InputDocument()
        empty_doc._type = PARSER_DATA_EMPTY
        return empty_doc

    def resolve_string(self, string, context, *, base_url=None):
        """resolve_string(self, string, context, base_url=None)

        Return a parsable string as input document.

        Pass data string and context as parameters.  You can pass the
        source URL or filename through the ``base_url`` keyword
        argument.

        Raises TypeError if ``string`` is neither a byte string nor a
        unicode string.
        """
        cdef _InputDocument string_doc
        if isinstance(string, unicode):
            # unicode input is stored as UTF-8 encoded bytes
            string = (<unicode>string).encode('utf8')
        elif not isinstance(string, bytes):
            raise TypeError("argument must be a byte string or unicode string")
        string_doc = _InputDocument()
        string_doc._type = PARSER_DATA_STRING
        string_doc._data_bytes = string
        if base_url is not None:
            string_doc._filename = _encodeFilename(base_url)
        return string_doc

    def resolve_filename(self, filename, context):
        """resolve_filename(self, filename, context)

        Return the name of a parsable file as input document.

        Pass filename and context as parameters.  You can also pass a
        URL with an HTTP, FTP or file target.
        """
        cdef _InputDocument file_doc = _InputDocument()
        file_doc._type = PARSER_DATA_FILENAME
        file_doc._filename = _encodeFilename(filename)
        return file_doc

    def resolve_file(self, f, context, *, base_url=None, bint close=True):
        """resolve_file(self, f, context, base_url=None, close=True)

        Return an open file-like object as input document.

        Pass open file and context as parameters.  You can pass the
        base URL or filename of the file through the ``base_url``
        keyword argument.  If the ``close`` flag is True (the
        default), the file will be closed after reading.

        Note that using ``.resolve_filename()`` is more efficient,
        especially in threaded environments.

        Raises TypeError if ``f`` has no ``read`` attribute.
        """
        cdef _InputDocument stream_doc
        try:
            # only probes for the attribute, nothing is read here
            f.read
        except AttributeError:
            raise TypeError("Argument is not a file-like object")
        stream_doc = _InputDocument()
        stream_doc._type = PARSER_DATA_FILE
        if base_url is None:
            stream_doc._filename = _getFilenameForFile(f)
        else:
            stream_doc._filename = _encodeFilename(base_url)
        stream_doc._close_file = close
        stream_doc._file = f
        return stream_doc
|
||||
|
||||
@cython.final
@cython.internal
cdef class _ResolverRegistry:
    """Set of registered resolvers plus an optional default resolver."""
    cdef object _resolvers
    cdef Resolver _default_resolver

    def __cinit__(self, Resolver default_resolver=None):
        self._resolvers = set()
        self._default_resolver = default_resolver

    def add(self, Resolver resolver not None):
        """add(self, resolver)

        Register a resolver.

        For each requested entity, the 'resolve' method of the resolver will
        be called and the result will be passed to the parser.  If this method
        returns None, the request will be delegated to other resolvers or the
        default resolver.  The resolvers will be tested in an arbitrary order
        until the first match is found.
        """
        self._resolvers.add(resolver)

    def remove(self, resolver):
        "remove(self, resolver)"
        # discard() ignores resolvers that were never registered
        self._resolvers.discard(resolver)

    cdef _ResolverRegistry _copy(self):
        """Return a new registry with the same default resolver and a
        copy of the resolver set."""
        cdef _ResolverRegistry clone = _ResolverRegistry(self._default_resolver)
        clone._resolvers = self._resolvers.copy()
        return clone

    def copy(self):
        "copy(self)"
        return self._copy()

    def resolve(self, system_url, public_id, context):
        "resolve(self, system_url, public_id, context)"
        # the first registered resolver with a non-None answer wins
        for candidate in self._resolvers:
            resolved = candidate.resolve(system_url, public_id, context)
            if resolved is not None:
                return resolved
        if self._default_resolver is not None:
            return self._default_resolver.resolve(system_url, public_id, context)
        return None

    def __repr__(self):
        return repr(self._resolvers)
|
||||
|
||||
|
||||
@cython.internal
cdef class _ResolverContext(_ExceptionContext):
    """Exception context that also carries a resolver registry and a
    temporary object store (both set up by ``_initResolverContext``)."""
    cdef _ResolverRegistry _resolvers
    cdef _TempStore _storage

    cdef int clear(self) except -1:
        # Reset the inherited exception state first, then empty the
        # temporary store.
        _ExceptionContext.clear(self)
        self._storage.clear()
        return 0
|
||||
|
||||
|
||||
cdef _initResolverContext(_ResolverContext context,
                          _ResolverRegistry resolvers):
    # Attach the given registry to the context, or a fresh empty one when
    # None was passed, and give the context a new temporary store.
    if resolvers is None:
        context._resolvers = _ResolverRegistry()
    else:
        context._resolvers = resolvers
    context._storage = _TempStore()
|
||||
@@ -1,488 +0,0 @@
|
||||
"""
|
||||
lxml-based doctest output comparison.
|
||||
|
||||
Note: normally, you should just import the `lxml.usedoctest` and
|
||||
`lxml.html.usedoctest` modules from within a doctest, instead of this
|
||||
one::
|
||||
|
||||
>>> import lxml.usedoctest # for XML output
|
||||
|
||||
>>> import lxml.html.usedoctest # for HTML output
|
||||
|
||||
To use this module directly, you must call ``lxmldoctest.install()``,
|
||||
which will cause doctest to use this in all subsequent calls.
|
||||
|
||||
This changes the way output is checked and comparisons are made for
|
||||
XML or HTML-like content.
|
||||
|
||||
XML or HTML content is noticed because the example starts with ``<``
|
||||
(it's HTML if it starts with ``<html``). You can also use the
|
||||
``PARSE_HTML`` and ``PARSE_XML`` flags to force parsing.
|
||||
|
||||
Some rough wildcard-like things are allowed. Whitespace is generally
|
||||
ignored (except in attributes). In text (attributes and text in the
|
||||
body) you can use ``...`` as a wildcard. In an example it also
|
||||
matches any trailing tags in the element, though it does not match
|
||||
leading tags. You may create a tag ``<any>`` or include an ``any``
|
||||
attribute in the tag. An ``any`` tag matches any tag, while the
|
||||
attribute matches any and all attributes.
|
||||
|
||||
When a match fails, the reformatted example and gotten text is
|
||||
displayed (indented), and a rough diff-like output is given. Anything
|
||||
marked with ``+`` is in the output but wasn't supposed to be, and
|
||||
similarly ``-`` means it's in the example but wasn't in the output.
|
||||
|
||||
You can disable parsing on one line with ``# doctest:+NOPARSE_MARKUP``
|
||||
"""
|
||||
|
||||
from lxml import etree
import sys
import re
import doctest
try:
    # Python 3 location of escape().
    from html import escape as html_escape
except ImportError:
    # Fallback for interpreters that do not ship html.escape.
    from cgi import escape as html_escape
|
||||
|
||||
# Public names of this module.
__all__ = ['PARSE_HTML', 'PARSE_XML', 'NOPARSE_MARKUP', 'LXMLOutputChecker',
           'LHTMLOutputChecker', 'install', 'temp_install']
|
||||
|
||||
# Extra doctest option flags understood by the checkers in this module:
# force HTML parsing, force XML parsing, or disable markup parsing.
PARSE_HTML = doctest.register_optionflag('PARSE_HTML')
PARSE_XML = doctest.register_optionflag('PARSE_XML')
NOPARSE_MARKUP = doctest.register_optionflag('NOPARSE_MARKUP')

# Keep a reference to the stock checker class; install() later rebinds
# doctest.OutputChecker.
OutputChecker = doctest.OutputChecker
|
||||
|
||||
def strip(v):
    """Return ``v.strip()``, passing ``None`` through unchanged."""
    if v is None:
        return None
    else:
        return v.strip()
|
||||
|
||||
def norm_whitespace(v):
    """Replace every match of ``_norm_whitespace_re`` in ``v`` with one space."""
    return _norm_whitespace_re.sub(' ', v)
|
||||
|
||||
# Shared HTML parser: no error recovery, blank text nodes removed.
_html_parser = etree.HTMLParser(recover=False, remove_blank_text=True)


def html_fromstring(html):
    """Parse ``html`` with the module's shared HTML parser."""
    return etree.fromstring(html, _html_parser)
|
||||
|
||||
# We use this to distinguish repr()s from elements:
|
||||
_repr_re = re.compile(r'^<[^>]+ (at|object) ')
|
||||
_norm_whitespace_re = re.compile(r'[ \t\n][ \t\n]+')
|
||||
|
||||
class LXMLOutputChecker(OutputChecker):
    """doctest output checker that compares XML/HTML output structurally.

    When both the expected and the actual output look like markup they are
    parsed and compared as trees (see ``compare_docs``); otherwise the
    stock ``OutputChecker`` behaviour is used.
    """

    # HTML tags that are formatted without content or an end tag.
    empty_tags = (
        'param', 'img', 'area', 'br', 'basefont', 'input',
        'base', 'meta', 'link', 'col')

    def get_default_parser(self):
        """Return the parser used for markup that is not recognised as HTML."""
        return etree.XML

    def check_output(self, want, got, optionflags):
        """Return True if ``got`` matches ``want``.

        NOTE: temp_install() copies this function's code object into
        doctest's own ``check_output``, where it runs with the globals of
        the ``doctest`` module.  Only globals available there
        (``OutputChecker``, and ``etree`` which temp_install() injects)
        may be referenced in this method.
        """
        alt_self = getattr(self, '_temp_override_self', None)
        if alt_self is not None:
            # Running as the patched doctest checker: act on behalf of the
            # temporarily installed checker instead.
            super_method = self._temp_call_super_check_output
            self = alt_self
        else:
            super_method = OutputChecker.check_output
        parser = self.get_parser(want, got, optionflags)
        if not parser:
            return super_method(
                self, want, got, optionflags)
        try:
            want_doc = parser(want)
        except etree.XMLSyntaxError:
            return False
        try:
            got_doc = parser(got)
        except etree.XMLSyntaxError:
            return False
        return self.compare_docs(want_doc, got_doc)

    def get_parser(self, want, got, optionflags):
        """Choose the parser callable for this example, or None for plain text.

        Explicit option flags win; otherwise HTML is used when both texts
        start with ``<html`` (case-insensitively), and the default parser
        when both merely look like markup.
        """
        parser = None
        if NOPARSE_MARKUP & optionflags:
            return None
        if PARSE_HTML & optionflags:
            parser = html_fromstring
        elif PARSE_XML & optionflags:
            parser = etree.XML
        elif (want.strip().lower().startswith('<html')
                # Lower-case both sides so the HTML sniffing is symmetric.
                and got.strip().lower().startswith('<html')):
            parser = html_fromstring
        elif (self._looks_like_markup(want)
                and self._looks_like_markup(got)):
            parser = self.get_default_parser()
        return parser

    def _looks_like_markup(self, s):
        """True if ``s`` starts with ``<`` and is not an object repr()."""
        s = s.strip()
        return (s.startswith('<')
                and not _repr_re.search(s))

    def compare_docs(self, want, got):
        """Recursively compare two element trees, honouring the wildcards.

        An ``any`` attribute on the expected element skips the attribute
        comparison; expected text ``...`` on a childless element skips the
        child comparison; a ``...`` tail on an expected child accepts the
        remaining expected children once the actual children are used up.
        """
        if not self.tag_compare(want.tag, got.tag):
            return False
        if not self.text_compare(want.text, got.text, True):
            return False
        if not self.text_compare(want.tail, got.tail, True):
            return False
        if 'any' not in want.attrib:
            want_keys = sorted(want.attrib.keys())
            got_keys = sorted(got.attrib.keys())
            if want_keys != got_keys:
                return False
            for key in want_keys:
                if not self.text_compare(want.attrib[key], got.attrib[key], False):
                    return False
        if want.text != '...' or len(want):
            want_children = list(want)
            got_children = list(got)
            while want_children or got_children:
                if not want_children or not got_children:
                    return False
                want_first = want_children.pop(0)
                got_first = got_children.pop(0)
                if not self.compare_docs(want_first, got_first):
                    return False
                if not got_children and want_first.tail == '...':
                    break
        return True

    def text_compare(self, want, got, strip):
        """Compare two texts; ``...`` in ``want`` matches any characters.

        None is treated as the empty string.  With ``strip`` true, both
        texts are whitespace-normalised and stripped first.
        """
        want = want or ''
        got = got or ''
        if strip:
            want = norm_whitespace(want).strip()
            got = norm_whitespace(got).strip()
        # Escape everything, then turn the escaped '...' back into '.*'.
        want = '^%s$' % re.escape(want)
        want = want.replace(r'\.\.\.', '.*')
        return bool(re.search(want, got))

    def tag_compare(self, want, got):
        """Compare two tags; ``any`` matches everything and a ``{...}``
        prefix on ``want`` ignores the namespace part."""
        if want == 'any':
            return True
        if (not isinstance(want, (str, bytes))
                or not isinstance(got, (str, bytes))):
            # Non-string tags are compared directly.
            return want == got
        want = want or ''
        got = got or ''
        if want.startswith('{...}'):
            # Ellipsis on the namespace
            return want.split('}')[-1] == got.split('}')[-1]
        else:
            return want == got

    def output_difference(self, example, got, optionflags):
        """Return the text describing why ``got`` does not match the example.

        Falls back to the stock doctest message when no parser applies or
        when either side fails to parse (the parse errors are prepended).
        """
        want = example.want
        parser = self.get_parser(want, got, optionflags)
        errors = []
        if parser is not None:
            try:
                want_doc = parser(want)
            except etree.XMLSyntaxError as e:
                errors.append('In example: %s' % e)
            try:
                got_doc = parser(got)
            except etree.XMLSyntaxError as e:
                errors.append('In actual output: %s' % e)
        if parser is None or errors:
            value = OutputChecker.output_difference(
                self, example, got, optionflags)
            if errors:
                errors.append(value)
                return '\n'.join(errors)
            else:
                return value
        html = parser is html_fromstring
        diff_parts = ['Expected:',
                      self.format_doc(want_doc, html, 2),
                      'Got:',
                      self.format_doc(got_doc, html, 2),
                      'Diff:',
                      self.collect_diff(want_doc, got_doc, html, 2)]
        return '\n'.join(diff_parts)

    def html_empty_tag(self, el, html=True):
        """True if ``el`` is a content-free HTML tag listed in ``empty_tags``."""
        if not html:
            return False
        if el.tag not in self.empty_tags:
            return False
        if el.text or len(el):
            # This shouldn't happen (contents in an empty tag)
            return False
        return True

    def format_doc(self, doc, html, indent, prefix=''):
        """Format the element tree ``doc`` as indented text, one element
        per line, with ``prefix`` inserted before the start tag."""
        parts = []
        if not len(doc):
            # No children...
            parts.append(' '*indent)
            parts.append(prefix)
            parts.append(self.format_tag(doc))
            if not self.html_empty_tag(doc, html):
                if strip(doc.text):
                    parts.append(self.format_text(doc.text))
                parts.append(self.format_end_tag(doc))
            if strip(doc.tail):
                parts.append(self.format_text(doc.tail))
            parts.append('\n')
            return ''.join(parts)
        parts.append(' '*indent)
        parts.append(prefix)
        parts.append(self.format_tag(doc))
        if not self.html_empty_tag(doc, html):
            parts.append('\n')
            if strip(doc.text):
                parts.append(' '*indent)
                parts.append(self.format_text(doc.text))
                parts.append('\n')
            for el in doc:
                parts.append(self.format_doc(el, html, indent+2))
            parts.append(' '*indent)
            parts.append(self.format_end_tag(doc))
            parts.append('\n')
        if strip(doc.tail):
            parts.append(' '*indent)
            parts.append(self.format_text(doc.tail))
            parts.append('\n')
        return ''.join(parts)

    def format_text(self, text, strip=True):
        """Return ``text`` HTML-escaped (quotes included), optionally stripped."""
        if text is None:
            return ''
        if strip:
            text = text.strip()
        return html_escape(text, 1)

    def format_tag(self, el):
        """Format the start tag of ``el`` with its attributes sorted by name."""
        attrs = []
        if isinstance(el, etree.CommentBase):
            # FIXME: probably PIs should be handled specially too?
            return '<!--'
        for name, value in sorted(el.attrib.items()):
            attrs.append('%s="%s"' % (name, self.format_text(value, False)))
        if not attrs:
            return '<%s>' % el.tag
        return '<%s %s>' % (el.tag, ' '.join(attrs))

    def format_end_tag(self, el):
        """Format the end tag of ``el`` (``-->`` for comments)."""
        if isinstance(el, etree.CommentBase):
            # FIXME: probably PIs should be handled specially too?
            return '-->'
        return '</%s>' % el.tag

    def collect_diff(self, want, got, html, indent):
        """Build the rough diff between the expected and actual trees.

        Children only present in the output are prefixed with ``+``,
        children only present in the example with ``-``.
        """
        parts = []
        if not len(want) and not len(got):
            parts.append(' '*indent)
            parts.append(self.collect_diff_tag(want, got))
            if not self.html_empty_tag(got, html):
                parts.append(self.collect_diff_text(want.text, got.text))
                parts.append(self.collect_diff_end_tag(want, got))
            parts.append(self.collect_diff_text(want.tail, got.tail))
            parts.append('\n')
            return ''.join(parts)
        parts.append(' '*indent)
        parts.append(self.collect_diff_tag(want, got))
        parts.append('\n')
        if strip(want.text) or strip(got.text):
            parts.append(' '*indent)
            parts.append(self.collect_diff_text(want.text, got.text))
            parts.append('\n')
        want_children = list(want)
        got_children = list(got)
        while want_children or got_children:
            if not want_children:
                parts.append(self.format_doc(got_children.pop(0), html, indent+2, '+'))
                continue
            if not got_children:
                parts.append(self.format_doc(want_children.pop(0), html, indent+2, '-'))
                continue
            parts.append(self.collect_diff(
                want_children.pop(0), got_children.pop(0), html, indent+2))
        parts.append(' '*indent)
        parts.append(self.collect_diff_end_tag(want, got))
        parts.append('\n')
        if strip(want.tail) or strip(got.tail):
            parts.append(' '*indent)
            parts.append(self.collect_diff_text(want.tail, got.tail))
            parts.append('\n')
        return ''.join(parts)

    def collect_diff_tag(self, want, got):
        """Format the start tag for the diff, marking tag and attribute
        differences (``+attr`` only in output, ``-attr`` only in example)."""
        if not self.tag_compare(want.tag, got.tag):
            tag = '%s (got: %s)' % (want.tag, got.tag)
        else:
            tag = got.tag
        attrs = []
        # Named match_any to avoid shadowing the any() builtin.
        match_any = want.tag == 'any' or 'any' in want.attrib
        for name, value in sorted(got.attrib.items()):
            if name not in want.attrib and not match_any:
                attrs.append('+%s="%s"' % (name, self.format_text(value, False)))
            else:
                if name in want.attrib:
                    text = self.collect_diff_text(want.attrib[name], value, False)
                else:
                    text = self.format_text(value, False)
                attrs.append('%s="%s"' % (name, text))
        if not match_any:
            for name, value in sorted(want.attrib.items()):
                if name in got.attrib:
                    continue
                attrs.append('-%s="%s"' % (name, self.format_text(value, False)))
        if attrs:
            tag = '<%s %s>' % (tag, ' '.join(attrs))
        else:
            tag = '<%s>' % tag
        return tag

    def collect_diff_end_tag(self, want, got):
        """Format the end tag for the diff, marking a tag mismatch.

        Uses tag_compare() like collect_diff_tag() does, so the ``any``
        and ``{...}`` wildcards are not reported as differences.
        """
        if not self.tag_compare(want.tag, got.tag):
            tag = '%s (got: %s)' % (want.tag, got.tag)
        else:
            tag = got.tag
        return '</%s>' % tag

    def collect_diff_text(self, want, got, strip=True):
        """Format a text for the diff: the actual text when it matches,
        otherwise ``want (got: got)``."""
        if self.text_compare(want, got, strip):
            if not got:
                return ''
            return self.format_text(got, strip)
        text = '%s (got: %s)' % (want, got)
        return self.format_text(text, strip)
|
||||
|
||||
class LHTMLOutputChecker(LXMLOutputChecker):
    """Output checker whose default parser for markup is the HTML parser."""

    def get_default_parser(self):
        return html_fromstring
|
||||
|
||||
def install(html=False):
    """
    Install doctestcompare for all future doctests.

    If html is true, then by default the HTML parser will be used;
    otherwise the XML parser is used.

    This rebinds ``doctest.OutputChecker`` globally.
    """
    if html:
        doctest.OutputChecker = LHTMLOutputChecker
    else:
        doctest.OutputChecker = LXMLOutputChecker
|
||||
|
||||
def temp_install(html=False, del_module=None):
    """
    Use this *inside* a doctest to enable this checker for this
    doctest only.

    If html is true, then by default the HTML parser will be used;
    otherwise the XML parser is used.

    If del_module is given, that module name is removed from
    ``sys.modules`` again once the doctest has finished (see
    ``_RestoreChecker.uninstall_module``).
    """
    if html:
        Checker = LHTMLOutputChecker
    else:
        Checker = LXMLOutputChecker
    frame = _find_doctest_frame()
    dt_self = frame.f_locals['self']
    checker = Checker()
    old_checker = dt_self._checker
    dt_self._checker = checker
    # The unfortunate thing is that there is a local variable 'check'
    # in the function that runs the doctests, that is a bound method
    # into the output checker.  We have to update that.  We can't
    # modify the frame, so we have to modify the object in place.  The
    # only way to do this is to actually change the func_code
    # attribute of the method.  We change it, and then wait for
    # __record_outcome to be run, which signals the end of the __run
    # method, at which point we restore the previous check_output
    # implementation.
    check_func = frame.f_locals['check'].__func__
    checker_check_func = checker.check_output.__func__
    # Because we can't patch up func_globals, this is the only global
    # in check_output that we care about:
    doctest.etree = etree
    # The instance registers itself on dt_self and old_checker, so it does
    # not need to be kept in a variable here.
    _RestoreChecker(dt_self, old_checker, checker,
                    check_func, checker_check_func,
                    del_module)
|
||||
|
||||
class _RestoreChecker:
|
||||
def __init__(self, dt_self, old_checker, new_checker, check_func, clone_func,
|
||||
del_module):
|
||||
self.dt_self = dt_self
|
||||
self.checker = old_checker
|
||||
self.checker._temp_call_super_check_output = self.call_super
|
||||
self.checker._temp_override_self = new_checker
|
||||
self.check_func = check_func
|
||||
self.clone_func = clone_func
|
||||
self.del_module = del_module
|
||||
self.install_clone()
|
||||
self.install_dt_self()
|
||||
def install_clone(self):
|
||||
self.func_code = self.check_func.__code__
|
||||
self.func_globals = self.check_func.__globals__
|
||||
self.check_func.__code__ = self.clone_func.__code__
|
||||
def uninstall_clone(self):
|
||||
self.check_func.__code__ = self.func_code
|
||||
def install_dt_self(self):
|
||||
self.prev_func = self.dt_self._DocTestRunner__record_outcome
|
||||
self.dt_self._DocTestRunner__record_outcome = self
|
||||
def uninstall_dt_self(self):
|
||||
self.dt_self._DocTestRunner__record_outcome = self.prev_func
|
||||
def uninstall_module(self):
|
||||
if self.del_module:
|
||||
import sys
|
||||
del sys.modules[self.del_module]
|
||||
if '.' in self.del_module:
|
||||
package, module = self.del_module.rsplit('.', 1)
|
||||
package_mod = sys.modules[package]
|
||||
delattr(package_mod, module)
|
||||
def __call__(self, *args, **kw):
|
||||
self.uninstall_clone()
|
||||
self.uninstall_dt_self()
|
||||
del self.checker._temp_override_self
|
||||
del self.checker._temp_call_super_check_output
|
||||
result = self.prev_func(*args, **kw)
|
||||
self.uninstall_module()
|
||||
return result
|
||||
def call_super(self, *args, **kw):
|
||||
self.uninstall_clone()
|
||||
try:
|
||||
return self.check_func(*args, **kw)
|
||||
finally:
|
||||
self.install_clone()
|
||||
|
||||
def _find_doctest_frame():
|
||||
import sys
|
||||
frame = sys._getframe(1)
|
||||
while frame:
|
||||
l = frame.f_locals
|
||||
if 'BOOM' in l:
|
||||
# Sign of doctest
|
||||
return frame
|
||||
frame = frame.f_back
|
||||
raise LookupError(
|
||||
"Could not find doctest (only use this function *inside* a doctest)")
|
||||
|
||||
__test__ = {
|
||||
'basic': '''
|
||||
>>> temp_install()
|
||||
>>> print """<xml a="1" b="2">stuff</xml>"""
|
||||
<xml b="2" a="1">...</xml>
|
||||
>>> print """<xml xmlns="http://example.com"><tag attr="bar" /></xml>"""
|
||||
<xml xmlns="...">
|
||||
<tag attr="..." />
|
||||
</xml>
|
||||
>>> print """<xml>blahblahblah<foo /></xml>""" # doctest: +NOPARSE_MARKUP, +ELLIPSIS
|
||||
<xml>...foo /></xml>
|
||||
'''}
|
||||
|
||||
if __name__ == '__main__':
    # Run this module's own doctests (the __test__ dict).
    import doctest
    doctest.testmod()
|
||||
|
||||
|
||||
@@ -1,479 +0,0 @@
|
||||
# support for DTD validation
|
||||
from lxml.includes cimport dtdvalid
|
||||
|
||||
cdef class DTDError(LxmlError):
    """Base class for DTD errors.
    """
|
||||
|
||||
cdef class DTDParseError(DTDError):
    """Error while parsing a DTD.
    """
|
||||
|
||||
cdef class DTDValidateError(DTDError):
    """Error while validating an XML document with a DTD.
    """
|
||||
|
||||
|
||||
cdef inline int _assertValidDTDNode(node, void *c_node) except -1:
    # Guard used by the DTD proxy classes: fail with an AssertionError if
    # the proxy has no underlying C node.
    assert c_node is not NULL, "invalid DTD proxy at %s" % id(node)
|
||||
|
||||
|
||||
@cython.final
@cython.internal
@cython.freelist(8)
cdef class _DTDElementContentDecl:
    """Proxy for one node of an element content model (xmlElementContent).

    Holds a reference to the owning DTD object alongside the C pointer.
    """
    cdef DTD _dtd
    cdef tree.xmlElementContent* _c_node

    def __repr__(self):
        return "<%s.%s object name=%r type=%r occur=%r at 0x%x>" % (self.__class__.__module__, self.__class__.__name__, self.name, self.type, self.occur, id(self))

    @property
    def name(self):
        """Name of the content node, or None."""
        _assertValidDTDNode(self, self._c_node)
        return funicodeOrNone(self._c_node.name)

    @property
    def type(self):
        """One of "pcdata", "element", "seq", "or"; None if unknown."""
        _assertValidDTDNode(self, self._c_node)
        cdef int type = self._c_node.type
        if type == tree.XML_ELEMENT_CONTENT_PCDATA:
            return "pcdata"
        elif type == tree.XML_ELEMENT_CONTENT_ELEMENT:
            return "element"
        elif type == tree.XML_ELEMENT_CONTENT_SEQ:
            return "seq"
        elif type == tree.XML_ELEMENT_CONTENT_OR:
            return "or"
        else:
            return None

    @property
    def occur(self):
        """One of "once", "opt", "mult", "plus"; None if unknown."""
        _assertValidDTDNode(self, self._c_node)
        cdef int occur = self._c_node.ocur
        if occur == tree.XML_ELEMENT_CONTENT_ONCE:
            return "once"
        elif occur == tree.XML_ELEMENT_CONTENT_OPT:
            return "opt"
        elif occur == tree.XML_ELEMENT_CONTENT_MULT:
            return "mult"
        elif occur == tree.XML_ELEMENT_CONTENT_PLUS:
            return "plus"
        else:
            return None

    @property
    def left(self):
        """Proxy for the first child (``c1``) of this node, or None."""
        _assertValidDTDNode(self, self._c_node)
        c1 = self._c_node.c1
        if c1:
            node = <_DTDElementContentDecl>_DTDElementContentDecl.__new__(_DTDElementContentDecl)
            node._dtd = self._dtd
            node._c_node = <tree.xmlElementContent*>c1
            return node
        else:
            return None

    @property
    def right(self):
        """Proxy for the second child (``c2``) of this node, or None."""
        _assertValidDTDNode(self, self._c_node)
        c2 = self._c_node.c2
        if c2:
            node = <_DTDElementContentDecl>_DTDElementContentDecl.__new__(_DTDElementContentDecl)
            node._dtd = self._dtd
            node._c_node = <tree.xmlElementContent*>c2
            return node
        else:
            return None
|
||||
|
||||
|
||||
@cython.final
@cython.internal
@cython.freelist(8)
cdef class _DTDAttributeDecl:
    """Proxy for an attribute declaration (xmlAttribute) in a DTD.

    Holds a reference to the owning DTD object alongside the C pointer.
    """
    cdef DTD _dtd
    cdef tree.xmlAttribute* _c_node

    def __repr__(self):
        return "<%s.%s object name=%r elemname=%r prefix=%r type=%r default=%r default_value=%r at 0x%x>" % (self.__class__.__module__, self.__class__.__name__, self.name, self.elemname, self.prefix, self.type, self.default, self.default_value, id(self))

    @property
    def name(self):
        """Attribute name, or None."""
        _assertValidDTDNode(self, self._c_node)
        return funicodeOrNone(self._c_node.name)

    @property
    def elemname(self):
        """Name of the element the attribute is declared for, or None."""
        _assertValidDTDNode(self, self._c_node)
        return funicodeOrNone(self._c_node.elem)

    @property
    def prefix(self):
        """Prefix of the attribute, or None."""
        _assertValidDTDNode(self, self._c_node)
        return funicodeOrNone(self._c_node.prefix)

    @property
    def type(self):
        """Attribute type as a lower-case string ("cdata", "id", ...);
        None if unknown."""
        _assertValidDTDNode(self, self._c_node)
        cdef int type = self._c_node.atype
        if type == tree.XML_ATTRIBUTE_CDATA:
            return "cdata"
        elif type == tree.XML_ATTRIBUTE_ID:
            return "id"
        elif type == tree.XML_ATTRIBUTE_IDREF:
            return "idref"
        elif type == tree.XML_ATTRIBUTE_IDREFS:
            return "idrefs"
        elif type == tree.XML_ATTRIBUTE_ENTITY:
            return "entity"
        elif type == tree.XML_ATTRIBUTE_ENTITIES:
            return "entities"
        elif type == tree.XML_ATTRIBUTE_NMTOKEN:
            return "nmtoken"
        elif type == tree.XML_ATTRIBUTE_NMTOKENS:
            return "nmtokens"
        elif type == tree.XML_ATTRIBUTE_ENUMERATION:
            return "enumeration"
        elif type == tree.XML_ATTRIBUTE_NOTATION:
            return "notation"
        else:
            return None

    @property
    def default(self):
        """One of "none", "required", "implied", "fixed"; None if unknown."""
        _assertValidDTDNode(self, self._c_node)
        cdef int default = self._c_node.def_
        if default == tree.XML_ATTRIBUTE_NONE:
            return "none"
        elif default == tree.XML_ATTRIBUTE_REQUIRED:
            return "required"
        elif default == tree.XML_ATTRIBUTE_IMPLIED:
            return "implied"
        elif default == tree.XML_ATTRIBUTE_FIXED:
            return "fixed"
        else:
            return None

    @property
    def default_value(self):
        """Declared default value, or None."""
        _assertValidDTDNode(self, self._c_node)
        return funicodeOrNone(self._c_node.defaultValue)

    def itervalues(self):
        """Iterate over the names in the declaration's enumeration list
        (the linked list starting at ``tree``)."""
        _assertValidDTDNode(self, self._c_node)
        cdef tree.xmlEnumeration *c_node = self._c_node.tree
        while c_node is not NULL:
            yield funicode(c_node.name)
            c_node = c_node.next

    def values(self):
        """Return the result of itervalues() as a list."""
        return list(self.itervalues())
|
||||
|
||||
|
||||
@cython.final
@cython.internal
@cython.freelist(8)
cdef class _DTDElementDecl:
    """Proxy for an element declaration (xmlElement) in a DTD.

    Holds a reference to the owning DTD object alongside the C pointer.
    """
    cdef DTD _dtd
    cdef tree.xmlElement* _c_node

    def __repr__(self):
        return "<%s.%s object name=%r prefix=%r type=%r at 0x%x>" % (self.__class__.__module__, self.__class__.__name__, self.name, self.prefix, self.type, id(self))

    @property
    def name(self):
        """Element name, or None."""
        _assertValidDTDNode(self, self._c_node)
        return funicodeOrNone(self._c_node.name)

    @property
    def prefix(self):
        """Prefix of the element, or None."""
        _assertValidDTDNode(self, self._c_node)
        return funicodeOrNone(self._c_node.prefix)

    @property
    def type(self):
        """One of "undefined", "empty", "any", "mixed", "element";
        None if unknown."""
        _assertValidDTDNode(self, self._c_node)
        cdef int type = self._c_node.etype
        if type == tree.XML_ELEMENT_TYPE_UNDEFINED:
            return "undefined"
        elif type == tree.XML_ELEMENT_TYPE_EMPTY:
            return "empty"
        elif type == tree.XML_ELEMENT_TYPE_ANY:
            return "any"
        elif type == tree.XML_ELEMENT_TYPE_MIXED:
            return "mixed"
        elif type == tree.XML_ELEMENT_TYPE_ELEMENT:
            return "element"
        else:
            return None

    @property
    def content(self):
        """Proxy for the root of the element's content model, or None."""
        _assertValidDTDNode(self, self._c_node)
        cdef tree.xmlElementContent *content = self._c_node.content
        if content:
            node = <_DTDElementContentDecl>_DTDElementContentDecl.__new__(_DTDElementContentDecl)
            node._dtd = self._dtd
            node._c_node = content
            return node
        else:
            return None

    def iterattributes(self):
        """Iterate over proxies for the element's attribute declarations,
        following the ``nexth`` links."""
        _assertValidDTDNode(self, self._c_node)
        cdef tree.xmlAttribute *c_node = self._c_node.attributes
        while c_node:
            node = <_DTDAttributeDecl>_DTDAttributeDecl.__new__(_DTDAttributeDecl)
            node._dtd = self._dtd
            node._c_node = c_node
            yield node
            c_node = c_node.nexth

    def attributes(self):
        """Return the result of iterattributes() as a list."""
        return list(self.iterattributes())
|
||||
|
||||
|
||||
@cython.final
@cython.internal
@cython.freelist(8)
cdef class _DTDEntityDecl:
    """Proxy for an entity declaration (xmlEntity) in a DTD.

    Holds a reference to the owning DTD object alongside the C pointer.
    """
    cdef DTD _dtd
    cdef tree.xmlEntity* _c_node

    def __repr__(self):
        return "<%s.%s object name=%r at 0x%x>" % (self.__class__.__module__, self.__class__.__name__, self.name, id(self))

    @property
    def name(self):
        """Entity name, or None."""
        _assertValidDTDNode(self, self._c_node)
        return funicodeOrNone(self._c_node.name)

    @property
    def orig(self):
        """Value of the C node's ``orig`` field, or None."""
        _assertValidDTDNode(self, self._c_node)
        return funicodeOrNone(self._c_node.orig)

    @property
    def content(self):
        """Value of the C node's ``content`` field, or None."""
        _assertValidDTDNode(self, self._c_node)
        return funicodeOrNone(self._c_node.content)

    @property
    def system_url(self):
        """System ID of the entity, or None."""
        _assertValidDTDNode(self, self._c_node)
        return funicodeOrNone(self._c_node.SystemID)
|
||||
|
||||
|
||||
################################################################################
|
||||
# DTD
|
||||
|
||||
cdef class DTD(_Validator):
    """DTD(self, file=None, external_id=None)
    A DTD validator.

    Can load from filesystem directly given a filename or file-like object.
    Alternatively, pass the keyword parameter ``external_id`` to load from a
    catalog.
    """
    cdef tree.xmlDtd* _c_dtd

    def __init__(self, file=None, *, external_id=None):
        _Validator.__init__(self)
        if file is not None:
            file = _getFSPathOrObject(file)
            if _isString(file):
                file = _encodeFilename(file)
                with self._error_log:
                    orig_loader = _register_document_loader()
                    self._c_dtd = xmlparser.xmlParseDTD(NULL, _xcstr(file))
                    _reset_document_loader(orig_loader)
            elif hasattr(file, 'read'):
                orig_loader = _register_document_loader()
                try:
                    self._c_dtd = _parseDtdFromFilelike(file)
                finally:
                    # _parseDtdFromFilelike() can raise; always put the
                    # previous document loader back.
                    _reset_document_loader(orig_loader)
            else:
                raise DTDParseError, "file must be a filename, file-like or path-like object"
        elif external_id is not None:
            external_id_utf = _utf8(external_id)
            with self._error_log:
                orig_loader = _register_document_loader()
                self._c_dtd = xmlparser.xmlParseDTD(<const_xmlChar*>external_id_utf, NULL)
                _reset_document_loader(orig_loader)
        else:
            raise DTDParseError, "either filename or external ID required"

        if self._c_dtd is NULL:
            raise DTDParseError(
                self._error_log._buildExceptionMessage("error parsing DTD"),
                self._error_log)

    @property
    def name(self):
        """Name of the DTD, or None (also when no DTD is loaded)."""
        if self._c_dtd is NULL:
            return None
        return funicodeOrNone(self._c_dtd.name)

    @property
    def external_id(self):
        """External ID of the DTD, or None (also when no DTD is loaded)."""
        if self._c_dtd is NULL:
            return None
        return funicodeOrNone(self._c_dtd.ExternalID)

    @property
    def system_url(self):
        """System ID of the DTD, or None (also when no DTD is loaded)."""
        if self._c_dtd is NULL:
            return None
        return funicodeOrNone(self._c_dtd.SystemID)

    def iterelements(self):
        """Iterate over proxies for the element declarations among the
        DTD's child nodes.  Yields nothing when no DTD is loaded."""
        cdef tree.xmlNode *c_node = self._c_dtd.children if self._c_dtd is not NULL else NULL
        while c_node is not NULL:
            if c_node.type == tree.XML_ELEMENT_DECL:
                node = _DTDElementDecl()
                node._dtd = self
                node._c_node = <tree.xmlElement*>c_node
                yield node
            c_node = c_node.next

    def elements(self):
        """Return the result of iterelements() as a list."""
        return list(self.iterelements())

    def iterentities(self):
        """Iterate over proxies for the entity declarations among the
        DTD's child nodes.  Yields nothing when no DTD is loaded."""
        cdef tree.xmlNode *c_node = self._c_dtd.children if self._c_dtd is not NULL else NULL
        while c_node is not NULL:
            if c_node.type == tree.XML_ENTITY_DECL:
                node = _DTDEntityDecl()
                node._dtd = self
                node._c_node = <tree.xmlEntity*>c_node
                yield node
            c_node = c_node.next

    def entities(self):
        """Return the result of iterentities() as a list."""
        return list(self.iterentities())

    def __dealloc__(self):
        tree.xmlFreeDtd(self._c_dtd)

    def __call__(self, etree):
        """__call__(self, etree)

        Validate doc using the DTD.

        Returns true if the document is valid, false if not.

        Raises DTDError if no validation context could be created and
        DTDValidateError if the validation itself reports an internal
        error (return code -1).
        """
        cdef _Document doc
        cdef _Element root_node
        cdef xmlDoc* c_doc
        cdef dtdvalid.xmlValidCtxt* valid_ctxt
        cdef int ret = -1

        assert self._c_dtd is not NULL, "DTD not initialised"
        doc = _documentOrRaise(etree)
        root_node = _rootNodeOrRaise(etree)

        valid_ctxt = dtdvalid.xmlNewValidCtxt()
        if valid_ctxt is NULL:
            raise DTDError("Failed to create validation context")

        # work around error reporting bug in libxml2 <= 2.9.1 (and later?)
        # https://bugzilla.gnome.org/show_bug.cgi?id=724903
        valid_ctxt.error = <dtdvalid.xmlValidityErrorFunc>_nullGenericErrorFunc
        valid_ctxt.userData = NULL

        try:
            with self._error_log:
                c_doc = _fakeRootDoc(doc._c_doc, root_node._c_node)
                ret = dtdvalid.xmlValidateDtd(valid_ctxt, c_doc, self._c_dtd)
                _destroyFakeDoc(doc._c_doc, c_doc)
        finally:
            dtdvalid.xmlFreeValidCtxt(valid_ctxt)

        if ret == -1:
            raise DTDValidateError("Internal error in DTD validation",
                                   self._error_log)
        return ret == 1
|
||||
|
||||
|
||||
cdef tree.xmlDtd* _parseDtdFromFilelike(file) except NULL:
    # Parse a DTD from a file-like object.  An exception stored in the
    # exception context while reading is re-raised; a NULL result is
    # turned into a DTDParseError carrying the collected error log.
    cdef _ExceptionContext exc_context
    cdef _FileReaderContext dtd_parser
    cdef _ErrorLog error_log
    cdef tree.xmlDtd* c_dtd = NULL
    exc_context = _ExceptionContext()
    dtd_parser = _FileReaderContext(file, exc_context, None)
    error_log = _ErrorLog()

    with error_log:
        c_dtd = dtd_parser._readDtd()

    exc_context._raise_if_stored()
    if c_dtd is NULL:
        raise DTDParseError("error parsing DTD", error_log)
    return c_dtd
|
||||
|
||||
cdef DTD _dtdFactory(tree.xmlDtd* c_dtd):
    # Build a DTD object around a *copy* of c_dtd; returns None for NULL.
    # do not run through DTD.__init__()!
    cdef DTD dtd
    if c_dtd is NULL:
        return None
    dtd = DTD.__new__(DTD)
    dtd._c_dtd = _copyDtd(c_dtd)
    _Validator.__init__(dtd)
    return dtd
|
||||
|
||||
|
||||
cdef tree.xmlDtd* _copyDtd(tree.xmlDtd* c_orig_dtd) except NULL:
    """
    Copy a DTD.  libxml2 (currently) fails to set up the element->attributes
    links when copying DTDs, so we have to rebuild them here.

    Raises MemoryError if the copy could not be created.
    """
    c_dtd = tree.xmlCopyDtd(c_orig_dtd)
    if not c_dtd:
        raise MemoryError
    cdef tree.xmlNode* c_node = c_dtd.children
    while c_node:
        if c_node.type == tree.XML_ATTRIBUTE_DECL:
            _linkDtdAttribute(c_dtd, <tree.xmlAttribute*>c_node)
        c_node = c_node.next
    return c_dtd
|
||||
|
||||
|
||||
cdef void _linkDtdAttribute(tree.xmlDtd* c_dtd, tree.xmlAttribute* c_attr) noexcept:
    """
    Create the link to the DTD attribute declaration from the corresponding
    element declaration.
    """
    c_elem = dtdvalid.xmlGetDtdElementDesc(c_dtd, c_attr.elem)
    if not c_elem:
        # no such element? something is wrong with the DTD ...
        return
    c_pos = c_elem.attributes
    if not c_pos:
        # First attribute of this element: it becomes the list head.
        c_elem.attributes = c_attr
        c_attr.nexth = NULL
        return
    # libxml2 keeps namespace declarations first, and we need to make
    # sure we don't re-insert attributes that are already there
    if _isDtdNsDecl(c_attr):
        if not _isDtdNsDecl(c_pos):
            # No namespace declaration in the list yet: insert at the head.
            c_elem.attributes = c_attr
            c_attr.nexth = c_pos
            return
        # Advance to the last of the leading namespace declarations.
        while c_pos != c_attr and c_pos.nexth and _isDtdNsDecl(c_pos.nexth):
            c_pos = c_pos.nexth
    else:
        # append at end
        while c_pos != c_attr and c_pos.nexth:
            c_pos = c_pos.nexth
    if c_pos == c_attr:
        # Already linked; nothing to do.
        return
    c_attr.nexth = c_pos.nexth
    c_pos.nexth = c_attr
|
||||
|
||||
|
||||
cdef bint _isDtdNsDecl(tree.xmlAttribute* c_attr) noexcept:
    # True if the attribute is named "xmlns" or carries the prefix "xmlns".
    if cstring_h.strcmp(<const_char*>c_attr.name, "xmlns") == 0:
        return True
    if (c_attr.prefix is not NULL and
            cstring_h.strcmp(<const_char*>c_attr.prefix, "xmlns") == 0):
        return True
    return False
|
||||
Binary file not shown.
@@ -1,244 +0,0 @@
|
||||
/* Generated by Cython 3.1.4 */
|
||||
|
||||
#ifndef __PYX_HAVE__lxml__etree
|
||||
#define __PYX_HAVE__lxml__etree
|
||||
|
||||
#include "Python.h"
|
||||
struct LxmlDocument;
|
||||
struct LxmlElement;
|
||||
struct LxmlElementTree;
|
||||
struct LxmlElementTagMatcher;
|
||||
struct LxmlElementIterator;
|
||||
struct LxmlElementBase;
|
||||
struct LxmlElementClassLookup;
|
||||
struct LxmlFallbackElementClassLookup;
|
||||
|
||||
/* "lxml/etree.pyx":451
|
||||
*
|
||||
* # type of a function that steps from node to node
|
||||
* ctypedef public xmlNode* (*_node_to_node_function)(xmlNode*) # <<<<<<<<<<<<<<
|
||||
*
|
||||
*
|
||||
*/
|
||||
typedef xmlNode *(*_node_to_node_function)(xmlNode *);
|
||||
|
||||
/* "lxml/etree.pyx":465
|
||||
* # Public Python API
|
||||
*
|
||||
* @cython.final # <<<<<<<<<<<<<<
|
||||
* @cython.freelist(8)
|
||||
* cdef public class _Document [ type LxmlDocumentType, object LxmlDocument ]:
|
||||
*/
|
||||
struct LxmlDocument {
|
||||
PyObject_HEAD
|
||||
struct __pyx_vtabstruct_4lxml_5etree__Document *__pyx_vtab;
|
||||
int _ns_counter;
|
||||
PyObject *_prefix_tail;
|
||||
xmlDoc *_c_doc;
|
||||
struct __pyx_obj_4lxml_5etree__BaseParser *_parser;
|
||||
};
|
||||
|
||||
/* "lxml/etree.pyx":817
|
||||
*
|
||||
*
|
||||
* @cython.no_gc_clear # <<<<<<<<<<<<<<
|
||||
* cdef public class _Element [ type LxmlElementType, object LxmlElement ]:
|
||||
* """Element class.
|
||||
*/
|
||||
struct LxmlElement {
|
||||
PyObject_HEAD
|
||||
struct LxmlDocument *_doc;
|
||||
xmlNode *_c_node;
|
||||
PyObject *_tag;
|
||||
};
|
||||
|
||||
/* "lxml/etree.pyx":1991
|
||||
*
|
||||
*
|
||||
* cdef public class _ElementTree [ type LxmlElementTreeType, # <<<<<<<<<<<<<<
|
||||
* object LxmlElementTree ]:
|
||||
* cdef _Document _doc
|
||||
*/
|
||||
struct LxmlElementTree {
|
||||
PyObject_HEAD
|
||||
struct __pyx_vtabstruct_4lxml_5etree__ElementTree *__pyx_vtab;
|
||||
struct LxmlDocument *_doc;
|
||||
struct LxmlElement *_context_node;
|
||||
};
|
||||
|
||||
/* "lxml/etree.pyx":2765
|
||||
*
|
||||
*
|
||||
* cdef public class _ElementTagMatcher [ object LxmlElementTagMatcher, # <<<<<<<<<<<<<<
|
||||
* type LxmlElementTagMatcherType ]:
|
||||
* """
|
||||
*/
|
||||
struct LxmlElementTagMatcher {
|
||||
PyObject_HEAD
|
||||
struct __pyx_vtabstruct_4lxml_5etree__ElementTagMatcher *__pyx_vtab;
|
||||
PyObject *_pystrings;
|
||||
int _node_type;
|
||||
char *_href;
|
||||
char *_name;
|
||||
};
|
||||
|
||||
/* "lxml/etree.pyx":2796
|
||||
* self._name = NULL
|
||||
*
|
||||
* cdef public class _ElementIterator(_ElementTagMatcher) [ # <<<<<<<<<<<<<<
|
||||
* object LxmlElementIterator, type LxmlElementIteratorType ]:
|
||||
* """
|
||||
*/
|
||||
struct LxmlElementIterator {
|
||||
struct LxmlElementTagMatcher __pyx_base;
|
||||
struct LxmlElement *_node;
|
||||
_node_to_node_function _next_element;
|
||||
};
|
||||
|
||||
/* "src/lxml/classlookup.pxi":6
|
||||
* # Custom Element classes
|
||||
*
|
||||
* cdef public class ElementBase(_Element) [ type LxmlElementBaseType, # <<<<<<<<<<<<<<
|
||||
* object LxmlElementBase ]:
|
||||
* """ElementBase(*children, attrib=None, nsmap=None, **_extra)
|
||||
*/
|
||||
struct LxmlElementBase {
|
||||
struct LxmlElement __pyx_base;
|
||||
};
|
||||
|
||||
/* "src/lxml/classlookup.pxi":210
|
||||
* # Element class lookup
|
||||
*
|
||||
* ctypedef public object (*_element_class_lookup_function)(object, _Document, xmlNode*) # <<<<<<<<<<<<<<
|
||||
*
|
||||
* # class to store element class lookup functions
|
||||
*/
|
||||
typedef PyObject *(*_element_class_lookup_function)(PyObject *, struct LxmlDocument *, xmlNode *);
|
||||
|
||||
/* "src/lxml/classlookup.pxi":213
|
||||
*
|
||||
* # class to store element class lookup functions
|
||||
* cdef public class ElementClassLookup [ type LxmlElementClassLookupType, # <<<<<<<<<<<<<<
|
||||
* object LxmlElementClassLookup ]:
|
||||
* """ElementClassLookup(self)
|
||||
*/
|
||||
struct LxmlElementClassLookup {
|
||||
PyObject_HEAD
|
||||
_element_class_lookup_function _lookup_function;
|
||||
};
|
||||
|
||||
/* "src/lxml/classlookup.pxi":221
|
||||
*
|
||||
*
|
||||
* cdef public class FallbackElementClassLookup(ElementClassLookup) \ # <<<<<<<<<<<<<<
|
||||
* [ type LxmlFallbackElementClassLookupType,
|
||||
* object LxmlFallbackElementClassLookup ]:
|
||||
*/
|
||||
struct LxmlFallbackElementClassLookup {
|
||||
struct LxmlElementClassLookup __pyx_base;
|
||||
struct __pyx_vtabstruct_4lxml_5etree_FallbackElementClassLookup *__pyx_vtab;
|
||||
struct LxmlElementClassLookup *fallback;
|
||||
_element_class_lookup_function _fallback_function;
|
||||
};
|
||||
|
||||
#ifndef __PYX_HAVE_API__lxml__etree
|
||||
|
||||
#ifdef CYTHON_EXTERN_C
|
||||
#undef __PYX_EXTERN_C
|
||||
#define __PYX_EXTERN_C CYTHON_EXTERN_C
|
||||
#elif defined(__PYX_EXTERN_C)
|
||||
#ifdef _MSC_VER
|
||||
#pragma message ("Please do not define the '__PYX_EXTERN_C' macro externally. Use 'CYTHON_EXTERN_C' instead.")
|
||||
#else
|
||||
#warning Please do not define the '__PYX_EXTERN_C' macro externally. Use 'CYTHON_EXTERN_C' instead.
|
||||
#endif
|
||||
#else
|
||||
#ifdef __cplusplus
|
||||
#define __PYX_EXTERN_C extern "C"
|
||||
#else
|
||||
#define __PYX_EXTERN_C extern
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef DL_IMPORT
|
||||
#define DL_IMPORT(_T) _T
|
||||
#endif
|
||||
|
||||
__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlDocumentType;
|
||||
__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlElementType;
|
||||
__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlElementTreeType;
|
||||
__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlElementTagMatcherType;
|
||||
__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlElementIteratorType;
|
||||
__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlElementBaseType;
|
||||
__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlElementClassLookupType;
|
||||
__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlFallbackElementClassLookupType;
|
||||
|
||||
__PYX_EXTERN_C struct LxmlElement *deepcopyNodeToDocument(struct LxmlDocument *, xmlNode *);
|
||||
__PYX_EXTERN_C struct LxmlElementTree *elementTreeFactory(struct LxmlElement *);
|
||||
__PYX_EXTERN_C struct LxmlElementTree *newElementTree(struct LxmlElement *, PyObject *);
|
||||
__PYX_EXTERN_C struct LxmlElementTree *adoptExternalDocument(xmlDoc *, PyObject *, int);
|
||||
__PYX_EXTERN_C struct LxmlElement *elementFactory(struct LxmlDocument *, xmlNode *);
|
||||
__PYX_EXTERN_C struct LxmlElement *makeElement(PyObject *, struct LxmlDocument *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *);
|
||||
__PYX_EXTERN_C struct LxmlElement *makeSubElement(struct LxmlElement *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *);
|
||||
__PYX_EXTERN_C void setElementClassLookupFunction(_element_class_lookup_function, PyObject *);
|
||||
__PYX_EXTERN_C PyObject *lookupDefaultElementClass(PyObject *, PyObject *, xmlNode *);
|
||||
__PYX_EXTERN_C PyObject *lookupNamespaceElementClass(PyObject *, PyObject *, xmlNode *);
|
||||
__PYX_EXTERN_C PyObject *callLookupFallback(struct LxmlFallbackElementClassLookup *, struct LxmlDocument *, xmlNode *);
|
||||
__PYX_EXTERN_C int tagMatches(xmlNode *, const xmlChar *, const xmlChar *);
|
||||
__PYX_EXTERN_C struct LxmlDocument *documentOrRaise(PyObject *);
|
||||
__PYX_EXTERN_C struct LxmlElement *rootNodeOrRaise(PyObject *);
|
||||
__PYX_EXTERN_C int hasText(xmlNode *);
|
||||
__PYX_EXTERN_C int hasTail(xmlNode *);
|
||||
__PYX_EXTERN_C PyObject *textOf(xmlNode *);
|
||||
__PYX_EXTERN_C PyObject *tailOf(xmlNode *);
|
||||
__PYX_EXTERN_C int setNodeText(xmlNode *, PyObject *);
|
||||
__PYX_EXTERN_C int setTailText(xmlNode *, PyObject *);
|
||||
__PYX_EXTERN_C PyObject *attributeValue(xmlNode *, xmlAttr *);
|
||||
__PYX_EXTERN_C PyObject *attributeValueFromNsName(xmlNode *, const xmlChar *, const xmlChar *);
|
||||
__PYX_EXTERN_C PyObject *getAttributeValue(struct LxmlElement *, PyObject *, PyObject *);
|
||||
__PYX_EXTERN_C PyObject *iterattributes(struct LxmlElement *, int);
|
||||
__PYX_EXTERN_C PyObject *collectAttributes(xmlNode *, int);
|
||||
__PYX_EXTERN_C int setAttributeValue(struct LxmlElement *, PyObject *, PyObject *);
|
||||
__PYX_EXTERN_C int delAttribute(struct LxmlElement *, PyObject *);
|
||||
__PYX_EXTERN_C int delAttributeFromNsName(xmlNode *, const xmlChar *, const xmlChar *);
|
||||
__PYX_EXTERN_C int hasChild(xmlNode *);
|
||||
__PYX_EXTERN_C xmlNode *findChild(xmlNode *, Py_ssize_t);
|
||||
__PYX_EXTERN_C xmlNode *findChildForwards(xmlNode *, Py_ssize_t);
|
||||
__PYX_EXTERN_C xmlNode *findChildBackwards(xmlNode *, Py_ssize_t);
|
||||
__PYX_EXTERN_C xmlNode *nextElement(xmlNode *);
|
||||
__PYX_EXTERN_C xmlNode *previousElement(xmlNode *);
|
||||
__PYX_EXTERN_C void appendChild(struct LxmlElement *, struct LxmlElement *);
|
||||
__PYX_EXTERN_C int appendChildToElement(struct LxmlElement *, struct LxmlElement *);
|
||||
__PYX_EXTERN_C PyObject *pyunicode(const xmlChar *);
|
||||
__PYX_EXTERN_C PyObject *utf8(PyObject *);
|
||||
__PYX_EXTERN_C PyObject *getNsTag(PyObject *);
|
||||
__PYX_EXTERN_C PyObject *getNsTagWithEmptyNs(PyObject *);
|
||||
__PYX_EXTERN_C PyObject *namespacedName(xmlNode *);
|
||||
__PYX_EXTERN_C PyObject *namespacedNameFromNsName(const xmlChar *, const xmlChar *);
|
||||
__PYX_EXTERN_C void iteratorStoreNext(struct LxmlElementIterator *, struct LxmlElement *);
|
||||
__PYX_EXTERN_C void initTagMatch(struct LxmlElementTagMatcher *, PyObject *);
|
||||
__PYX_EXTERN_C xmlNs *findOrBuildNodeNsPrefix(struct LxmlDocument *, xmlNode *, const xmlChar *, const xmlChar *);
|
||||
|
||||
#endif /* !__PYX_HAVE_API__lxml__etree */
|
||||
|
||||
/* WARNING: the interface of the module init function changed in CPython 3.5. */
|
||||
/* It now returns a PyModuleDef instance instead of a PyModule instance. */
|
||||
|
||||
/* WARNING: Use PyImport_AppendInittab("etree", PyInit_etree) instead of calling PyInit_etree directly from Python 3.5 */
|
||||
PyMODINIT_FUNC PyInit_etree(void);
|
||||
|
||||
/* Deprecation shim: on supported compilers, route every direct call of
 * PyInit_etree() through an identity function that carries a "deprecated"
 * attribute, so that such calls produce a compile-time diagnostic.
 * The function itself only hands its argument back.
 */
#if PY_VERSION_HEX >= 0x03050000 && (defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER) || (defined(__cplusplus) && __cplusplus >= 201402L))
  /* Pick the attribute spelling understood by the current compiler. */
  #if defined(__cplusplus) && __cplusplus >= 201402L
    [[deprecated("Use PyImport_AppendInittab(\"etree\", PyInit_etree) instead of calling PyInit_etree directly.")]] inline
  #elif defined(__GNUC__) || defined(__clang__)
    __attribute__ ((__deprecated__("Use PyImport_AppendInittab(\"etree\", PyInit_etree) instead of calling PyInit_etree directly."), __unused__)) __inline__
  #elif defined(_MSC_VER)
    __declspec(deprecated("Use PyImport_AppendInittab(\"etree\", PyInit_etree) instead of calling PyInit_etree directly.")) __inline
  #endif
  static PyObject* __PYX_WARN_IF_PyInit_etree_INIT_CALLED(PyObject* res) {
      return res;
  }
  /* Function-like macro: only call expressions "PyInit_etree()" are
   * rewritten; the inner PyInit_etree is not expanded again. */
  #define PyInit_etree() __PYX_WARN_IF_PyInit_etree_INIT_CALLED(PyInit_etree())
#endif
|
||||
|
||||
#endif /* !__PYX_HAVE__lxml__etree */
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,204 +0,0 @@
|
||||
/* Generated by Cython 3.1.4 */
|
||||
|
||||
#ifndef __PYX_HAVE_API__lxml__etree
|
||||
#define __PYX_HAVE_API__lxml__etree
|
||||
#ifdef __MINGW64__
|
||||
#define MS_WIN64
|
||||
#endif
|
||||
#include "Python.h"
|
||||
#include "etree.h"
|
||||
|
||||
static struct LxmlElement *(*__pyx_api_f_4lxml_5etree_deepcopyNodeToDocument)(struct LxmlDocument *, xmlNode *) = 0;
|
||||
#define deepcopyNodeToDocument __pyx_api_f_4lxml_5etree_deepcopyNodeToDocument
|
||||
static struct LxmlElementTree *(*__pyx_api_f_4lxml_5etree_elementTreeFactory)(struct LxmlElement *) = 0;
|
||||
#define elementTreeFactory __pyx_api_f_4lxml_5etree_elementTreeFactory
|
||||
static struct LxmlElementTree *(*__pyx_api_f_4lxml_5etree_newElementTree)(struct LxmlElement *, PyObject *) = 0;
|
||||
#define newElementTree __pyx_api_f_4lxml_5etree_newElementTree
|
||||
static struct LxmlElementTree *(*__pyx_api_f_4lxml_5etree_adoptExternalDocument)(xmlDoc *, PyObject *, int) = 0;
|
||||
#define adoptExternalDocument __pyx_api_f_4lxml_5etree_adoptExternalDocument
|
||||
static struct LxmlElement *(*__pyx_api_f_4lxml_5etree_elementFactory)(struct LxmlDocument *, xmlNode *) = 0;
|
||||
#define elementFactory __pyx_api_f_4lxml_5etree_elementFactory
|
||||
static struct LxmlElement *(*__pyx_api_f_4lxml_5etree_makeElement)(PyObject *, struct LxmlDocument *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *) = 0;
|
||||
#define makeElement __pyx_api_f_4lxml_5etree_makeElement
|
||||
static struct LxmlElement *(*__pyx_api_f_4lxml_5etree_makeSubElement)(struct LxmlElement *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *) = 0;
|
||||
#define makeSubElement __pyx_api_f_4lxml_5etree_makeSubElement
|
||||
static void (*__pyx_api_f_4lxml_5etree_setElementClassLookupFunction)(_element_class_lookup_function, PyObject *) = 0;
|
||||
#define setElementClassLookupFunction __pyx_api_f_4lxml_5etree_setElementClassLookupFunction
|
||||
static PyObject *(*__pyx_api_f_4lxml_5etree_lookupDefaultElementClass)(PyObject *, PyObject *, xmlNode *) = 0;
|
||||
#define lookupDefaultElementClass __pyx_api_f_4lxml_5etree_lookupDefaultElementClass
|
||||
static PyObject *(*__pyx_api_f_4lxml_5etree_lookupNamespaceElementClass)(PyObject *, PyObject *, xmlNode *) = 0;
|
||||
#define lookupNamespaceElementClass __pyx_api_f_4lxml_5etree_lookupNamespaceElementClass
|
||||
static PyObject *(*__pyx_api_f_4lxml_5etree_callLookupFallback)(struct LxmlFallbackElementClassLookup *, struct LxmlDocument *, xmlNode *) = 0;
|
||||
#define callLookupFallback __pyx_api_f_4lxml_5etree_callLookupFallback
|
||||
static int (*__pyx_api_f_4lxml_5etree_tagMatches)(xmlNode *, const xmlChar *, const xmlChar *) = 0;
|
||||
#define tagMatches __pyx_api_f_4lxml_5etree_tagMatches
|
||||
static struct LxmlDocument *(*__pyx_api_f_4lxml_5etree_documentOrRaise)(PyObject *) = 0;
|
||||
#define documentOrRaise __pyx_api_f_4lxml_5etree_documentOrRaise
|
||||
static struct LxmlElement *(*__pyx_api_f_4lxml_5etree_rootNodeOrRaise)(PyObject *) = 0;
|
||||
#define rootNodeOrRaise __pyx_api_f_4lxml_5etree_rootNodeOrRaise
|
||||
static int (*__pyx_api_f_4lxml_5etree_hasText)(xmlNode *) = 0;
|
||||
#define hasText __pyx_api_f_4lxml_5etree_hasText
|
||||
static int (*__pyx_api_f_4lxml_5etree_hasTail)(xmlNode *) = 0;
|
||||
#define hasTail __pyx_api_f_4lxml_5etree_hasTail
|
||||
static PyObject *(*__pyx_api_f_4lxml_5etree_textOf)(xmlNode *) = 0;
|
||||
#define textOf __pyx_api_f_4lxml_5etree_textOf
|
||||
static PyObject *(*__pyx_api_f_4lxml_5etree_tailOf)(xmlNode *) = 0;
|
||||
#define tailOf __pyx_api_f_4lxml_5etree_tailOf
|
||||
static int (*__pyx_api_f_4lxml_5etree_setNodeText)(xmlNode *, PyObject *) = 0;
|
||||
#define setNodeText __pyx_api_f_4lxml_5etree_setNodeText
|
||||
static int (*__pyx_api_f_4lxml_5etree_setTailText)(xmlNode *, PyObject *) = 0;
|
||||
#define setTailText __pyx_api_f_4lxml_5etree_setTailText
|
||||
static PyObject *(*__pyx_api_f_4lxml_5etree_attributeValue)(xmlNode *, xmlAttr *) = 0;
|
||||
#define attributeValue __pyx_api_f_4lxml_5etree_attributeValue
|
||||
static PyObject *(*__pyx_api_f_4lxml_5etree_attributeValueFromNsName)(xmlNode *, const xmlChar *, const xmlChar *) = 0;
|
||||
#define attributeValueFromNsName __pyx_api_f_4lxml_5etree_attributeValueFromNsName
|
||||
static PyObject *(*__pyx_api_f_4lxml_5etree_getAttributeValue)(struct LxmlElement *, PyObject *, PyObject *) = 0;
|
||||
#define getAttributeValue __pyx_api_f_4lxml_5etree_getAttributeValue
|
||||
static PyObject *(*__pyx_api_f_4lxml_5etree_iterattributes)(struct LxmlElement *, int) = 0;
|
||||
#define iterattributes __pyx_api_f_4lxml_5etree_iterattributes
|
||||
static PyObject *(*__pyx_api_f_4lxml_5etree_collectAttributes)(xmlNode *, int) = 0;
|
||||
#define collectAttributes __pyx_api_f_4lxml_5etree_collectAttributes
|
||||
static int (*__pyx_api_f_4lxml_5etree_setAttributeValue)(struct LxmlElement *, PyObject *, PyObject *) = 0;
|
||||
#define setAttributeValue __pyx_api_f_4lxml_5etree_setAttributeValue
|
||||
static int (*__pyx_api_f_4lxml_5etree_delAttribute)(struct LxmlElement *, PyObject *) = 0;
|
||||
#define delAttribute __pyx_api_f_4lxml_5etree_delAttribute
|
||||
static int (*__pyx_api_f_4lxml_5etree_delAttributeFromNsName)(xmlNode *, const xmlChar *, const xmlChar *) = 0;
|
||||
#define delAttributeFromNsName __pyx_api_f_4lxml_5etree_delAttributeFromNsName
|
||||
static int (*__pyx_api_f_4lxml_5etree_hasChild)(xmlNode *) = 0;
|
||||
#define hasChild __pyx_api_f_4lxml_5etree_hasChild
|
||||
static xmlNode *(*__pyx_api_f_4lxml_5etree_findChild)(xmlNode *, Py_ssize_t) = 0;
|
||||
#define findChild __pyx_api_f_4lxml_5etree_findChild
|
||||
static xmlNode *(*__pyx_api_f_4lxml_5etree_findChildForwards)(xmlNode *, Py_ssize_t) = 0;
|
||||
#define findChildForwards __pyx_api_f_4lxml_5etree_findChildForwards
|
||||
static xmlNode *(*__pyx_api_f_4lxml_5etree_findChildBackwards)(xmlNode *, Py_ssize_t) = 0;
|
||||
#define findChildBackwards __pyx_api_f_4lxml_5etree_findChildBackwards
|
||||
static xmlNode *(*__pyx_api_f_4lxml_5etree_nextElement)(xmlNode *) = 0;
|
||||
#define nextElement __pyx_api_f_4lxml_5etree_nextElement
|
||||
static xmlNode *(*__pyx_api_f_4lxml_5etree_previousElement)(xmlNode *) = 0;
|
||||
#define previousElement __pyx_api_f_4lxml_5etree_previousElement
|
||||
static void (*__pyx_api_f_4lxml_5etree_appendChild)(struct LxmlElement *, struct LxmlElement *) = 0;
|
||||
#define appendChild __pyx_api_f_4lxml_5etree_appendChild
|
||||
static int (*__pyx_api_f_4lxml_5etree_appendChildToElement)(struct LxmlElement *, struct LxmlElement *) = 0;
|
||||
#define appendChildToElement __pyx_api_f_4lxml_5etree_appendChildToElement
|
||||
static PyObject *(*__pyx_api_f_4lxml_5etree_pyunicode)(const xmlChar *) = 0;
|
||||
#define pyunicode __pyx_api_f_4lxml_5etree_pyunicode
|
||||
static PyObject *(*__pyx_api_f_4lxml_5etree_utf8)(PyObject *) = 0;
|
||||
#define utf8 __pyx_api_f_4lxml_5etree_utf8
|
||||
static PyObject *(*__pyx_api_f_4lxml_5etree_getNsTag)(PyObject *) = 0;
|
||||
#define getNsTag __pyx_api_f_4lxml_5etree_getNsTag
|
||||
static PyObject *(*__pyx_api_f_4lxml_5etree_getNsTagWithEmptyNs)(PyObject *) = 0;
|
||||
#define getNsTagWithEmptyNs __pyx_api_f_4lxml_5etree_getNsTagWithEmptyNs
|
||||
static PyObject *(*__pyx_api_f_4lxml_5etree_namespacedName)(xmlNode *) = 0;
|
||||
#define namespacedName __pyx_api_f_4lxml_5etree_namespacedName
|
||||
static PyObject *(*__pyx_api_f_4lxml_5etree_namespacedNameFromNsName)(const xmlChar *, const xmlChar *) = 0;
|
||||
#define namespacedNameFromNsName __pyx_api_f_4lxml_5etree_namespacedNameFromNsName
|
||||
static void (*__pyx_api_f_4lxml_5etree_iteratorStoreNext)(struct LxmlElementIterator *, struct LxmlElement *) = 0;
|
||||
#define iteratorStoreNext __pyx_api_f_4lxml_5etree_iteratorStoreNext
|
||||
static void (*__pyx_api_f_4lxml_5etree_initTagMatch)(struct LxmlElementTagMatcher *, PyObject *) = 0;
|
||||
#define initTagMatch __pyx_api_f_4lxml_5etree_initTagMatch
|
||||
static xmlNs *(*__pyx_api_f_4lxml_5etree_findOrBuildNodeNsPrefix)(struct LxmlDocument *, xmlNode *, const xmlChar *, const xmlChar *) = 0;
|
||||
#define findOrBuildNodeNsPrefix __pyx_api_f_4lxml_5etree_findOrBuildNodeNsPrefix
|
||||
static int __Pyx_ImportFunction_3_1_4(PyObject *module, const char *funcname, void (**f)(void), const char *sig);

#ifndef __PYX_HAVE_RT_ImportFunction_3_1_4
#define __PYX_HAVE_RT_ImportFunction_3_1_4
/* Look up the capsule stored under `funcname` in the module's
 * "__pyx_capi__" attribute, verify that the capsule name equals `sig`,
 * and store the function pointer it holds in `*f`.
 *
 * Returns 0 on success and -1 on failure.  ImportError is set when the
 * entry is missing and TypeError when the signature does not match.
 */
static int __Pyx_ImportFunction_3_1_4(PyObject *module, const char *funcname, void (**f)(void), const char *sig) {
    PyObject *capi_dict = 0;
    PyObject *capsule = 0;
    int status = -1;
    /* Converts the capsule's object pointer into a function pointer
     * without a direct cast between the two pointer kinds. */
    union {
        void (*fp)(void);
        void *p;
    } conv;

    capi_dict = PyObject_GetAttrString(module, "__pyx_capi__");
    if (!capi_dict)
        goto done;

    /* Both branches leave `capsule` as an owned reference or NULL. */
#if (defined(Py_LIMITED_API) && Py_LIMITED_API >= 0x030d0000) || (!defined(Py_LIMITED_API) && PY_VERSION_HEX >= 0x030d0000)
    PyDict_GetItemStringRef(capi_dict, funcname, &capsule);
#else
    capsule = PyDict_GetItemString(capi_dict, funcname);
    Py_XINCREF(capsule);
#endif
    if (!capsule) {
        PyErr_Format(PyExc_ImportError,
            "%.200s does not export expected C function %.200s",
            PyModule_GetName(module), funcname);
        goto done;
    }
    if (!PyCapsule_IsValid(capsule, sig)) {
        PyErr_Format(PyExc_TypeError,
            "C function %.200s.%.200s has wrong signature (expected %.500s, got %.500s)",
            PyModule_GetName(module), funcname, sig, PyCapsule_GetName(capsule));
        goto done;
    }

    conv.p = PyCapsule_GetPointer(capsule, sig);
    *f = conv.fp;
    if (*f)
        status = 0;

done:
    /* Single exit: release whatever was acquired on the way. */
    Py_XDECREF(capi_dict);
    Py_XDECREF(capsule);
    return status;
}
#endif
|
||||
|
||||
|
||||
static int import_lxml__etree(void) {
|
||||
PyObject *module = 0;
|
||||
module = PyImport_ImportModule("lxml.etree");
|
||||
if (!module) goto bad;
|
||||
if (__Pyx_ImportFunction_3_1_4(module, "deepcopyNodeToDocument", (void (**)(void))&__pyx_api_f_4lxml_5etree_deepcopyNodeToDocument, "struct LxmlElement *(struct LxmlDocument *, xmlNode *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_3_1_4(module, "elementTreeFactory", (void (**)(void))&__pyx_api_f_4lxml_5etree_elementTreeFactory, "struct LxmlElementTree *(struct LxmlElement *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_3_1_4(module, "newElementTree", (void (**)(void))&__pyx_api_f_4lxml_5etree_newElementTree, "struct LxmlElementTree *(struct LxmlElement *, PyObject *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_3_1_4(module, "adoptExternalDocument", (void (**)(void))&__pyx_api_f_4lxml_5etree_adoptExternalDocument, "struct LxmlElementTree *(xmlDoc *, PyObject *, int)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_3_1_4(module, "elementFactory", (void (**)(void))&__pyx_api_f_4lxml_5etree_elementFactory, "struct LxmlElement *(struct LxmlDocument *, xmlNode *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_3_1_4(module, "makeElement", (void (**)(void))&__pyx_api_f_4lxml_5etree_makeElement, "struct LxmlElement *(PyObject *, struct LxmlDocument *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_3_1_4(module, "makeSubElement", (void (**)(void))&__pyx_api_f_4lxml_5etree_makeSubElement, "struct LxmlElement *(struct LxmlElement *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_3_1_4(module, "setElementClassLookupFunction", (void (**)(void))&__pyx_api_f_4lxml_5etree_setElementClassLookupFunction, "void (_element_class_lookup_function, PyObject *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_3_1_4(module, "lookupDefaultElementClass", (void (**)(void))&__pyx_api_f_4lxml_5etree_lookupDefaultElementClass, "PyObject *(PyObject *, PyObject *, xmlNode *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_3_1_4(module, "lookupNamespaceElementClass", (void (**)(void))&__pyx_api_f_4lxml_5etree_lookupNamespaceElementClass, "PyObject *(PyObject *, PyObject *, xmlNode *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_3_1_4(module, "callLookupFallback", (void (**)(void))&__pyx_api_f_4lxml_5etree_callLookupFallback, "PyObject *(struct LxmlFallbackElementClassLookup *, struct LxmlDocument *, xmlNode *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_3_1_4(module, "tagMatches", (void (**)(void))&__pyx_api_f_4lxml_5etree_tagMatches, "int (xmlNode *, const xmlChar *, const xmlChar *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_3_1_4(module, "documentOrRaise", (void (**)(void))&__pyx_api_f_4lxml_5etree_documentOrRaise, "struct LxmlDocument *(PyObject *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_3_1_4(module, "rootNodeOrRaise", (void (**)(void))&__pyx_api_f_4lxml_5etree_rootNodeOrRaise, "struct LxmlElement *(PyObject *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_3_1_4(module, "hasText", (void (**)(void))&__pyx_api_f_4lxml_5etree_hasText, "int (xmlNode *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_3_1_4(module, "hasTail", (void (**)(void))&__pyx_api_f_4lxml_5etree_hasTail, "int (xmlNode *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_3_1_4(module, "textOf", (void (**)(void))&__pyx_api_f_4lxml_5etree_textOf, "PyObject *(xmlNode *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_3_1_4(module, "tailOf", (void (**)(void))&__pyx_api_f_4lxml_5etree_tailOf, "PyObject *(xmlNode *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_3_1_4(module, "setNodeText", (void (**)(void))&__pyx_api_f_4lxml_5etree_setNodeText, "int (xmlNode *, PyObject *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_3_1_4(module, "setTailText", (void (**)(void))&__pyx_api_f_4lxml_5etree_setTailText, "int (xmlNode *, PyObject *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_3_1_4(module, "attributeValue", (void (**)(void))&__pyx_api_f_4lxml_5etree_attributeValue, "PyObject *(xmlNode *, xmlAttr *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_3_1_4(module, "attributeValueFromNsName", (void (**)(void))&__pyx_api_f_4lxml_5etree_attributeValueFromNsName, "PyObject *(xmlNode *, const xmlChar *, const xmlChar *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_3_1_4(module, "getAttributeValue", (void (**)(void))&__pyx_api_f_4lxml_5etree_getAttributeValue, "PyObject *(struct LxmlElement *, PyObject *, PyObject *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_3_1_4(module, "iterattributes", (void (**)(void))&__pyx_api_f_4lxml_5etree_iterattributes, "PyObject *(struct LxmlElement *, int)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_3_1_4(module, "collectAttributes", (void (**)(void))&__pyx_api_f_4lxml_5etree_collectAttributes, "PyObject *(xmlNode *, int)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_3_1_4(module, "setAttributeValue", (void (**)(void))&__pyx_api_f_4lxml_5etree_setAttributeValue, "int (struct LxmlElement *, PyObject *, PyObject *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_3_1_4(module, "delAttribute", (void (**)(void))&__pyx_api_f_4lxml_5etree_delAttribute, "int (struct LxmlElement *, PyObject *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_3_1_4(module, "delAttributeFromNsName", (void (**)(void))&__pyx_api_f_4lxml_5etree_delAttributeFromNsName, "int (xmlNode *, const xmlChar *, const xmlChar *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_3_1_4(module, "hasChild", (void (**)(void))&__pyx_api_f_4lxml_5etree_hasChild, "int (xmlNode *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_3_1_4(module, "findChild", (void (**)(void))&__pyx_api_f_4lxml_5etree_findChild, "xmlNode *(xmlNode *, Py_ssize_t)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_3_1_4(module, "findChildForwards", (void (**)(void))&__pyx_api_f_4lxml_5etree_findChildForwards, "xmlNode *(xmlNode *, Py_ssize_t)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_3_1_4(module, "findChildBackwards", (void (**)(void))&__pyx_api_f_4lxml_5etree_findChildBackwards, "xmlNode *(xmlNode *, Py_ssize_t)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_3_1_4(module, "nextElement", (void (**)(void))&__pyx_api_f_4lxml_5etree_nextElement, "xmlNode *(xmlNode *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_3_1_4(module, "previousElement", (void (**)(void))&__pyx_api_f_4lxml_5etree_previousElement, "xmlNode *(xmlNode *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_3_1_4(module, "appendChild", (void (**)(void))&__pyx_api_f_4lxml_5etree_appendChild, "void (struct LxmlElement *, struct LxmlElement *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_3_1_4(module, "appendChildToElement", (void (**)(void))&__pyx_api_f_4lxml_5etree_appendChildToElement, "int (struct LxmlElement *, struct LxmlElement *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_3_1_4(module, "pyunicode", (void (**)(void))&__pyx_api_f_4lxml_5etree_pyunicode, "PyObject *(const xmlChar *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_3_1_4(module, "utf8", (void (**)(void))&__pyx_api_f_4lxml_5etree_utf8, "PyObject *(PyObject *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_3_1_4(module, "getNsTag", (void (**)(void))&__pyx_api_f_4lxml_5etree_getNsTag, "PyObject *(PyObject *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_3_1_4(module, "getNsTagWithEmptyNs", (void (**)(void))&__pyx_api_f_4lxml_5etree_getNsTagWithEmptyNs, "PyObject *(PyObject *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_3_1_4(module, "namespacedName", (void (**)(void))&__pyx_api_f_4lxml_5etree_namespacedName, "PyObject *(xmlNode *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_3_1_4(module, "namespacedNameFromNsName", (void (**)(void))&__pyx_api_f_4lxml_5etree_namespacedNameFromNsName, "PyObject *(const xmlChar *, const xmlChar *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_3_1_4(module, "iteratorStoreNext", (void (**)(void))&__pyx_api_f_4lxml_5etree_iteratorStoreNext, "void (struct LxmlElementIterator *, struct LxmlElement *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_3_1_4(module, "initTagMatch", (void (**)(void))&__pyx_api_f_4lxml_5etree_initTagMatch, "void (struct LxmlElementTagMatcher *, PyObject *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction_3_1_4(module, "findOrBuildNodeNsPrefix", (void (**)(void))&__pyx_api_f_4lxml_5etree_findOrBuildNodeNsPrefix, "xmlNs *(struct LxmlDocument *, xmlNode *, const xmlChar *, const xmlChar *)") < 0) goto bad;
|
||||
Py_DECREF(module); module = 0;
|
||||
return 0;
|
||||
bad:
|
||||
Py_XDECREF(module);
|
||||
return -1;
|
||||
}
|
||||
|
||||
#endif /* !__PYX_HAVE_API__lxml__etree */
|
||||
@@ -1,830 +0,0 @@
|
||||
# support for extension functions in XPath and XSLT
|
||||
|
||||
cdef class XPathError(LxmlError):
    """Common base class for all XPath errors.
    """
|
||||
|
||||
cdef class XPathEvalError(XPathError):
    """Raised for an error that occurs while evaluating an XPath expression.
    """
|
||||
|
||||
cdef class XPathFunctionError(XPathEvalError):
    """Raised for an internal error while looking up an XPath extension
    function.
    """
|
||||
|
||||
cdef class XPathResultError(XPathEvalError):
    """Raised for an error while handling the result of an XPath evaluation.
    """
|
||||
|
||||
|
||||
# forward declarations
|
||||
|
||||
ctypedef int (*_register_function)(void* ctxt, name_utf, ns_uri_utf)
|
||||
cdef class _ExsltRegExp
|
||||
|
||||
################################################################################
|
||||
# Base class for XSLT and XPath evaluation contexts: functions, namespaces, ...
|
||||
|
||||
@cython.internal
cdef class _BaseContext:
    """Shared state of XSLT and XPath evaluation contexts.

    Keeps the registered namespaces and extension functions, the UTF-8
    encoded byte strings whose buffers are passed to libxml2, and temporary
    references to objects that must stay alive during an evaluation.
    """
    cdef xpath.xmlXPathContext* _xpathCtxt
    cdef _Document _doc
    cdef dict _extensions
    cdef list _namespaces
    cdef list _global_namespaces
    cdef dict _utf_refs
    cdef dict _function_cache
    cdef dict _eval_context_dict
    cdef bint _build_smart_strings
    # for exception handling and temporary reference keeping:
    cdef _TempStore _temp_refs
    cdef set _temp_documents
    cdef _ExceptionContext _exc
    cdef _ErrorLog _error_log

    def __init__(self, namespaces, extensions, error_log, enable_regexp,
                 build_smart_strings):
        """Set up namespaces and extension functions for this context.

        ``namespaces`` is a dict or an iterable of ``(prefix, uri)`` pairs,
        ``extensions`` a dict or an iterable of dicts mapping
        ``(ns_uri, name)`` to a callable.  Both may be None.

        Raises ValueError for an extension without a name and TypeError
        for an empty namespace prefix or an empty namespace URI.
        """
        cdef _ExsltRegExp _regexp
        cdef dict new_extensions
        cdef list ns
        self._utf_refs = {}
        self._global_namespaces = []
        self._function_cache = {}
        self._eval_context_dict = None
        self._error_log = error_log

        if extensions is not None:
            # a single mapping is treated like a one-element sequence
            if isinstance(extensions, dict):
                extensions = (extensions,)
            # [ {(ns, name): function} ] -> {(ns_utf, name_utf): function}
            new_extensions = {}
            for extension in extensions:
                for (ns_uri, name), function in extension.items():
                    if name is None:
                        raise ValueError("extensions must have non empty names")
                    ns_utf = self._to_utf(ns_uri)
                    name_utf = self._to_utf(name)
                    new_extensions[(ns_utf, name_utf)] = function
            extensions = new_extensions or None

        if namespaces is not None:
            if isinstance(namespaces, dict):
                namespaces = namespaces.items()
            if not namespaces:
                namespaces = None
            else:
                ns = []
                for prefix, ns_uri in namespaces:
                    if not prefix:
                        raise TypeError(
                            "empty namespace prefix is not supported in XPath")
                    if not ns_uri:
                        raise TypeError(
                            "setting default namespace is not supported in XPath")
                    prefix_utf = self._to_utf(prefix)
                    ns_uri_utf = self._to_utf(ns_uri)
                    ns.append((prefix_utf, ns_uri_utf))
                namespaces = ns

        self._extensions = extensions
        self._namespaces = namespaces
        self._build_smart_strings = build_smart_strings
        self._doc = None
        self._exc = _ExceptionContext()
        self._temp_refs = _TempStore()
        self._temp_documents = set()

        if enable_regexp:
            _regexp = _ExsltRegExp()
            _regexp._register_in_context(self)

    cdef _BaseContext _copy(self):
        """Create a new context of the same class with copies of the
        namespace list and the extension dict of this one."""
        cdef _BaseContext context
        namespaces = self._namespaces[:] if self._namespaces is not None else None
        context = self.__class__(namespaces, None, self._error_log, False,
                                 self._build_smart_strings)
        if self._extensions is not None:
            context._extensions = self._extensions.copy()
        return context

    cdef bytes _to_utf(self, s):
        "Convert to UTF-8 and keep a reference to the encoded string"
        cdef python.PyObject* cached
        if s is None:
            return None
        cached = python.PyDict_GetItem(self._utf_refs, s)
        if cached is not NULL:
            return <bytes>cached
        utf = _utf8(s)
        self._utf_refs[s] = utf
        if python.IS_PYPY:
            # use C level refs, PyPy refs are not enough!
            python.Py_INCREF(utf)
        return utf

    cdef void _set_xpath_context(self, xpath.xmlXPathContext* xpathCtxt) noexcept:
        # Attach this object to the libxml2 context and install the error hook.
        self._xpathCtxt = xpathCtxt
        xpathCtxt.userData = <void*>self
        # Need a cast here because older libxml2 releases do not use 'const' in the functype.
        xpathCtxt.error = <xmlerror.xmlStructuredErrorFunc> _receiveXPathError

    @cython.final
    cdef _register_context(self, _Document doc):
        """Remember the document being evaluated and reset stored exceptions."""
        self._doc = doc
        self._exc.clear()

    @cython.final
    cdef _cleanup_context(self):
        """Drop the per-evaluation state: UTF-8 refs, eval dict and document."""
        #xpath.xmlXPathRegisteredNsCleanup(self._xpathCtxt)
        #self.unregisterGlobalNamespaces()
        if python.IS_PYPY:
            # clean up double refs in PyPy (see "_to_utf()" method)
            for ref in self._utf_refs.itervalues():
                python.Py_DECREF(ref)
        self._utf_refs.clear()
        self._eval_context_dict = None
        self._doc = None

    @cython.final
    cdef _release_context(self):
        """Detach from the libxml2 XPath context, if one is attached."""
        if self._xpathCtxt is NULL:
            return
        self._xpathCtxt.userData = NULL
        self._xpathCtxt = NULL

    # namespaces (internal UTF-8 methods with leading '_')

    cdef addNamespace(self, prefix, ns_uri):
        """Add or replace a local prefix mapping and register it with the
        XPath context if one is attached.  Raises TypeError for a None prefix.
        """
        cdef list namespaces
        if prefix is None:
            raise TypeError("empty prefix is not supported in XPath")
        prefix_utf = self._to_utf(prefix)
        ns_uri_utf = self._to_utf(ns_uri)
        new_item = (prefix_utf, ns_uri_utf)
        if self._namespaces is None:
            self._namespaces = [new_item]
        else:
            # rebuild the list, replacing an existing entry for this prefix
            namespaces = []
            for item in self._namespaces:
                if item[0] == prefix_utf:
                    item = new_item
                    new_item = None
                namespaces.append(item)
            if new_item is not None:
                namespaces.append(new_item)
            self._namespaces = namespaces
        if self._xpathCtxt is not NULL:
            xpath.xmlXPathRegisterNs(
                self._xpathCtxt, _xcstr(prefix_utf), _xcstr(ns_uri_utf))

    cdef registerNamespace(self, prefix, ns_uri):
        """Register a prefix with the XPath context and record it in the
        list of global namespaces.  Raises TypeError for a None prefix.
        """
        if prefix is None:
            raise TypeError("empty prefix is not supported in XPath")
        prefix_utf = self._to_utf(prefix)
        ns_uri_utf = self._to_utf(ns_uri)
        self._global_namespaces.append(prefix_utf)
        xpath.xmlXPathRegisterNs(self._xpathCtxt,
                                 _xcstr(prefix_utf), _xcstr(ns_uri_utf))

    cdef registerLocalNamespaces(self):
        """Register all locally configured prefixes with the XPath context."""
        if self._namespaces is None:
            return
        for prefix_utf, ns_uri_utf in self._namespaces:
            xpath.xmlXPathRegisterNs(
                self._xpathCtxt, _xcstr(prefix_utf), _xcstr(ns_uri_utf))

    cdef registerGlobalNamespaces(self):
        """Register the prefixes returned by _find_all_extension_prefixes()."""
        cdef list ns_prefixes = _find_all_extension_prefixes()
        if python.PyList_GET_SIZE(ns_prefixes) > 0:
            for prefix_utf, ns_uri_utf in ns_prefixes:
                self._global_namespaces.append(prefix_utf)
                xpath.xmlXPathRegisterNs(
                    self._xpathCtxt, _xcstr(prefix_utf), _xcstr(ns_uri_utf))

    cdef unregisterGlobalNamespaces(self):
        """Unregister every recorded global prefix and empty the record."""
        if python.PyList_GET_SIZE(self._global_namespaces) > 0:
            for prefix_utf in self._global_namespaces:
                # registering a NULL URI removes the prefix mapping
                xpath.xmlXPathRegisterNs(self._xpathCtxt,
                                         _xcstr(prefix_utf), NULL)
            del self._global_namespaces[:]

    cdef void _unregisterNamespace(self, prefix_utf) noexcept:
        # Remove a single prefix mapping from the XPath context.
        xpath.xmlXPathRegisterNs(self._xpathCtxt,
                                 _xcstr(prefix_utf), NULL)

    # extension functions

    cdef int _addLocalExtensionFunction(self, ns_utf, name_utf, function) except -1:
        """Store a function in the local extension dict, creating it on demand."""
        if self._extensions is None:
            self._extensions = {}
        self._extensions[(ns_utf, name_utf)] = function
        return 0

    cdef registerGlobalFunctions(self, void* ctxt,
                                 _register_function reg_func):
        """Cache all functions of the global namespace registries and pass
        each (name, namespace) pair to ``reg_func``."""
        cdef python.PyObject* cached
        cdef dict d
        for ns_utf, ns_functions in __FUNCTION_NAMESPACE_REGISTRIES.iteritems():
            cached = python.PyDict_GetItem(self._function_cache, ns_utf)
            if cached is NULL:
                d = {}
                self._function_cache[ns_utf] = d
            else:
                d = <dict>cached
            for name_utf, function in ns_functions.iteritems():
                d[name_utf] = function
                reg_func(ctxt, name_utf, ns_utf)

    cdef registerLocalFunctions(self, void* ctxt,
                                _register_function reg_func):
        """Cache all local extension functions and pass each
        (name, namespace) pair to ``reg_func``."""
        cdef python.PyObject* cached
        cdef dict d
        if self._extensions is None:
            return  # done
        last_ns = None
        d = None
        for (ns_utf, name_utf), function in self._extensions.iteritems():
            # only look up the per-namespace dict when the namespace changes
            if ns_utf is not last_ns or d is None:
                last_ns = ns_utf
                cached = python.PyDict_GetItem(self._function_cache, ns_utf)
                if cached is NULL:
                    d = {}
                    self._function_cache[ns_utf] = d
                else:
                    d = <dict>cached
            d[name_utf] = function
            reg_func(ctxt, name_utf, ns_utf)

    cdef unregisterAllFunctions(self, void* ctxt,
                                _register_function unreg_func):
        """Pass every cached (name, namespace) pair to ``unreg_func``."""
        for ns_utf, functions in self._function_cache.iteritems():
            for name_utf in functions:
                unreg_func(ctxt, name_utf, ns_utf)

    cdef unregisterGlobalFunctions(self, void* ctxt,
                                   _register_function unreg_func):
        """Pass every cached function that is not a local extension
        to ``unreg_func``."""
        for ns_utf, functions in self._function_cache.items():
            for name_utf in functions:
                if self._extensions is None or \
                        (ns_utf, name_utf) not in self._extensions:
                    unreg_func(ctxt, name_utf, ns_utf)

    @cython.final
    cdef _find_cached_function(self, const_xmlChar* c_ns_uri, const_xmlChar* c_name):
        """Lookup an extension function in the cache and return it.

        Parameters: c_ns_uri may be NULL, c_name must not be NULL
        """
        cdef python.PyObject* c_dict
        cdef python.PyObject* c_function
        c_dict = python.PyDict_GetItem(
            self._function_cache, None if c_ns_uri is NULL else c_ns_uri)
        if c_dict is NULL:
            return None
        c_function = python.PyDict_GetItem(
            <object>c_dict, <unsigned char*>c_name)
        if c_function is NULL:
            return None
        return <object>c_function

    # Python access to the XPath context for extension functions

    @property
    def context_node(self):
        """The current context node as an element.

        Raises XPathError outside of an evaluation, when there is no
        context node, when the node belongs to a different document than
        the XPath context, or when no document is registered.
        """
        cdef xmlNode* c_node
        if self._xpathCtxt is NULL:
            raise XPathError(
                "XPath context is only usable during the evaluation")
        c_node = self._xpathCtxt.node
        if c_node is NULL:
            raise XPathError("no context node")
        if c_node.doc != self._xpathCtxt.doc:
            raise XPathError(
                "document-external context nodes are not supported")
        if self._doc is None:
            raise XPathError("document context is missing")
        return _elementFactory(self._doc, c_node)

    @property
    def eval_context(self):
        """A dict that is created on first access and reset by
        _cleanup_context()."""
        if self._eval_context_dict is None:
            self._eval_context_dict = {}
        return self._eval_context_dict

    # Python reference keeping during XPath function evaluation

    @cython.final
    cdef _release_temp_refs(self):
        "Free temporarily referenced objects from this context."
        self._temp_refs.clear()
        self._temp_documents.clear()

    @cython.final
    cdef _hold(self, obj):
        """A way to temporarily hold references to nodes in the evaluator.

        This is needed because otherwise nodes created in XPath extension
        functions would be reference counted too soon, during the XPath
        evaluation.  This is most important in the case of exceptions.
        """
        if isinstance(obj, _Element):
            self._temp_refs.add(obj)
            self._temp_documents.add((<_Element>obj)._doc)
            return
        if _isString(obj) or not python.PySequence_Check(obj):
            return
        # a non-string sequence: hold every element it contains
        for o in obj:
            if isinstance(o, _Element):
                self._temp_refs.add(o)
                self._temp_documents.add((<_Element>o)._doc)

    @cython.final
    cdef _Document _findDocumentForNode(self, xmlNode* c_node):
        """If an XPath expression returns an element from a different
        document than the current context document, we call this to
        see if it was possibly created by an extension and is a known
        document instance.
        """
        cdef _Document doc
        for doc in self._temp_documents:
            if doc is not None and doc._c_doc is c_node.doc:
                return doc
        return None
|
||||
|
||||
|
||||
# libxml2 stores these messages in a static array that it does not export,
# so they are repeated here.  _forwardXPathError() indexes this tuple with
# (error code - XML_XPATH_EXPRESSION_OK).

cdef tuple LIBXML2_XPATH_ERROR_MESSAGES = (
    b"Ok",
    b"Number encoding",
    b"Unfinished literal",
    b"Start of literal",
    b"Expected $ for variable reference",
    b"Undefined variable",
    b"Invalid predicate",
    b"Invalid expression",
    b"Missing closing curly brace",
    b"Unregistered function",
    b"Invalid operand",
    b"Invalid type",
    b"Invalid number of arguments",
    b"Invalid context size",
    b"Invalid context position",
    b"Memory allocation error",
    b"Syntax error",
    b"Resource error",
    b"Sub resource error",
    b"Undefined namespace prefix",
    b"Encoding error",
    b"Char out of XML range",
    b"Invalid or incomplete context",
    b"Stack usage error",
    b"Forbidden variable\n",
    b"?? Unknown error ??\n",
)
|
||||
|
||||
cdef void _forwardXPathError(void* c_ctxt, const xmlerror.xmlError* c_error) noexcept with gil:
    # Copy the relevant fields of a libxml2 XPath error into a local error
    # struct and hand it to the error log of the context in ``c_ctxt``.
    cdef xmlerror.xmlError error
    cdef int xpath_code
    if c_error.message is NULL:
        # no message provided: look it up by the XPath specific error code
        xpath_code = c_error.code - xmlerror.XML_XPATH_EXPRESSION_OK
        if 0 <= xpath_code < len(LIBXML2_XPATH_ERROR_MESSAGES):
            error.message = _cstr(LIBXML2_XPATH_ERROR_MESSAGES[xpath_code])
        else:
            error.message = b"unknown error"
    else:
        error.message = c_error.message
    error.domain = c_error.domain
    error.code = c_error.code
    error.level = c_error.level
    error.file = c_error.file
    error.line = c_error.line
    error.int2 = c_error.int1  # column
    error.node = NULL

    (<_BaseContext>c_ctxt)._error_log._receive(&error)
|
||||
|
||||
cdef void _receiveXPathError(void* c_context, const xmlerror.xmlError* error) noexcept nogil:
    # Error callback installed by _BaseContext._set_xpath_context().
    # Does nothing unless __DEBUG is set.
    if not __DEBUG:
        return
    if c_context is not NULL:
        _forwardXPathError(c_context, error)
    else:
        # no context available: use the generic forwarding function
        _forwardError(NULL, error)
|
||||
|
||||
|
||||
def Extension(module, function_mapping=None, *, ns=None):
    """Extension(module, function_mapping=None, ns=None)

    Collect callables from a module (or the methods of an object) into a
    dictionary of XPath extension functions keyed by ``(ns, name)``.

    ``function_mapping`` selects what is taken: a dict maps attribute
    names to the XPath function names they are published under, any other
    iterable lists attribute names that are published under their own
    name.  If it is None, every attribute whose name does not start with
    an underscore is taken.

    The ``ns`` keyword argument is the namespace URI used in the keys.
    """
    cdef dict functions = {}
    if not isinstance(function_mapping, dict):
        if function_mapping is None:
            function_mapping = [
                attr_name for attr_name in dir(module)
                if not attr_name.startswith('_')]
        for attr_name in function_mapping:
            functions[(ns, attr_name)] = getattr(module, attr_name)
    else:
        for attr_name, xpath_name in function_mapping.items():
            functions[(ns, xpath_name)] = getattr(module, attr_name)
    return functions
|
||||
|
||||
################################################################################
|
||||
# EXSLT regexp implementation
|
||||
|
||||
@cython.final
@cython.internal
cdef class _ExsltRegExp:
    """Implementation of the EXSLT regular expression functions
    ``test``, ``match`` and ``replace`` on top of the ``re`` module."""
    cdef dict _compile_map

    def __cinit__(self):
        # cache of compiled patterns, keyed by (pattern, ignore_case)
        self._compile_map = {}

    cdef _make_string(self, value):
        """Convert an XPath argument to a string.

        Strings are returned unchanged.  For a list, the first item is
        used: a string as is, an element via its text content, anything
        else via unicode().  An empty list gives ''.
        """
        if _isString(value):
            return value
        if not isinstance(value, list):
            return unicode(value)
        # node set: take recursive text concatenation of first element
        if python.PyList_GET_SIZE(value) == 0:
            return ''
        firstnode = value[0]
        if _isString(firstnode):
            return firstnode
        if not isinstance(firstnode, _Element):
            return unicode(firstnode)
        c_text = tree.xmlNodeGetContent((<_Element>firstnode)._c_node)
        if c_text is NULL:
            raise MemoryError()
        try:
            return funicode(c_text)
        finally:
            tree.xmlFree(c_text)

    cdef _compile(self, rexp, ignore_case):
        """Return the compiled pattern for ``rexp``, using the cache."""
        cdef python.PyObject* c_cached
        rexp = self._make_string(rexp)
        key = (rexp, ignore_case)
        c_cached = python.PyDict_GetItem(self._compile_map, key)
        if c_cached is not NULL:
            return <object>c_cached
        py_flags = re.UNICODE
        if ignore_case:
            py_flags |= re.IGNORECASE
        pattern = re.compile(rexp, py_flags)
        self._compile_map[key] = pattern
        return pattern

    def test(self, ctxt, s, rexp, flags=''):
        """Return True if the pattern is found anywhere in ``s``."""
        flags = self._make_string(flags)
        s = self._make_string(s)
        rexpc = self._compile(rexp, 'i' in flags)
        return rexpc.search(s) is not None

    def match(self, ctxt, s, rexp, flags=''):
        """Return the matches as a list of ``<match>`` elements.

        With the 'g' flag, all findall() results are returned.  Otherwise
        the first match is returned followed by its groups.  Returns an
        empty tuple if nothing matches.
        """
        cdef list result_list
        flags = self._make_string(flags)
        s = self._make_string(s)
        rexpc = self._compile(rexp, 'i' in flags)
        if 'g' in flags:
            results = rexpc.findall(s)
            if not results:
                return ()
        else:
            found = rexpc.search(s)
            if not found:
                return ()
            results = [found.group()]
            results.extend(found.groups(''))
        result_list = []
        root = Element('matches')
        for s_match in results:
            # findall() returns tuples for patterns with several groups
            if python.PyTuple_CheckExact(s_match):
                s_match = ''.join(s_match)
            elem = SubElement(root, 'match')
            elem.text = s_match
            result_list.append(elem)
        return result_list

    def replace(self, ctxt, s, rexp, flags, replacement):
        """Substitute the first match, or all matches with the 'g' flag."""
        replacement = self._make_string(replacement)
        flags = self._make_string(flags)
        s = self._make_string(s)
        rexpc = self._compile(rexp, 'i' in flags)
        # a count of 0 makes sub() replace every occurrence
        count: object = 0 if 'g' in flags else 1
        return rexpc.sub(replacement, s, count)

    cdef _register_in_context(self, _BaseContext context):
        """Add the three functions to ``context`` as local extensions."""
        ns = b"http://exslt.org/regular-expressions"
        context._addLocalExtensionFunction(ns, b"test", self.test)
        context._addLocalExtensionFunction(ns, b"match", self.match)
        context._addLocalExtensionFunction(ns, b"replace", self.replace)
|
||||
|
||||
|
||||
################################################################################
|
||||
# helper functions
|
||||
|
||||
cdef xpath.xmlXPathObject* _wrapXPathObject(object obj, _Document doc,
                                            _BaseContext context) except NULL:
    """Convert a Python value into a new libxml2 XPath object.

    Strings, booleans and numbers become the matching XPath value.  None,
    an element or a sequence become a node set; strings inside a sequence
    are added as text nodes below a temporary "text-root" element.

    Raises XPathResultError for unsupported values and MemoryError if a
    helper node cannot be allocated.
    """
    cdef xpath.xmlNodeSet* resultSet
    cdef _Element fake_node = None
    cdef xmlNode* c_node

    if isinstance(obj, unicode):
        obj = _utf8(obj)
    if isinstance(obj, bytes):
        # libxml2 copies the string value
        return xpath.xmlXPathNewCString(_cstr(obj))
    if isinstance(obj, bool):
        # checked before the generic number test below
        return xpath.xmlXPathNewBoolean(obj)
    if python.PyNumber_Check(obj):
        return xpath.xmlXPathNewFloat(obj)

    if obj is None:
        resultSet = xpath.xmlXPathNodeSetCreate(NULL)
    elif isinstance(obj, _Element):
        resultSet = xpath.xmlXPathNodeSetCreate((<_Element>obj)._c_node)
    elif python.PySequence_Check(obj):
        resultSet = xpath.xmlXPathNodeSetCreate(NULL)
        try:
            for value in obj:
                if isinstance(value, _Element):
                    if context is not None:
                        context._hold(value)
                    xpath.xmlXPathNodeSetAdd(resultSet, (<_Element>value)._c_node)
                    continue

                if context is None or doc is None:
                    raise XPathResultError(
                        f"Non-Element values not supported at this point - got {value!r}")
                # support strings by appending text nodes to an Element
                if isinstance(value, unicode):
                    value = _utf8(value)
                if not isinstance(value, bytes):
                    raise XPathResultError(
                        f"This is not a supported node-set result: {value!r}")

                if fake_node is None:
                    fake_node = _makeElement("text-root", NULL, doc, None,
                                             None, None, None, None, None)
                    context._hold(fake_node)
                else:
                    # append a comment node to keep the text nodes separate
                    c_node = tree.xmlNewDocComment(doc._c_doc, <unsigned char*>"")
                    if c_node is NULL:
                        raise MemoryError()
                    tree.xmlAddChild(fake_node._c_node, c_node)
                context._hold(value)
                c_node = tree.xmlNewDocText(doc._c_doc, _xcstr(value))
                if c_node is NULL:
                    raise MemoryError()
                tree.xmlAddChild(fake_node._c_node, c_node)
                xpath.xmlXPathNodeSetAdd(resultSet, c_node)
        except:
            # do not leak the partially filled node set
            xpath.xmlXPathFreeNodeSet(resultSet)
            raise
    else:
        raise XPathResultError(
            f"Unknown return type: {python._fqtypename(obj).decode('utf8')}")
    return xpath.xmlXPathWrapNodeSet(resultSet)
|
||||
|
||||
cdef object _unwrapXPathObject(xpath.xmlXPathObject* xpathObj,
                               _Document doc, _BaseContext context):
    """Convert a libxml2 XPath object into a Python value.

    Node sets and XSLT result tree fragments become lists, booleans and
    numbers are returned directly, strings become unicode strings (smart
    strings if the context builds them).

    Raises XPathResultError for undefined or unknown result types and
    NotImplementedError for point, range, location set and user types.
    """
    if xpathObj.type == xpath.XPATH_UNDEFINED:
        raise XPathResultError("Undefined xpath result")
    if xpathObj.type == xpath.XPATH_NODESET or \
            xpathObj.type == xpath.XPATH_XSLT_TREE:
        return _createNodeSetResult(xpathObj, doc, context)
    if xpathObj.type == xpath.XPATH_BOOLEAN:
        return xpathObj.boolval
    if xpathObj.type == xpath.XPATH_NUMBER:
        return xpathObj.floatval
    if xpathObj.type == xpath.XPATH_STRING:
        stringval = funicode(xpathObj.stringval)
        if context._build_smart_strings:
            stringval = _elementStringResultFactory(
                stringval, None, None, False)
        return stringval
    if xpathObj.type == xpath.XPATH_POINT:
        raise NotImplementedError("XPATH_POINT")
    if xpathObj.type == xpath.XPATH_RANGE:
        raise NotImplementedError("XPATH_RANGE")
    if xpathObj.type == xpath.XPATH_LOCATIONSET:
        raise NotImplementedError("XPATH_LOCATIONSET")
    if xpathObj.type == xpath.XPATH_USERS:
        raise NotImplementedError("XPATH_USERS")
    raise XPathResultError(f"Unknown xpath result {xpathObj.type}")
|
||||
|
||||
cdef object _createNodeSetResult(xpath.xmlXPathObject* xpathObj, _Document doc,
                                 _BaseContext context):
    """Build a Python list from the node set of an XPath result object.

    Returns an empty list if the object carries no node set.
    """
    cdef xpath.xmlNodeSet* c_nodeset = xpathObj.nodesetval
    cdef bint is_fragment = xpathObj.type == xpath.XPATH_XSLT_TREE
    cdef int i
    cdef list result = []
    if c_nodeset is NULL:
        return result
    for i in range(c_nodeset.nodeNr):
        _unpackNodeSetEntry(result, c_nodeset.nodeTab[i], doc, context,
                            is_fragment)
    return result
|
||||
|
||||
cdef _unpackNodeSetEntry(list results, xmlNode* c_node, _Document doc,
                         _BaseContext context, bint is_fragment):
    """Append the Python representation of one node set entry to ``results``.

    Elements become element proxies, text/CDATA/attribute nodes become
    strings and namespace declarations become (prefix, href) tuples.
    Document nodes are only unpacked (child by child) for result tree
    fragments; XInclude markers are skipped.

    Raises NotImplementedError for any other node type.
    """
    cdef xmlNode* c_child
    if _isElement(c_node):
        if c_node.doc != doc._c_doc and c_node.doc._private is NULL:
            # XXX: works, but maybe not always the right thing to do?
            # XPath: only runs when extensions create or copy trees
            #        -> we store Python refs to these, so that is OK
            # XSLT: can it leak when merging trees from multiple sources?
            c_node = tree.xmlDocCopyNode(c_node, doc._c_doc, 1)
            # FIXME: call _instantiateElementFromXPath() instead?
        results.append(_fakeDocElementFactory(doc, c_node))
    elif c_node.type in (tree.XML_TEXT_NODE,
                         tree.XML_CDATA_SECTION_NODE,
                         tree.XML_ATTRIBUTE_NODE):
        results.append(_buildElementStringResult(doc, c_node, context))
    elif c_node.type == tree.XML_NAMESPACE_DECL:
        results.append((funicodeOrNone((<xmlNs*>c_node).prefix),
                        funicodeOrNone((<xmlNs*>c_node).href)))
    elif c_node.type in (tree.XML_DOCUMENT_NODE,
                         tree.XML_HTML_DOCUMENT_NODE):
        # ignored for everything but result tree fragments
        if is_fragment:
            c_child = c_node.children
            while c_child is not NULL:
                _unpackNodeSetEntry(results, c_child, doc, context, 0)
                c_child = c_child.next
    elif c_node.type in (tree.XML_XINCLUDE_START, tree.XML_XINCLUDE_END):
        pass
    else:
        raise NotImplementedError(
            f"Not yet implemented result node type: {c_node.type}")
|
||||
|
||||
cdef void _freeXPathObject(xpath.xmlXPathObject* xpathObj) noexcept:
    """Free the XPath object, but *never* free the *content* of node sets.
    Python dealloc will do that for us.
    """
    if xpathObj.nodesetval is not NULL:
        # release the node set first and clear the pointer before the
        # object itself is freed
        xpath.xmlXPathFreeNodeSet(xpathObj.nodesetval)
        xpathObj.nodesetval = NULL
    xpath.xmlXPathFreeObject(xpathObj)
|
||||
|
||||
cdef _Element _instantiateElementFromXPath(xmlNode* c_node, _Document doc,
                                           _BaseContext context):
    """Return an element proxy for a node found by an XPath expression."""
    # NOTE: this may copy the element - only call this when it can't leak
    cdef bint is_foreign = (
        c_node.doc != doc._c_doc and c_node.doc._private is NULL)
    if is_foreign:
        # not from the context document and not from a fake document
        # either => may still be from a known document, e.g. one
        # created by an extension function
        node_doc = context._findDocumentForNode(c_node)
        if node_doc is not None:
            doc = node_doc
        else:
            # not from a known document at all! => can only make a
            # safety copy here
            c_node = tree.xmlDocCopyNode(c_node, doc._c_doc, 1)
    return _fakeDocElementFactory(doc, c_node)
|
||||
|
||||
################################################################################
|
||||
# special str/unicode subclasses
|
||||
|
||||
@cython.final
cdef class _ElementUnicodeResult(unicode):
    """Unicode string result that remembers the element it came from."""
    cdef _Element _parent
    cdef readonly object attrname
    cdef readonly bint is_tail

    def getparent(self):
        """Return the parent element stored on this string (may be None)."""
        return self._parent

    @property
    def is_text(self):
        # has a parent and is neither tail text nor an attribute value
        return (self._parent is not None
                and not self.is_tail
                and self.attrname is None)

    @property
    def is_attribute(self):
        # attribute values carry the attribute name
        return self.attrname is not None
|
||||
|
||||
cdef object _elementStringResultFactory(string_value, _Element parent,
                                        attrname, bint is_tail):
    """Create an _ElementUnicodeResult for ``string_value`` and fill in
    its parent, attribute name and tail flag."""
    cdef _ElementUnicodeResult smart_string = _ElementUnicodeResult(string_value)
    smart_string._parent = parent
    smart_string.attrname = attrname
    smart_string.is_tail = is_tail
    return smart_string
|
||||
|
||||
cdef object _buildElementStringResult(_Document doc, xmlNode* c_node,
                                      _BaseContext context):
    """Build the string result for a text, CDATA or attribute node.

    Returns a plain unicode string if the context does not build smart
    strings.  Otherwise returns an _ElementUnicodeResult that knows its
    parent element, the attribute name (for attribute nodes) and whether
    the text is tail text.

    Raises MemoryError if libxml2 cannot provide the attribute content.
    """
    cdef _Element parent = None
    cdef object attrname = None
    cdef xmlNode* c_element
    cdef bint is_tail

    if c_node.type == tree.XML_ATTRIBUTE_NODE:
        attrname = _namespacedName(c_node)
        is_tail = 0
        s = tree.xmlNodeGetContent(c_node)
        if s is NULL:
            # same handling as in _ExsltRegExp._make_string(): never pass
            # a NULL pointer on to funicode()
            raise MemoryError()
        try:
            value = funicode(s)
        finally:
            tree.xmlFree(s)
        c_element = NULL
    else:
        #assert c_node.type == tree.XML_TEXT_NODE or c_node.type == tree.XML_CDATA_SECTION_NODE, "invalid node type"
        # may be tail text or normal text
        value = funicode(c_node.content)
        c_element = _previousElement(c_node)
        is_tail = c_element is not NULL

    if not context._build_smart_strings:
        return value

    if c_element is NULL:
        # non-tail text or attribute text: walk up to the nearest element
        c_element = c_node.parent
        while c_element is not NULL and not _isElement(c_element):
            c_element = c_element.parent

    if c_element is not NULL:
        parent = _instantiateElementFromXPath(c_element, doc, context)

    return _elementStringResultFactory(
        value, parent, attrname, is_tail)
|
||||
|
||||
################################################################################
|
||||
# callbacks for XPath/XSLT extension functions
|
||||
|
||||
cdef void _extension_function_call(_BaseContext context, function,
                                   xpath.xmlXPathParserContext* ctxt, int nargs) noexcept:
    # Call a Python extension function with the top ``nargs`` values of the
    # XPath stack and push its wrapped result back onto the stack.
    # Any exception is stored in the context and reported to libxml2 as an
    # XPath expression error; nothing propagates to the caller.
    cdef _Document doc
    cdef xpath.xmlXPathObject* obj
    cdef list args
    cdef int i
    doc = context._doc
    try:
        args = []
        for i in range(nargs):
            obj = xpath.valuePop(ctxt)
            try:
                o = _unwrapXPathObject(obj, doc, context)
            finally:
                # free the popped object even if unwrapping raised,
                # otherwise it would leak
                _freeXPathObject(obj)
            args.append(o)
        # values were popped last-to-first
        args.reverse()

        res = function(context, *args)
        # wrap result for XPath consumption
        obj = _wrapXPathObject(res, doc, context)
        # prevent Python from deallocating elements handed to libxml2
        context._hold(res)
        xpath.valuePush(ctxt, obj)
    except:
        xpath.xmlXPathErr(ctxt, xpath.XPATH_EXPR_ERROR)
        context._exc._store_raised()
    finally:
        return  # swallow any further exceptions
|
||||
|
||||
# lookup the function by name and call it
|
||||
|
||||
cdef void _xpath_function_call(xpath.xmlXPathParserContext* ctxt,
                               int nargs) noexcept with gil:
    # Look up the currently called function in the cache of the context
    # stored in the XPath context's userData and call it.  A missing
    # function is reported to libxml2 and stored as XPathFunctionError.
    cdef _BaseContext context
    cdef xpath.xmlXPathContext* rctxt = ctxt.context
    context = <_BaseContext> rctxt.userData
    try:
        function = context._find_cached_function(rctxt.functionURI, rctxt.function)
        if function is None:
            xpath.xmlXPathErr(ctxt, xpath.XPATH_UNKNOWN_FUNC_ERROR)
            context._exc._store_exception(XPathFunctionError(
                f"XPath function '{_namespacedNameFromNsName(rctxt.functionURI, rctxt.function)}' not found"))
        else:
            _extension_function_call(context, function, ctxt, nargs)
    except:
        # may not be the right error, but we need to tell libxml2 *something*
        xpath.xmlXPathErr(ctxt, xpath.XPATH_UNKNOWN_FUNC_ERROR)
        context._exc._store_raised()
    finally:
        return  # swallow any further exceptions
|
||||
@@ -1,10 +0,0 @@
|
||||
__doc__ = """Legacy interface to the BeautifulSoup HTML parser.
|
||||
"""
|
||||
|
||||
__all__ = ["parse", "convert_tree"]
|
||||
|
||||
from .soupparser import convert_tree, parse as _parse
|
||||
|
||||
def parse(file, beautifulsoup=None, makeelement=None):
    """Parse ``file`` with the soupparser and return the root of the result.

    All arguments are passed through to ``soupparser.parse()``.
    """
    return _parse(
        file, beautifulsoup=beautifulsoup, makeelement=makeelement
    ).getroot()
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,86 +0,0 @@
|
||||
import optparse
|
||||
import sys
|
||||
import re
|
||||
import os
|
||||
from .diff import htmldiff
|
||||
|
||||
# Text shown below the usage lines of --help (currently empty).
description = """\
"""

# Command line parser used by main().
# The usage text names the annotation mode by its registered long option,
# '--annotation'; the previously advertised '--annotate' is not an option
# this parser accepts.
parser = optparse.OptionParser(
    usage="%prog [OPTIONS] FILE1 FILE2\n"
          "%prog --annotation [OPTIONS] INFO1 FILE1 INFO2 FILE2 ...",
    description=description,
    )

# -o/--output: target file, '-' (the default) is treated as stdout by main()
parser.add_option(
    '-o', '--output',
    metavar="FILE",
    dest="output",
    default="-",
    help="File to write the difference to",
    )

# -a/--annotation: switch main() to annotation mode
parser.add_option(
    '-a', '--annotation',
    action="store_true",
    dest="annotation",
    help="Do an annotation")
|
||||
|
||||
def main(args=None):
    """Command line entry point: diff the bodies of two HTML files.

    ``args`` defaults to ``sys.argv[1:]``.  With the annotation option the
    call is handed over to ``annotate()``.  Otherwise exactly two file
    arguments are required; the process exits with status 1 if they are
    missing.  The diff of the two ``<body>`` contents is wrapped in the
    head and tail of the second file and written to stdout or to the file
    given with ``--output``.
    """
    if args is None:
        args = sys.argv[1:]
    options, args = parser.parse_args(args)
    if options.annotation:
        return annotate(options, args)
    if len(args) != 2:
        print('Error: you must give two files')
        parser.print_help()
        sys.exit(1)
    file1, file2 = args
    input1 = read_file(file1)
    input2 = read_file(file2)
    # only the body of the first file is used, the second file provides
    # the surrounding markup of the result
    body1 = split_body(input1)[1]
    pre, body2, post = split_body(input2)
    result = htmldiff(body1, body2)
    result = pre + result + post
    if options.output == '-':
        if not result.endswith('\n'):
            result += '\n'
        sys.stdout.write(result)
    else:
        # Open the file in a mode that matches the type of the result.
        # Writing a str to a file opened in 'wb' raises TypeError.
        mode = 'wb' if isinstance(result, bytes) else 'w'
        with open(options.output, mode) as f:
            f.write(result)
|
||||
|
||||
def read_file(filename):
    """Return the content of ``filename`` as text; '-' reads from stdin.

    Both branches return ``str``.  The file used to be read in binary
    mode, which returned bytes that the str-pattern regular expressions
    in this module cannot search.  It is now opened in text mode with the
    default encoding, like ``sys.stdin``.

    Raises OSError if the file does not exist.
    """
    if filename == '-':
        c = sys.stdin.read()
    elif not os.path.exists(filename):
        raise OSError(
            "Input file %s does not exist" % filename)
    else:
        with open(filename) as f:
            c = f.read()
    return c
|
||||
|
||||
# Opening and closing <body> tags, matched case-insensitively and across
# line breaks.
body_start_re = re.compile(r"<body.*?>", re.IGNORECASE | re.DOTALL)
body_end_re = re.compile(r"</body.*?>", re.IGNORECASE | re.DOTALL)


def split_body(html):
    """Split ``html`` into ``(pre, body, post)``.

    ``pre`` is everything up to and including the opening body tag,
    ``post`` is everything from the closing body tag on.  A part whose tag
    is not found is returned as an empty string.
    """
    pre = post = ''
    opening = body_start_re.search(html)
    if opening:
        cut = opening.end()
        pre, html = html[:cut], html[cut:]
    # the closing tag is searched in what remains after the opening tag
    closing = body_end_re.search(html)
    if closing:
        cut = closing.start()
        html, post = html[:cut], html[cut:]
    return pre, html, post
|
||||
|
||||
def annotate(options, args):
    """
    Placeholder for the annotation mode.

    Both arguments are ignored: the function prints a notice and exits
    the process with status 1.
    """
    print("Not yet implemented")
    sys.exit(1)
|
||||
|
||||
Binary file not shown.
File diff suppressed because it is too large
Load Diff
@@ -1,100 +0,0 @@
|
||||
"""
|
||||
Legacy module - don't use in new code!
|
||||
|
||||
html5lib now has its own proper implementation.
|
||||
|
||||
This module implements a tree builder for html5lib that generates lxml
|
||||
html element trees. This module uses camelCase as it follows the
|
||||
html5lib style guide.
|
||||
"""
|
||||
|
||||
from html5lib.treebuilders import _base, etree as etree_builders
|
||||
from lxml import html, etree
|
||||
|
||||
|
||||
class DocumentType:
    """Plain record holding a doctype's name, public id and system id."""

    def __init__(self, name, publicId, systemId):
        self.name, self.publicId, self.systemId = name, publicId, systemId
|
||||
|
||||
class Document:
    """Document node that wraps an element tree and tracks child nodes."""

    def __init__(self):
        self.childNodes = []
        # Filled in later by whoever builds the tree.
        self._elementTree = None

    def appendChild(self, element):
        """Add the wrapped ``element._element`` as next sibling of the tree root."""
        root = self._elementTree.getroot()
        root.addnext(element._element)
|
||||
|
||||
|
||||
class TreeBuilder(_base.TreeBuilder):
    """
    Tree builder that creates its document root with ``lxml.html``.

    Comments and the doctype that arrive before the root element are
    buffered and only applied once ``insertRoot()`` creates the tree.
    """
    documentClass = Document
    doctypeClass = DocumentType
    elementClass = None
    commentClass = None
    fragmentClass = Document

    def __init__(self, *args, **kwargs):
        # Element and comment wrappers come from html5lib's etree builder
        # factory, instantiated for lxml.html and lxml.etree respectively.
        html_builder = etree_builders.getETreeModule(html, fullTree=False)
        etree_builder = etree_builders.getETreeModule(etree, fullTree=False)
        self.elementClass = html_builder.Element
        self.commentClass = etree_builder.Comment
        _base.TreeBuilder.__init__(self, *args, **kwargs)

    def reset(self):
        """Reset the base builder and clear the pre-root buffers."""
        _base.TreeBuilder.reset(self)
        self.rootInserted = False
        self.initialComments = []
        self.doctype = None

    def getDocument(self):
        """Return the element tree stored on the current document."""
        return self.document._elementTree

    def getFragment(self):
        """
        Return the content of the first open element as a list: its
        leading text (if any), its children, and its tail text (if any).
        """
        fragment = []
        element = self.openElements[0]._element
        if element.text:
            fragment.append(element.text)
        fragment.extend(element.getchildren())
        if element.tail:
            fragment.append(element.tail)
        return fragment

    def insertDoctype(self, name, publicId, systemId):
        """Remember the doctype; it is serialized later by insertRoot()."""
        doctype = self.doctypeClass(name, publicId, systemId)
        self.doctype = doctype

    def insertComment(self, data, parent=None):
        """Buffer comments seen before the root, otherwise insert normally."""
        if not self.rootInserted:
            self.initialComments.append(data)
        else:
            _base.TreeBuilder.insertComment(self, data, parent)

    def insertRoot(self, name):
        """
        Create the document root by parsing a minimal ``<html></html>``
        document (preceded by the remembered doctype, if it has a name),
        attach the buffered comments before it and register the root in
        the builder's child/open element lists.
        """
        buf = []
        if self.doctype and self.doctype.name:
            buf.append('<!DOCTYPE %s' % self.doctype.name)
            if self.doctype.publicId is not None or self.doctype.systemId is not None:
                # Only one of the two ids may be set; render the missing
                # one as an empty string rather than the text "None".
                buf.append(' PUBLIC "%s" "%s"' % (self.doctype.publicId or '',
                                                  self.doctype.systemId or ''))
            buf.append('>')
        buf.append('<html></html>')
        root = html.fromstring(''.join(buf))

        # Append the initial comments:
        for comment in self.initialComments:
            root.addprevious(etree.Comment(comment))

        # Create the root document and add the ElementTree to it
        self.document = self.documentClass()
        self.document._elementTree = root.getroottree()

        # Add the root element to the internal child/open data structures
        root_element = self.elementClass(name)
        root_element._element = root
        self.document.childNodes.append(root_element)
        self.openElements.append(root_element)

        self.rootInserted = True
|
||||
@@ -1,56 +0,0 @@
|
||||
try:
|
||||
from collections.abc import MutableSet
|
||||
except ImportError:
|
||||
from collections.abc import MutableSet
|
||||
|
||||
|
||||
class SetMixin(MutableSet):

    """
    Mix-in that supplies the ``set`` method names on top of ``MutableSet``.

    Subclasses must provide ``__iter__``, ``add`` and ``remove``.
    """

    def __len__(self):
        # Counted by iteration, as only __iter__ is guaranteed to exist.
        return sum(1 for _ in self)

    def __contains__(self, item):
        return any(item == member for member in self)

    # Named equivalents of the comparison and binary set operators.
    issubset = MutableSet.__le__
    issuperset = MutableSet.__ge__

    union = MutableSet.__or__
    intersection = MutableSet.__and__
    difference = MutableSet.__sub__
    symmetric_difference = MutableSet.__xor__

    def copy(self):
        """Return the current members as a plain built-in ``set``."""
        return set(self)

    # Named equivalents of the in-place set operators.
    def update(self, other):
        self |= other

    def intersection_update(self, other):
        self &= other

    def difference_update(self, other):
        self -= other

    def symmetric_difference_update(self, other):
        self ^= other

    def discard(self, item):
        """Remove ``item`` if present; a KeyError from remove() is ignored."""
        try:
            self.remove(item)
        except KeyError:
            pass

    @classmethod
    def _from_iterable(cls, it):
        # Results of the binary operators are plain sets.
        return set(it)
|
||||
@@ -1,173 +0,0 @@
|
||||
# --------------------------------------------------------------------
|
||||
# The ElementTree toolkit is
|
||||
# Copyright (c) 1999-2004 by Fredrik Lundh
|
||||
# --------------------------------------------------------------------
|
||||
|
||||
"""
|
||||
A set of HTML generator tags for building HTML documents.
|
||||
|
||||
Usage::
|
||||
|
||||
>>> from lxml.html.builder import *
|
||||
>>> html = HTML(
|
||||
... HEAD( TITLE("Hello World") ),
|
||||
... BODY( CLASS("main"),
|
||||
... H1("Hello World !")
|
||||
... )
|
||||
... )
|
||||
|
||||
>>> import lxml.etree
|
||||
>>> print(lxml.etree.tostring(html, pretty_print=True, encoding='unicode'))
|
||||
<html>
|
||||
<head>
|
||||
<title>Hello World</title>
|
||||
</head>
|
||||
<body class="main">
|
||||
<h1>Hello World !</h1>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
"""
|
||||
|
||||
from lxml.builder import ElementMaker
|
||||
from lxml.html import html_parser
|
||||
|
||||
E = ElementMaker(makeelement=html_parser.makeelement)
|
||||
|
||||
# elements
|
||||
A = E.a #: anchor
|
||||
ABBR = E.abbr #: abbreviated form (e.g., WWW, HTTP, etc.)
|
||||
ACRONYM = E.acronym #:
|
||||
ADDRESS = E.address #: information on author
|
||||
APPLET = E.applet #: Java applet (DEPRECATED)
|
||||
AREA = E.area #: client-side image map area
|
||||
ARTICLE = E.article #: self-contained article
|
||||
ASIDE = E.aside #: indirectly-related content
|
||||
AUDIO = E.audio #: embedded audio file
|
||||
B = E.b #: bold text style
|
||||
BASE = E.base #: document base URI
|
||||
BASEFONT = E.basefont #: base font size (DEPRECATED)
|
||||
BDI = E.bdi #: isolate bidirectional text
|
||||
BDO = E.bdo #: I18N BiDi over-ride
|
||||
BIG = E.big #: large text style
|
||||
BLOCKQUOTE = E.blockquote #: long quotation
|
||||
BODY = E.body #: document body
|
||||
BR = E.br #: forced line break
|
||||
BUTTON = E.button #: push button
|
||||
CANVAS = E.canvas #: scriptable graphics container
|
||||
CAPTION = E.caption #: table caption
|
||||
CENTER = E.center #: shorthand for DIV align=center (DEPRECATED)
|
||||
CITE = E.cite #: citation
|
||||
CODE = E.code #: computer code fragment
|
||||
COL = E.col #: table column
|
||||
COLGROUP = E.colgroup #: table column group
|
||||
DATA = E.data #: machine-readable translation
|
||||
DATALIST = E.datalist #: list of options for an input
|
||||
DD = E.dd #: definition description
|
||||
DEL = getattr(E, 'del') #: deleted text
|
||||
DETAILS = E.details #: expandable section
|
||||
DFN = E.dfn #: instance definition
|
||||
DIALOG = E.dialog #: dialog box
|
||||
DIR = E.dir #: directory list (DEPRECATED)
|
||||
DIV = E.div #: generic language/style container
|
||||
DL = E.dl #: definition list
|
||||
DT = E.dt #: definition term
|
||||
EM = E.em #: emphasis
|
||||
EMBED = E.embed #: embedded external content
|
||||
FIELDSET = E.fieldset #: form control group
|
||||
FIGCAPTION = E.figcaption #: figure caption
|
||||
FIGURE = E.figure #: self-contained, possibly-captioned content
|
||||
FONT = E.font #: local change to font (DEPRECATED)
|
||||
FOOTER = E.footer #: footer for nearest ancestor
|
||||
FORM = E.form #: interactive form
|
||||
FRAME = E.frame #: subwindow
|
||||
FRAMESET = E.frameset #: window subdivision
|
||||
H1 = E.h1 #: heading
|
||||
H2 = E.h2 #: heading
|
||||
H3 = E.h3 #: heading
|
||||
H4 = E.h4 #: heading
|
||||
H5 = E.h5 #: heading
|
||||
H6 = E.h6 #: heading
|
||||
HEAD = E.head #: document head
|
||||
HEADER = E.header #: heading content
|
||||
HGROUP = E.hgroup #: heading group
|
||||
HR = E.hr #: horizontal rule
|
||||
HTML = E.html #: document root element
|
||||
I = E.i #: italic text style
|
||||
IFRAME = E.iframe #: inline subwindow
|
||||
IMG = E.img #: Embedded image
|
||||
INPUT = E.input #: form control
|
||||
INS = E.ins #: inserted text
|
||||
ISINDEX = E.isindex #: single line prompt (DEPRECATED)
|
||||
KBD = E.kbd #: text to be entered by the user
|
||||
LABEL = E.label #: form field label text
|
||||
LEGEND = E.legend #: fieldset legend
|
||||
LI = E.li #: list item
|
||||
LINK = E.link #: a media-independent link
|
||||
MAIN = E.main #: main content
|
||||
MAP = E.map #: client-side image map
|
||||
MARK = E.mark #: marked/highlighted text
|
||||
MARQUEE = E.marquee #: scrolling text
|
||||
MENU = E.menu #: menu list (DEPRECATED)
|
||||
META = E.meta #: generic metainformation
|
||||
METER = E.meter #: numerical value display
|
||||
NAV = E.nav #: navigation section
|
||||
NOBR = E.nobr #: prevent wrapping
|
||||
NOFRAMES = E.noframes #: alternate content container for non frame-based rendering
|
||||
NOSCRIPT = E.noscript #: alternate content container for non script-based rendering
|
||||
OBJECT = E.object #: generic embedded object
|
||||
OL = E.ol #: ordered list
|
||||
OPTGROUP = E.optgroup #: option group
|
||||
OPTION = E.option #: selectable choice
|
||||
OUTPUT = E.output #: result of a calculation
|
||||
P = E.p #: paragraph
|
||||
PARAM = E.param #: named property value
|
||||
PICTURE = E.picture #: picture with multiple sources
|
||||
PORTAL = E.portal #: embedded preview
|
||||
PRE = E.pre #: preformatted text
|
||||
PROGRESS = E.progress #: progress bar
|
||||
Q = E.q #: short inline quotation
|
||||
RB = E.rb #: ruby base text
|
||||
RP = E.rp #: ruby parentheses
|
||||
RT = E.rt #: ruby text component
|
||||
RTC = E.rtc #: ruby semantic annotation
|
||||
RUBY = E.ruby #: ruby annotations
|
||||
S = E.s #: strike-through text style (DEPRECATED)
|
||||
SAMP = E.samp #: sample program output, scripts, etc.
|
||||
SCRIPT = E.script #: script statements
|
||||
SEARCH = E.search #: set of form controls for a search
|
||||
SECTION = E.section #: generic standalone section
|
||||
SELECT = E.select #: option selector
|
||||
SLOT = E.slot #: placeholder for JS use
|
||||
SMALL = E.small #: small text style
|
||||
SOURCE = E.source #: source for picture/audio/video element
|
||||
SPAN = E.span #: generic language/style container
|
||||
STRIKE = E.strike #: strike-through text (DEPRECATED)
|
||||
STRONG = E.strong #: strong emphasis
|
||||
STYLE = E.style #: style info
|
||||
SUB = E.sub #: subscript
|
||||
SUMMARY = E.summary #: summary for <details>
|
||||
SUP = E.sup #: superscript
|
||||
TABLE = E.table #:
|
||||
TBODY = E.tbody #: table body
|
||||
TD = E.td #: table data cell
|
||||
TEMPLATE = E.template #: fragment for JS use
|
||||
TEXTAREA = E.textarea #: multi-line text field
|
||||
TFOOT = E.tfoot #: table footer
|
||||
TH = E.th #: table header cell
|
||||
THEAD = E.thead #: table header
|
||||
TIME = E.time #: date/time
|
||||
TITLE = E.title #: document title
|
||||
TR = E.tr #: table row
|
||||
TRACK = E.track #: audio/video track
|
||||
TT = E.tt #: teletype or monospaced text style
|
||||
U = E.u #: underlined text style (DEPRECATED)
|
||||
UL = E.ul #: unordered list
|
||||
VAR = E.var #: instance of a variable or program argument
|
||||
VIDEO = E.video #: embedded video file
|
||||
WBR = E.wbr #: word break
|
||||
|
||||
# attributes (only reserved words are included here)
|
||||
ATTR = dict
|
||||
def CLASS(v):
    """Return ``{'class': v}``; ``class`` is a reserved word in Python."""
    return {'class': v}
|
||||
def FOR(v):
    """Return ``{'for': v}``; ``for`` is a reserved word in Python."""
    return {'for': v}
|
||||
@@ -1,21 +0,0 @@
|
||||
# cython: language_level=3str
|
||||
|
||||
"""Backward-compatibility module for lxml_html_clean"""
|
||||
|
||||
try:
|
||||
from lxml_html_clean import *
|
||||
|
||||
__all__ = [
|
||||
"clean_html",
|
||||
"clean",
|
||||
"Cleaner",
|
||||
"autolink",
|
||||
"autolink_html",
|
||||
"word_break",
|
||||
"word_break_html",
|
||||
]
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"lxml.html.clean module is now a separate project lxml_html_clean.\n"
|
||||
"Install lxml[html_clean] or lxml_html_clean directly."
|
||||
) from None
|
||||
@@ -1,135 +0,0 @@
|
||||
# FIXME: this should all be confirmed against what a DTD says
|
||||
# (probably in a test; this may not match the DTD exactly, but we
|
||||
# should document just how it differs).
|
||||
|
||||
"""
|
||||
Data taken from https://www.w3.org/TR/html401/index/elements.html
|
||||
and https://html.spec.whatwg.org/multipage/syntax.html#elements-2
|
||||
for html5_tags.
|
||||
"""
|
||||
|
||||
empty_tags = frozenset([
|
||||
'area', 'base', 'basefont', 'br', 'col', 'embed', 'frame', 'hr',
|
||||
'img', 'input', 'isindex', 'link', 'meta', 'param', 'source', 'track', 'wbr'])
|
||||
|
||||
deprecated_tags = frozenset([
|
||||
'applet', 'basefont', 'center', 'dir', 'font', 'isindex',
|
||||
'menu', 's', 'strike', 'u'])
|
||||
|
||||
# archive actually takes a space-separated list of URIs
|
||||
link_attrs = frozenset([
|
||||
'action', 'archive', 'background', 'cite', 'classid',
|
||||
'codebase', 'data', 'href', 'longdesc', 'profile', 'src',
|
||||
'usemap',
|
||||
# Not standard:
|
||||
'dynsrc', 'lowsrc',
|
||||
# HTML5 formaction
|
||||
'formaction'
|
||||
])
|
||||
|
||||
# Not in the HTML 4 spec:
|
||||
# onerror, onresize
|
||||
event_attrs = frozenset([
|
||||
'onblur', 'onchange', 'onclick', 'ondblclick', 'onerror',
|
||||
'onfocus', 'onkeydown', 'onkeypress', 'onkeyup', 'onload',
|
||||
'onmousedown', 'onmousemove', 'onmouseout', 'onmouseover',
|
||||
'onmouseup', 'onreset', 'onresize', 'onselect', 'onsubmit',
|
||||
'onunload',
|
||||
])
|
||||
|
||||
safe_attrs = frozenset([
|
||||
'abbr', 'accept', 'accept-charset', 'accesskey', 'action', 'align',
|
||||
'alt', 'axis', 'border', 'cellpadding', 'cellspacing', 'char', 'charoff',
|
||||
'charset', 'checked', 'cite', 'class', 'clear', 'cols', 'colspan',
|
||||
'color', 'compact', 'coords', 'datetime', 'dir', 'disabled', 'enctype',
|
||||
'for', 'frame', 'headers', 'height', 'href', 'hreflang', 'hspace', 'id',
|
||||
'ismap', 'label', 'lang', 'longdesc', 'maxlength', 'media', 'method',
|
||||
'multiple', 'name', 'nohref', 'noshade', 'nowrap', 'prompt', 'readonly',
|
||||
'rel', 'rev', 'rows', 'rowspan', 'rules', 'scope', 'selected', 'shape',
|
||||
'size', 'span', 'src', 'start', 'summary', 'tabindex', 'target', 'title',
|
||||
'type', 'usemap', 'valign', 'value', 'vspace', 'width'])
|
||||
|
||||
# From http://htmlhelp.com/reference/html40/olist.html
|
||||
top_level_tags = frozenset([
|
||||
'html', 'head', 'body', 'frameset',
|
||||
])
|
||||
|
||||
head_tags = frozenset([
|
||||
'base', 'isindex', 'link', 'meta', 'script', 'style', 'title',
|
||||
])
|
||||
|
||||
general_block_tags = frozenset([
|
||||
'address',
|
||||
'blockquote',
|
||||
'center',
|
||||
'del',
|
||||
'div',
|
||||
'h1',
|
||||
'h2',
|
||||
'h3',
|
||||
'h4',
|
||||
'h5',
|
||||
'h6',
|
||||
'hr',
|
||||
'ins',
|
||||
'isindex',
|
||||
'noscript',
|
||||
'p',
|
||||
'pre',
|
||||
])
|
||||
|
||||
list_tags = frozenset([
|
||||
'dir', 'dl', 'dt', 'dd', 'li', 'menu', 'ol', 'ul',
|
||||
])
|
||||
|
||||
table_tags = frozenset([
|
||||
'table', 'caption', 'colgroup', 'col',
|
||||
'thead', 'tfoot', 'tbody', 'tr', 'td', 'th',
|
||||
])
|
||||
|
||||
# just this one from
|
||||
# http://www.georgehernandez.com/h/XComputers/HTML/2BlockLevel.htm
|
||||
block_tags = general_block_tags | list_tags | table_tags | frozenset([
|
||||
# Partial form tags
|
||||
'fieldset', 'form', 'legend', 'optgroup', 'option',
|
||||
])
|
||||
|
||||
form_tags = frozenset([
|
||||
'form', 'button', 'fieldset', 'legend', 'input', 'label',
|
||||
'select', 'optgroup', 'option', 'textarea',
|
||||
])
|
||||
|
||||
special_inline_tags = frozenset([
|
||||
'a', 'applet', 'basefont', 'bdo', 'br', 'embed', 'font', 'iframe',
|
||||
'img', 'map', 'area', 'object', 'param', 'q', 'script',
|
||||
'span', 'sub', 'sup',
|
||||
])
|
||||
|
||||
phrase_tags = frozenset([
|
||||
'abbr', 'acronym', 'cite', 'code', 'del', 'dfn', 'em',
|
||||
'ins', 'kbd', 'samp', 'strong', 'var',
|
||||
])
|
||||
|
||||
font_style_tags = frozenset([
|
||||
'b', 'big', 'i', 's', 'small', 'strike', 'tt', 'u',
|
||||
])
|
||||
|
||||
frame_tags = frozenset([
|
||||
'frameset', 'frame', 'noframes',
|
||||
])
|
||||
|
||||
html5_tags = frozenset([
|
||||
'article', 'aside', 'audio', 'canvas', 'command', 'datalist',
|
||||
'details', 'embed', 'figcaption', 'figure', 'footer', 'header',
|
||||
'hgroup', 'keygen', 'mark', 'math', 'meter', 'nav', 'output',
|
||||
'progress', 'rp', 'rt', 'ruby', 'section', 'source', 'summary',
|
||||
'svg', 'time', 'track', 'video', 'wbr'
|
||||
])
|
||||
|
||||
# These tags aren't standard
|
||||
nonstandard_tags = frozenset(['blink', 'marquee'])
|
||||
|
||||
|
||||
tags = (top_level_tags | head_tags | general_block_tags | list_tags
|
||||
| table_tags | form_tags | special_inline_tags | phrase_tags
|
||||
| font_style_tags | nonstandard_tags | html5_tags)
|
||||
Binary file not shown.
@@ -1,972 +0,0 @@
|
||||
# cython: language_level=3
|
||||
|
||||
try:
|
||||
import cython
|
||||
except ImportError:
|
||||
class fake_cython:
|
||||
compiled = False
|
||||
def cfunc(self, func): return func
|
||||
def cclass(self, func): return func
|
||||
def declare(self, _, value): return value
|
||||
def __getattr__(self, type_name): return "object"
|
||||
|
||||
cython = fake_cython()
|
||||
|
||||
try:
|
||||
from . import _difflib as difflib
|
||||
import inspect
|
||||
if inspect.isfunction(difflib.get_close_matches):
|
||||
raise ImportError(
|
||||
"Embedded difflib is not compiled to a fast binary, using the stdlib instead.")
|
||||
from cython.cimports.lxml.html._difflib import SequenceMatcher
|
||||
except ImportError:
|
||||
import difflib
|
||||
if not cython.compiled:
|
||||
from difflib import SequenceMatcher
|
||||
|
||||
import itertools
|
||||
import functools
|
||||
import operator
|
||||
import re
|
||||
|
||||
from lxml import etree
|
||||
from lxml.html import fragment_fromstring
|
||||
from . import defs
|
||||
|
||||
__all__ = ['html_annotate', 'htmldiff']
|
||||
|
||||
group_by_first_item = functools.partial(itertools.groupby, key=operator.itemgetter(0))
|
||||
|
||||
|
||||
############################################################
|
||||
## Annotation
|
||||
############################################################
|
||||
|
||||
@cython.cfunc
def html_escape(text: str, _escapes: tuple = ('&amp;', '&lt;', '&gt;', '&quot;', '&#x27;')) -> str:
    # Not so slow compiled version of 'html.escape()'.
    # Most of the time, we replace little to nothing, so use a fast decision what needs to be done.
    #
    # '_escapes' holds the replacement for each of the characters
    # & < > " ' in exactly that order.
    ch: cython.Py_UCS4
    replace: cython.char[5] = [False] * 5
    # First pass: only record which of the five characters occur at all.
    for ch in text:
        replace[0] |= ch == '&'
        replace[1] |= ch == '<'
        replace[2] |= ch == '>'
        replace[3] |= ch == '"'
        replace[4] |= ch == "'"

    # Second pass: replace what was found.  '&' comes first so that the
    # ampersands introduced by the other replacements are not escaped again.
    for i in range(5):
        if replace[i]:
            text = text.replace('&<>"\''[i], _escapes[i])

    return text
|
||||
|
||||
|
||||
if not cython.compiled:
|
||||
from html import escape as html_escape
|
||||
|
||||
|
||||
def default_markup(text, version):
    """
    Default markup function for html_annotate(): wrap ``text`` in a
    ``<span>`` whose ``title`` attribute is the HTML-escaped ``version``.
    ``text`` is inserted as given.
    """
    title = html_escape(version)
    return '<span title="%s">%s</span>' % (title, text)
|
||||
|
||||
def html_annotate(doclist, markup=default_markup):
    """
    Annotate the newest document with the version in which each piece of
    its text first appeared.

    doclist is a sequence of ``(document, version)`` pairs ordered from
    oldest to newest, like::

        >>> version1 = 'Hello World'
        >>> version2 = 'Goodbye World'
        >>> print(html_annotate([(version1, 'version 1'),
        ...                      (version2, 'version 2')]))
        <span title="version 2">Goodbye</span> <span title="version 1">World</span>

    The documents must be *fragments* (str/UTF8 or unicode), not
    complete documents

    The markup argument is a function that marks up a span of words.
    It is called like markup('Hello', 'version 2') and returns HTML.
    The first argument is text and never includes any markup.  The
    default uses a span with a title:

        >>> print(default_markup('Some Text', 'by Joe'))
        <span title="by Joe">Some Text</span>
    """
    # Every version is split into tokens (words with attached markup), each
    # initially annotated with the version it was taken from.
    annotated = [tokenize_annotated(doc, version)
                 for doc, version in doclist]

    # Diff each version against its successor so that tokens which already
    # existed inherit the annotation of the version that introduced them.
    for older, newer in zip(annotated, annotated[1:]):
        html_annotate_merge_annotations(older, newer)
    newest = annotated[-1]

    # Join adjacent tokens that share an annotation, then add the markup.
    merged = compress_tokens(newest)
    chunks = markup_serialize_tokens(merged, markup)
    return ''.join(chunks).strip()
|
||||
|
||||
def tokenize_annotated(doc, annotation):
    """Tokenize ``doc`` (without href tokens) and set ``annotation`` as
    the ``annotation`` attribute of every resulting token.
    """
    annotated_tokens = tokenize(doc, include_hrefs=False)
    for current in annotated_tokens:
        current.annotation = annotation
    return annotated_tokens
|
||||
|
||||
def html_annotate_merge_annotations(tokens_old, tokens_new):
    """Carry annotations over from tokens_old to tokens_new for every run
    of tokens that the sequence matcher reports as equal in both lists.
    """
    matcher = InsensitiveSequenceMatcher(a=tokens_old, b=tokens_new)

    for tag, old_lo, old_hi, new_lo, new_hi in matcher.get_opcodes():
        if tag != 'equal':
            # Changed, inserted or deleted runs keep their own annotation.
            continue
        copy_annotations(tokens_old[old_lo:old_hi],
                         tokens_new[new_lo:new_hi])
|
||||
|
||||
def copy_annotations(src, dest):
    """
    Pairwise copy the ``annotation`` attribute from the tokens in src to
    the tokens in dest.  Both sequences must have the same length.
    """
    assert len(src) == len(dest)
    for source_token, target_token in zip(src, dest):
        target_token.annotation = source_token.annotation
|
||||
|
||||
def compress_tokens(tokens):
    """
    Combine adjacent tokens when there is no HTML between the tokens,
    and they share an annotation.

    Returns a new list; an empty ``tokens`` sequence yields an empty list.
    """
    if not tokens:
        # Nothing to combine (and no first token to seed the result with).
        return []
    result = [tokens[0]]
    for tok in tokens[1:]:
        # "No HTML between" means: no tags attached after the previous
        # token and none attached before the current one.
        if (not tok.pre_tags and
                not result[-1].post_tags and
                result[-1].annotation == tok.annotation):
            compress_merge_back(result, tok)
        else:
            result.append(tok)
    return result
|
||||
|
||||
@cython.cfunc
def compress_merge_back(tokens: list, tok):
    """ Fold tok into the last entry of tokens, changing the list
    in-place.  Unless both are plain ``token`` instances, tok is simply
    appended instead. """
    previous = tokens[-1]
    if not (type(previous) is token and type(tok) is token):
        tokens.append(tok)
        return

    # The merged token spans both: leading tags of the first, trailing
    # tags and whitespace of the second, annotation of the first.
    combined = token(previous + previous.trailing_whitespace + tok,
                     pre_tags=previous.pre_tags,
                     post_tags=tok.post_tags,
                     trailing_whitespace=tok.trailing_whitespace)
    combined.annotation = previous.annotation
    tokens[-1] = combined
|
||||
|
||||
def markup_serialize_tokens(tokens, markup_func):
    """
    Generate the text chunks for a list of tokens.  Per token this yields
    its pre_tags, then its HTML passed through markup_func together with
    the token's annotation (followed by its trailing whitespace), then
    its post_tags.
    """
    for tok in tokens:
        yield from tok.pre_tags
        yield markup_func(tok.html(), tok.annotation) + tok.trailing_whitespace
        yield from tok.post_tags
|
||||
|
||||
|
||||
############################################################
|
||||
## HTML Diffs
|
||||
############################################################
|
||||
|
||||
def htmldiff(old_html, new_html):
    ## FIXME: this should take parsed documents too, and use their body
    ## or other content.
    """ Diff two HTML documents and return the result as HTML.

    Both arguments are HTML *fragments* (str/UTF8 or unicode), not
    complete documents (i.e., no <html> tag).

    The returned HTML has <ins> and <del> tags added around the
    appropriate text.

    Markup is generally ignored: the markup of new_html is preserved,
    and possibly some markup of old_html (losing some of the old markup
    is considered acceptable).  Only the words in the HTML are diffed.
    The exceptions are <img> tags, which are treated like words, and the
    href attribute of <a> tags, which is noted inside the tag itself when
    it changed.
    """
    chunks = htmldiff_tokens(tokenize(old_html), tokenize(new_html))
    try:
        joined = ''.join(chunks).strip()
    except (ValueError, TypeError) as exc:
        # Best effort: report the problem and fall back to an empty diff.
        print(exc)
        joined = ''
    return fixup_ins_del_tags(joined)
|
||||
|
||||
|
||||
def htmldiff_tokens(html1_tokens, html2_tokens):
    """ Diff two token lists and return the result as a list of text
    chunks (not tokens).
    """
    # The diff runs in several passes.  The tokens isolate the part of the
    # content that is worth diffing, and difflib does the actual matching.
    #
    # Afterwards a valid document has to be assembled from pieces of both
    # inputs.  Markup is preferably taken from the new document; markup of
    # the old document is kept on a best effort basis and dropped when it
    # cannot be resolved.  Deletes are placed as close as possible to where
    # they presumably were -- since only the new markup is kept, that
    # position can be fuzzy, so this is a best effort attempt as well.
    matcher = InsensitiveSequenceMatcher(a=html1_tokens, b=html2_tokens)
    chunks = []
    for op, old_lo, old_hi, new_lo, new_hi in matcher.get_opcodes():
        if op == 'equal':
            chunks.extend(expand_tokens(html2_tokens[new_lo:new_hi], equal=True))
        else:
            # A 'replace' is handled as an insert followed by a delete.
            if op in ('insert', 'replace'):
                inserted = expand_tokens(html2_tokens[new_lo:new_hi])
                merge_insert(inserted, chunks)
            if op in ('delete', 'replace'):
                deleted = expand_tokens(html1_tokens[old_lo:old_hi])
                merge_delete(deleted, chunks)

    # Writing deletes directly as <del> would leave an invalid document at
    # this point.  They are added as special markers instead, which are
    # moved around and resolved once the complete document is available.
    cleanup_delete(chunks)

    return chunks
|
||||
|
||||
|
||||
def expand_tokens(tokens, equal=False):
    """Generate the text chunks for a list of tokens: per token its
    pre_tags, its HTML plus trailing whitespace, and its post_tags.

    With ``equal`` set, the HTML of tokens flagged ``hide_when_equal`` is
    left out (their tags are still generated).
    """
    for tok in tokens:
        yield from tok.pre_tags
        if not (equal and tok.hide_when_equal):
            yield tok.html() + tok.trailing_whitespace
        yield from tok.post_tags
|
||||
|
||||
|
||||
def merge_insert(ins_chunks, doc: list):
    """ Append ins_chunks to doc (the document handled so far, as a list
    of text chunks), wrapping the balanced parts in <ins>...</ins>. """
    # Unbalanced start/end tags are not thrown away (the matching markup
    # is assumed to appear later or earlier in the document), but only the
    # balanced portion gets wrapped in <ins>.

    # Legacy note: this is a deliberate choice.  Originally, all sequences
    # of unbalanced tags were merged into separate start and end tag groups.
    # Now each sequence is handled on its own, which gives finer-grained
    # diffs but a different tag structure than before.

    item: tuple
    for balanced, marked_chunks in group_by_first_item(mark_unbalanced(ins_chunks)):
        chunks = [item[1] for item in marked_chunks]
        if balanced != 'b':
            # unmatched start or end
            doc.extend(chunks)
            continue

        if doc and not doc[-1].endswith(' '):
            # The word in front of the insert did not end with a space.
            doc[-1] += ' '
        doc.append('<ins>')
        doc.extend(chunks)
        if doc[-1].endswith(' '):
            # Trailing space goes outside of </ins>.
            doc[-1] = doc[-1][:-1]
        doc.append('</ins> ')
|
||||
|
||||
|
||||
@cython.cfunc
def tag_name_of_chunk(chunk: str) -> str:
    # Returns the tag name of a start or end tag chunk ('<div class="x">'
    # and '</div>' both give 'div'), or "" when the chunk is not a tag.
    i: cython.Py_ssize_t
    ch: cython.Py_UCS4

    if chunk[0] != '<':
        return ""

    start_pos = 1
    for i, ch in enumerate(chunk):
        if ch == '/':
            # Only a slash directly after '<' marks an end tag.  A later
            # one (as in '<br/>') must not move the start of the name.
            if i == 1:
                start_pos = 2
        elif ch == '>' or ch.isspace():
            # The name ends at the first '>' or whitespace; drop the slash
            # of a self-closing tag, like the pure Python variant does.
            return chunk[start_pos:i].rstrip('/')

    return chunk[start_pos:].rstrip('/')
|
||||
|
||||
if not cython.compiled:
    # Iterating over the string character by character is slow in plain
    # Python, so use string methods when the module is not compiled.
    def tag_name_of_chunk(chunk: str) -> str:
        # First whitespace-separated word, without '<', '>' and '/'.
        first_word = chunk.split(maxsplit=1)[0]
        return first_word.strip('<>/')
|
||||
|
||||
|
||||
# These are sentinels to represent the start and end of a <del>
|
||||
# segment, until we do the cleanup phase to turn them into proper
|
||||
# markup:
|
||||
# Marker placed in the chunk list before the chunks of a deleted segment.
# The class object itself is the marker; it is never instantiated here.
class DEL_START:
    pass

# Marker placed in the chunk list after the chunks of a deleted segment.
class DEL_END:
    pass
|
||||
|
||||
|
||||
def merge_delete(del_chunks, doc: list):
    """ Append the text chunks in del_chunks to the document doc (another
    list of text chunks), enclosed in DEL_START / DEL_END markers that
    flag them as a delete.  cleanup_delete later turns these markers
    into <del> tags."""

    doc.extend(itertools.chain((DEL_START,), del_chunks, (DEL_END,)))
|
||||
|
||||
|
||||
def cleanup_delete(chunks: list):
    """ Cleans up any DEL_START/DEL_END markers in the document, replacing
    them with <del></del>.  To do this while keeping the document
    valid, it may need to drop some tags (either start or end tags).

    It may also move the del into adjacent tags to try to move it to a
    similar location where it was originally located (e.g., moving a
    delete into preceding <div> tag, if the del looks like (DEL_START,
    'Text</div>', DEL_END)

    The list ``chunks`` is modified in place; nothing is returned.
    """
    i: cython.Py_ssize_t
    del_start: cython.Py_ssize_t
    del_end: cython.Py_ssize_t
    shift_start_right: cython.Py_ssize_t
    shift_end_left: cython.Py_ssize_t
    unbalanced_start: cython.Py_ssize_t
    unbalanced_end: cython.Py_ssize_t
    pos: cython.Py_ssize_t
    start_pos: cython.Py_ssize_t
    chunk: str

    start_pos = 0
    while 1:
        # Find a pending DEL_START/DEL_END, splitting the document
        # into stuff-preceding-DEL_START, stuff-inside, and
        # stuff-following-DEL_END
        try:
            del_start = chunks.index(DEL_START, start_pos)
        except ValueError:
            # Nothing found, we've cleaned up the entire doc
            break
        else:
            del_end = chunks.index(DEL_END, del_start + 1)

        # The list shrinks at the end of every pass (dropped unbalanced tags),
        # so the length must be re-read here; a value cached before the loop
        # would let the forward scan below run past the end of the list.
        chunk_count = len(chunks)

        shift_end_left = shift_start_right = 0
        unbalanced_start = unbalanced_end = 0
        deleted_chunks = mark_unbalanced(chunks[del_start+1:del_end])

        # For unbalanced start tags at the beginning, find matching (non-deleted)
        # end tags after the current DEL_END and move the start tag outside.
        for balanced, del_chunk in deleted_chunks:
            if balanced != 'us':
                break
            unbalanced_start += 1
            unbalanced_start_name = tag_name_of_chunk(del_chunk)
            for i in range(del_end+1, chunk_count):
                if chunks[i] is DEL_START:
                    break
                chunk = chunks[i]
                if chunk[0] != '<' or chunk[1] == '/':
                    # Reached a word or closing tag.
                    break
                name = tag_name_of_chunk(chunk)
                if name == 'ins':
                    # Cannot move into an insert.
                    break
                assert name != 'del', f"Unexpected delete tag: {chunk!r}"
                if name != unbalanced_start_name:
                    # Avoid mixing in other start tags.
                    break
                # Exclude start tag to balance the end tag.
                shift_start_right += 1

        # For unbalanced end tags at the end, find matching (non-deleted)
        # start tags before the current DEL_START and move the end tag outside.
        for balanced, del_chunk in reversed(deleted_chunks):
            if balanced != 'ue':
                break
            unbalanced_end += 1
            unbalanced_end_name = tag_name_of_chunk(del_chunk)
            for i in range(del_start - 1, -1, -1):
                if chunks[i] is DEL_END:
                    break
                chunk = chunks[i]
                if chunk[0] == '<' and chunk[1] != '/':
                    # Reached an opening tag, can we go further?  Maybe not...
                    break
                name = tag_name_of_chunk(chunk)
                if name == 'ins' or name == 'del':
                    # Cannot move into an insert or delete.
                    break
                if name != unbalanced_end_name:
                    # Avoid mixing in other start tags.
                    break
                # Exclude end tag to balance the start tag.
                shift_end_left += 1

        """
        # This is what we do below in loops, spelled out using slicing and list copying:

        chunks[del_start - shift_end_left : del_end + shift_start_right + 1] = [
            *chunks[del_start + 1: del_start + shift_start_right + 1],
            '<del>',
            *chunks[del_start + unbalanced_start + 1 : del_end - unbalanced_end],
            '</del> ',
            *chunks[del_end - shift_end_left: del_end],
        ]

        new_del_end = del_end - 2 * shift_end_left
        assert chunks[new_del_end] == '</del> '
        del_end = new_del_end

        if new_del_start > 0 and not chunks[new_del_start - 1].endswith(' '):
            # Fix up case where the word before us didn't have a trailing space.
            chunks[new_del_start - 1] += ' '
        if new_del_end > 0 and chunks[new_del_end - 1].endswith(' '):
            # Move space outside of </del>.
            chunks[new_del_end - 1] = chunks[new_del_end - 1][:-1]
        """
        pos = del_start - shift_end_left
        # Move re-balanced start tags before the '<del>'.
        for i in range(del_start + 1, del_start + shift_start_right + 1):
            chunks[pos] = chunks[i]
            pos += 1
        if pos and not chunks[pos - 1].endswith(' '):
            # Fix up the case where the word before '<del>' didn't have a trailing space.
            chunks[pos - 1] += ' '
        chunks[pos] = '<del>'
        pos += 1
        # Copy only the balanced deleted content between '<del>' and '</del>'.
        for i in range(del_start + unbalanced_start + 1, del_end - unbalanced_end):
            chunks[pos] = chunks[i]
            pos += 1
        if chunks[pos - 1].endswith(' '):
            # Move trailing space outside of </del>.
            chunks[pos - 1] = chunks[pos - 1][:-1]
        chunks[pos] = '</del> '
        pos += 1
        # Move re-balanced end tags after the '</del>'.
        for i in range(del_end - shift_end_left, del_end):
            chunks[pos] = chunks[i]
            pos += 1
        # Adjust the length of the processed part in 'chunks'.
        del chunks[pos : del_end + shift_start_right + 1]
        # Resume the search right after the segment that was just rewritten.
        start_pos = pos
|
||||
|
||||
|
||||
@cython.cfunc
def mark_unbalanced(chunks) -> list:
    """Label every chunk as balanced or unbalanced markup.

    Returns a list of ``(marker, chunk)`` tuples in document order, where
    ``marker`` is 'b' for words, empty tags and matched start/end tags,
    'us' for a start tag without a matching end tag, and 'ue' for an end
    tag without a matching start tag.
    """
    # Stack of (tag name, start tag chunk, items marked before that start tag).
    tag_stack = []
    # Items marked since the innermost still-open start tag.
    marked = []

    chunk: str
    parents: list

    for chunk in chunks:
        if not chunk.startswith('<'):
            # Plain word: always balanced.
            marked.append(('b', chunk))
            continue

        name = tag_name_of_chunk(chunk)
        if name in empty_tags:
            # Empty tags never need a partner.
            marked.append(('b', chunk))
            continue

        if chunk[1] == '/':
            # closing tag found, unwind tag stack
            while tag_stack:
                start_name, start_chunk, parents = tag_stack.pop()
                if start_name == name:
                    # balanced tag closing, keep rest of stack intact
                    parents.append(('b', start_chunk))
                    parents.extend(marked)
                    parents.append(('b', chunk))
                    marked = parents
                    # Signals below that the end tag has been consumed.
                    chunk = None
                    break
                else:
                    # unmatched start tag
                    parents.append(('us', start_chunk))
                    parents.extend(marked)
                    marked = parents

            if chunk is not None:
                # unmatched end tag left after clearing the stack
                marked.append(('ue', chunk))
        else:
            # new start tag found
            tag_stack.append((name, chunk, marked))
            marked = []

    # add any unbalanced start tags
    while tag_stack:
        _, start_chunk, parents = tag_stack.pop()
        parents.append(('us', start_chunk))
        parents.extend(marked)
        marked = parents

    return marked
|
||||
|
||||
|
||||
class token(str):
    """A diffable unit of the document - usually a word shown to the user.

    Adjacent opening tags are stored on the token in ``pre_tags`` and the
    closing tags that follow it in ``post_tags``.  Because of empty tags
    next to a word, ``pre_tags`` may occasionally hold closing tags and
    ``post_tags`` opening tags.

    The whitespace that followed the word is remembered separately in
    ``trailing_whitespace`` so that it does not take part in comparing
    two tokens for equality.
    """

    # Tokens with this flag set are left out of the rendered diff when
    # they did not change.
    hide_when_equal = False

    def __new__(cls, text, pre_tags=None, post_tags=None, trailing_whitespace=""):
        self = str.__new__(cls, text)
        # A fresh list per instance unless the caller supplied one.
        self.pre_tags = [] if pre_tags is None else pre_tags
        self.post_tags = [] if post_tags is None else post_tags
        self.trailing_whitespace = trailing_whitespace
        return self

    def __repr__(self):
        text_repr = str.__repr__(self)
        return (
            f'token({text_repr}, {self.pre_tags!r}, '
            f'{self.post_tags!r}, {self.trailing_whitespace!r})'
        )

    def html(self):
        """Return the markup to emit for this token (the plain text)."""
        return str(self)
|
||||
|
||||
class tag_token(token):
    """ Represents a token that is actually a tag.  Currently this is just
    the <img> tag, which takes up visible space just like a word but
    is only represented in a document by a tag.

    ``tag`` is the tag name, ``data`` the value that identifies the tag for
    comparison, and ``html_repr`` the markup returned by html().
    """

    def __new__(cls, tag, data, html_repr, pre_tags=None,
                post_tags=None, trailing_whitespace=""):
        # The comparable text is built from the tag name and its data.
        # (This previously interpolated the builtin ``type`` by mistake.)
        obj = token.__new__(cls, f"{tag}: {data}",
                            pre_tags=pre_tags,
                            post_tags=post_tags,
                            trailing_whitespace=trailing_whitespace)
        obj.tag = tag
        obj.data = data
        obj.html_repr = html_repr
        return obj

    def __repr__(self):
        return 'tag_token(%s, %s, html_repr=%s, post_tags=%r, pre_tags=%r, trailing_whitespace=%r)' % (
            self.tag,
            self.data,
            self.html_repr,
            self.pre_tags,
            self.post_tags,
            self.trailing_whitespace)

    def html(self):
        """Return the original markup of the tag."""
        return self.html_repr
|
||||
|
||||
class href_token(token):
    """Token carrying the href of an anchor tag.

    In contrast to ordinary words, an href is only displayed in the diff
    when it has changed.
    """

    hide_when_equal = True

    def html(self):
        """Return the link text shown for a changed href."""
        return f' Link: {self}'
|
||||
|
||||
|
||||
def tokenize(html, include_hrefs=True):
    """
    Parse the given HTML and return token objects (words with attached tags).

    ``html`` may be an lxml element, which is used directly, or HTML source,
    which is parsed with parse_html(cleanup=True).  Only the content of a
    page is considered: anything in the head is ignored, and the <head>
    and <body> elements are themselves optional.  lxml does the parsing
    and may guess incorrectly when the markup is particularly bad.

    <ins> and <del> tags are removed from parsed source, as keeping them
    gets confusing.

    If include_hrefs is true, the href attribute of <a> tags is included
    as a special kind of diffable token.
    """
    body_el = html if etree.iselement(html) else parse_html(html, cleanup=True)
    # Flatten into one chunk per start tag, word and end tag, then
    # regroup those chunks into token objects.
    return fixup_chunks(
        flatten_el(body_el, skip_tag=True, include_hrefs=include_hrefs))
|
||||
|
||||
|
||||
def parse_html(html, cleanup=True):
    """
    Parse an HTML fragment and return it as an lxml element.

    The result is wrapped in a <div> element that was not part of the
    original document.

    When cleanup is true, the source is first passed through
    cleanup_html(), which strips <head>/<body> structure and removes
    <ins> and <del> tags.
    """
    source = cleanup_html(html) if cleanup else html
    return fragment_fromstring(source, create_parent=True)
|
||||
|
||||
|
||||
# Pre-bound regex helpers used by cleanup_html().
_search_body = re.compile(r'<body.*?>', re.I|re.S).search
_search_end_body = re.compile(r'</body.*?>', re.I|re.S).search
_replace_ins_del = re.compile(r'</?(ins|del).*?>', re.I|re.S).sub


def cleanup_html(html):
    """Strip page structure and change markers from an HTML string.

    If a <body> start tag is present, everything up to and including it
    is dropped; if a </body> end tag follows, it and everything after it
    is dropped as well.  All <ins> and <del> start and end tags are then
    removed (their content is kept).
    """
    body_open = _search_body(html)
    if body_open is not None:
        html = html[body_open.end():]
    body_close = _search_end_body(html)
    if body_close is not None:
        html = html[:body_close.start()]
    return _replace_ins_del('', html)
|
||||
|
||||
|
||||
def split_trailing_whitespace(word):
    """
    Split a word into its text and the whitespace that trails it.

    For example, 'test\n\n' is returned as ('test', '\n\n').
    """
    cut = len(word.rstrip())
    head, trailing = word[:cut], word[cut:]
    return head, trailing
|
||||
|
||||
|
||||
def fixup_chunks(chunks):
    """
    Turn a sequence of chunks into a list of tokens.

    String chunks are words, start tags or end tags; tuple chunks are
    ('img', src, tag) or ('href', url) entries.  Start tags collected
    before a word become that token's ``pre_tags``; end tags that follow
    a word become its ``post_tags``.  If no token is produced at all, a
    single empty token holding the collected tags is returned.
    """
    pending_tags = []
    last_token = None
    tokens = []
    for chunk in chunks:
        if isinstance(chunk, tuple):
            kind = chunk[0]
            if kind == 'img':
                src = chunk[1]
                markup, whitespace = split_trailing_whitespace(chunk[2])
                last_token = tag_token(
                    'img', src, html_repr=markup,
                    pre_tags=pending_tags,
                    trailing_whitespace=whitespace)
            elif kind == 'href':
                last_token = href_token(
                    chunk[1], pre_tags=pending_tags, trailing_whitespace=" ")
            else:
                # Other tuple kinds are ignored.
                continue
            pending_tags = []
            tokens.append(last_token)
            continue

        if is_word(chunk):
            text, whitespace = split_trailing_whitespace(chunk)
            last_token = token(
                text, pre_tags=pending_tags, trailing_whitespace=whitespace)
            pending_tags = []
            tokens.append(last_token)
        elif is_start_tag(chunk):
            pending_tags.append(chunk)
        elif is_end_tag(chunk):
            if pending_tags:
                # Tags are already waiting for the next word; keep the order.
                pending_tags.append(chunk)
                continue
            assert last_token, (
                "Weird state, cur_word=%r, result=%r, chunks=%r of %r"
                % (last_token, tokens, chunk, chunks))
            last_token.post_tags.append(chunk)
        else:
            assert False

    if tokens:
        # Tags left over at the end belong after the final token.
        tokens[-1].post_tags.extend(pending_tags)
        return tokens
    return [token('', pre_tags=pending_tags)]
|
||||
|
||||
|
||||
# Tags that do not require an end tag (taken from lxml.html.defs).
empty_tags = cython.declare(frozenset, defs.empty_tags)

# Tag names treated as block-level elements.
block_level_tags = cython.declare(frozenset, frozenset((
    'address', 'blockquote', 'center', 'dir', 'div', 'dl',
    'fieldset', 'form',
    'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
    'hr', 'isindex', 'menu', 'noframes', 'noscript',
    'ol', 'p', 'pre', 'table', 'ul',
)))

# Tag names of containers listed separately from block_level_tags.
block_level_container_tags = cython.declare(frozenset, frozenset((
    'dd', 'dt', 'frameset', 'li',
    'tbody', 'td', 'tfoot', 'th', 'thead', 'tr',
)))

# Sorted tuple of both sets, suitable for unpacking into el.iter(*...).
any_block_level_tag = cython.declare(
    tuple,
    tuple(sorted(block_level_tags | block_level_container_tags)),
)
|
||||
|
||||
|
||||
def flatten_el(el, include_hrefs, skip_tag=False):
    """Generate the text chunks of an lxml element, in document order.

    Every start tag, every word and every end tag is one chunk.  An <img>
    start tag is yielded as an ('img', src, tag) tuple and, when
    include_hrefs is true, the href of an <a> element is yielded as an
    ('href', url) tuple just before its end tag.

    If skip_tag is true, the outermost element's own start tag, end tag
    and tail text are left out; only its contents are generated.
    """
    emit_own_tag = not skip_tag
    if emit_own_tag:
        opening = start_tag(el)
        yield ('img', el.get('src'), opening) if el.tag == 'img' else opening

    is_bare_empty_tag = (
        el.tag in empty_tags and not el.text and not len(el) and not el.tail)
    if is_bare_empty_tag:
        return

    yield from map(html_escape, split_words(el.text))
    for child in el:
        yield from flatten_el(child, include_hrefs=include_hrefs)

    if el.tag == 'a' and el.get('href') and include_hrefs:
        yield ('href', el.get('href'))

    if emit_own_tag:
        yield end_tag(el)
        yield from map(html_escape, split_words(el.tail))
|
||||
|
||||
# Matches a run of non-whitespace together with the whitespace after it.
_find_words = re.compile(r'\S+(?:\s+|$)', re.U).findall


def split_words(text):
    """Split text into words, each keeping its trailing whitespace.

    Returns an empty list for empty, missing or whitespace-only text.
    """
    if text and text.strip():
        return _find_words(text)
    return []
|
||||
|
||||
# Matcher that succeeds when a string begins with a space, tab, newline
# or carriage return.
_has_start_whitespace = re.compile(r'^[ \t\n\r]').match
|
||||
|
||||
def start_tag(el):
    """
    Build the textual start tag of an element, including its attributes.

    Attribute values are passed through html_escape() and wrapped in
    double quotes.
    """
    pieces = [f'<{el.tag}']
    for name, value in el.attrib.items():
        pieces.append(f' {name}="{html_escape(value)}"')
    pieces.append('>')
    return ''.join(pieces)
|
||||
|
||||
def end_tag(el):
    """Build the textual end tag of an element.

    A single space is appended when the element's tail text begins with
    whitespace.
    """
    tail = el.tail
    if tail and _has_start_whitespace(tail):
        return f'</{el.tag}> '
    return f'</{el.tag}>'
|
||||
|
||||
def is_word(tok):
    """True when the chunk is text rather than a tag (no leading '<')."""
    return tok[:1] != '<'
|
||||
|
||||
def is_end_tag(tok):
    """True when the chunk is a closing tag (begins with '</')."""
    return tok[:2] == '</'
|
||||
|
||||
def is_start_tag(tok):
    """True when the chunk begins with '<' but is not a closing tag."""
    return tok[:1] == '<' and tok[1:2] != '/'
|
||||
|
||||
def fixup_ins_del_tags(html):
    """Move <ins> and <del> tags inside block-level elements.

    Takes an HTML string and returns a new one in which, for example,
    <ins><p>word</p></ins> has become <p><ins>word</ins></p>.
    """
    tree = parse_html(html, cleanup=False)
    _fixup_ins_del_tags(tree)
    # The wrapper element added by parse_html() is dropped again here.
    return serialize_html_fragment(tree, skip_outer=True)
|
||||
|
||||
def serialize_html_fragment(el, skip_outer=False):
    """Serialise one lxml element as an HTML string.

    The serialised form includes the element's tail.

    If skip_outer is true, the text up to the first '>' and from the last
    '<' onwards is cut away (removing the outermost start and end tag)
    and the remainder is stripped of surrounding whitespace.
    """
    assert not isinstance(el, str), (
        f"You should pass in an element, not a string like {el!r}")
    html = etree.tostring(el, method="html", encoding='unicode')
    if not skip_outer:
        return html
    after_start_tag = html[html.find('>') + 1:]
    inner = after_start_tag[:after_start_tag.rfind('<')]
    return inner.strip()
|
||||
|
||||
|
||||
@cython.cfunc
def _fixup_ins_del_tags(doc):
    """In-place variant of fixup_ins_del_tags() for an lxml document.

    Each <ins>/<del> element that contains a block-level element has its
    markup pushed down inside those blocks and is then dropped itself.
    """
    # Snapshot the matches first, because the tree is modified below.
    change_markers = list(doc.iter('ins', 'del'))
    for el in change_markers:
        if _contains_block_level_tag(el):
            _move_el_inside_block(el, tag=el.tag)
            el.drop_tag()
            #_merge_element_contents(el)
|
||||
|
||||
|
||||
@cython.cfunc
def _contains_block_level_tag(el):
    """Report whether iterating ``el`` finds any block-level element
    such as <p> or <td>.
    """
    first_block = next(el.iter(*any_block_level_tag), None)
    return first_block is not None
|
||||
|
||||
|
||||
@cython.cfunc
def _move_el_inside_block(el, tag):
    """ helper for _fixup_ins_del_tags; actually takes the <ins> etc tags
    and moves them inside any block-level tags.

    ``el`` is modified in place; ``tag`` is the name of the wrapper
    elements that get created ('ins' or 'del' when called from
    _fixup_ins_del_tags).
    """
    makeelement = el.makeelement
    # Look for a block-level element other than el itself; the for/else
    # branch runs only when none is found.
    for block_level_el in el.iter(*any_block_level_tag):
        if block_level_el is not el:
            break
    else:
        # No block-level tags in any child
        # Wrap the entire content of el (text and children) in one new element.
        children_tag = makeelement(tag)
        children_tag.text = el.text
        el.text = None
        children_tag.extend(iter(el))
        el[:] = [children_tag]
        return

    # Iterate over a copy, since children are replaced during the loop.
    for child in list(el):
        if _contains_block_level_tag(child):
            _move_el_inside_block(child, tag)
            if child.tail:
                # Text following the child gets its own wrapper element.
                tail_tag = makeelement(tag)
                tail_tag.text = child.tail
                child.tail = None
                child.addnext(tail_tag)
        else:
            # A child without block-level content is wrapped as a whole.
            child_tag = makeelement(tag)
            el.replace(child, child_tag)
            child_tag.append(child)
    if el.text:
        # Leading text of el is moved into a wrapper placed first.
        text_tag = makeelement(tag)
        text_tag.text = el.text
        el.text = None
        el.insert(0, text_tag)
|
||||
|
||||
|
||||
def _merge_element_contents(el):
    """
    Removes an element, but merges its contents into its place, e.g.,
    given <p>Hi <i>there!</i></p>, if you remove the <i> element you get
    <p>Hi there!</p>

    The parent of ``el`` is modified in place.
    """
    parent = el.getparent()
    text = el.text
    tail = el.tail
    if tail:
        if not len(el):
            # No children: the tail simply continues the element's text.
            text = (text or '') + tail
        else:
            # Otherwise the tail is attached after the last child.
            el[-1].tail = (el[-1].tail or '') + tail
    index = parent.index(el)
    if text:
        # The text goes to whatever precedes el: the previous sibling's
        # tail, or the parent's text when el is the first child.
        previous = el.getprevious()
        if previous is None:
            parent.text = (parent.text or '') + text
        else:
            previous.tail = (previous.tail or '') + text
    # Replace el itself by its children.
    parent[index:index+1] = el.getchildren()
|
||||
|
||||
|
||||
@cython.final
@cython.cclass
class InsensitiveSequenceMatcher(SequenceMatcher):
    """
    Acts like SequenceMatcher, but tries not to find very small equal
    blocks amidst large spans of changes
    """

    # Matching blocks of this size or smaller are discarded (the effective
    # limit is lowered for short sequences, see get_matching_blocks()).
    threshold = 2

    @cython.cfunc
    def get_matching_blocks(self) -> list:
        """Return the matching blocks, leaving out very small ones.

        The cut-off is ``threshold`` capped at a quarter of the shorter
        sequence's length.  Zero-length blocks (such as the terminating
        sentinel block) are always kept.
        """
        # Use the shorter of the two sequences; this previously measured
        # self.b twice and ignored the length of self.a.
        size: cython.Py_ssize_t = min(len(self.a), len(self.b))
        threshold: cython.Py_ssize_t = self.threshold
        threshold = min(threshold, size // 4)
        actual = SequenceMatcher.get_matching_blocks(self)
        return [item for item in actual
                if item[2] > threshold
                or not item[2]]
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # When run as a script, hand over to the command line front end.
    from lxml.html._diffcommand import main as _run_diff_command
    _run_diff_command()
|
||||
@@ -1,299 +0,0 @@
|
||||
from lxml.etree import XPath, ElementBase
|
||||
from lxml.html import fromstring, XHTML_NAMESPACE
|
||||
from lxml.html import _forms_xpath, _options_xpath, _nons, _transform_result
|
||||
from lxml.html import defs
|
||||
import copy
|
||||
|
||||
try:
|
||||
basestring
|
||||
except NameError:
|
||||
# Python 3
|
||||
basestring = str
|
||||
|
||||
__all__ = ['FormNotFound', 'fill_form', 'fill_form_html',
|
||||
'insert_errors', 'insert_errors_html',
|
||||
'DefaultErrorCreator']
|
||||
|
||||
class FormNotFound(LookupError):
    """Error raised when the requested form cannot be located."""
|
||||
|
||||
# Forms (plain or XHTML-namespaced) whose ``name`` attribute equals $name.
# The predicate must use '@name'; a bare 'name' would test for a child
# element called <name> and never match a form by its name attribute.
_form_name_xpath = XPath(
    'descendant-or-self::form[@name=$name]|descendant-or-self::x:form[@name=$name]',
    namespaces={'x':XHTML_NAMESPACE})
# All input, select and textarea elements (plain or XHTML-namespaced).
_input_xpath = XPath('|'.join(['descendant-or-self::'+_tag for _tag in ('input','select','textarea','x:input','x:select','x:textarea')]),
                     namespaces={'x':XHTML_NAMESPACE})
# Labels anywhere in the document whose ``for`` attribute equals $for_id.
_label_for_xpath = XPath('//label[@for=$for_id]|//x:label[@for=$for_id]',
                         namespaces={'x':XHTML_NAMESPACE})
# Any element whose ``name`` attribute equals $name.
_name_xpath = XPath('descendant-or-self::*[@name=$name]')
|
||||
|
||||
def fill_form(
    el,
    values,
    form_id=None,
    form_index=None,
    ):
    """Fill a form found in ``el`` with ``values``, modifying it in place.

    The form is located with _find_form() using ``form_id`` and
    ``form_index``.
    """
    form = _find_form(el, form_id=form_id, form_index=form_index)
    _fill_form(form, values)
|
||||
|
||||
def fill_form_html(html, values, form_id=None, form_index=None):
    """Fill a form and return the result in the same type as the input.

    String input is parsed; any other input is deep-copied, so the
    caller's document is left unchanged.
    """
    result_type = type(html)
    doc = fromstring(html) if isinstance(html, basestring) else copy.deepcopy(html)
    fill_form(doc, values, form_id=form_id, form_index=form_index)
    return _transform_result(result_type, doc)
|
||||
|
||||
def _fill_form(el, values):
    """Apply ``values`` to every named input, select and textarea in ``el``.

    Fields that take multiple values (see _takes_multiple) receive the
    complete value list.  For other fields sharing a name, a list or tuple
    value is distributed one item per field in document order, while a
    scalar value is applied to the first such field only.
    """
    if hasattr(values, 'mixed'):
        # For Paste request parameters
        values = values.mixed()
    # Number of single-value fields already seen, per field name.
    seen = {}
    for field in _input_xpath(el):
        name = field.get('name')
        if not name:
            continue
        if _takes_multiple(field):
            selected = values.get(name, [])
            if not isinstance(selected, (list, tuple)):
                selected = [selected]
            _fill_multiple(field, selected)
            continue
        if name not in values:
            continue
        position = seen.get(name, 0)
        seen[name] = position + 1
        value = values[name]
        if isinstance(value, (list, tuple)):
            try:
                value = value[position]
            except IndexError:
                # More fields than supplied values: leave the rest alone.
                continue
        elif position > 0:
            continue
        _fill_single(field, value)
|
||||
|
||||
def _takes_multiple(input):
    """True for <select multiple>, radio buttons and checkboxes."""
    if _nons(input.tag) == 'select' and input.get('multiple'):
        # FIXME: multiple="0"?
        return True
    return input.get('type', '').lower() in ('radio', 'checkbox')
|
||||
|
||||
def _fill_multiple(input, value):
    """Set the checked/selected state of a multi-value field.

    ``input`` is a checkbox, a radio button or a <select> element and
    ``value`` is the collection of submitted values for its name.

    - A checkbox or radio button with a ``value`` attribute is checked
      when that attribute is contained in ``value``.
    - A checkbox without a ``value`` attribute is decided by the first
      item of ``value``: a string only counts when it equals 'on'; any
      other object is used for its truthiness.
    - For a <select>, each option is selected when its value (or its text
      content, if it has no ``value`` attribute) is contained in ``value``.
    """
    type = input.get('type', '').lower()
    if type == 'checkbox':
        v = input.get('value')
        if v is None:
            if not value:
                result = False
            else:
                result = value[0]
                # Test the picked item, not the containing collection:
                # testing ``value`` here meant the rule below was never
                # applied to list input.
                if isinstance(result, basestring):
                    # The only valid "on" value for an unnamed checkbox is 'on'
                    result = result == 'on'
            _check(input, result)
        else:
            _check(input, v in value)
    elif type == 'radio':
        v = input.get('value')
        _check(input, v in value)
    else:
        assert _nons(input.tag) == 'select'
        for option in _options_xpath(input):
            v = option.get('value')
            if v is None:
                # This seems to be the default, at least on IE
                # FIXME: but I'm not sure
                v = option.text_content()
            _select(option, v in value)
|
||||
|
||||
def _check(el, check):
|
||||
if check:
|
||||
el.set('checked', '')
|
||||
else:
|
||||
if 'checked' in el.attrib:
|
||||
del el.attrib['checked']
|
||||
|
||||
def _select(el, select):
|
||||
if select:
|
||||
el.set('selected', '')
|
||||
else:
|
||||
if 'selected' in el.attrib:
|
||||
del el.attrib['selected']
|
||||
|
||||
def _fill_single(input, value):
    """Store ``value`` in a single-value field.

    A textarea keeps its value as element text; every other field keeps
    it in the ``value`` attribute.
    """
    if _nons(input.tag) != 'textarea':
        input.set('value', value)
        return
    input.text = value
|
||||
|
||||
def _find_form(el, form_id=None, form_index=None):
    """Locate a form element inside ``el``.

    - With neither argument given, the first form found is returned.
    - With ``form_id``, the element with that id is returned; failing
      that, the first form matched by _form_name_xpath for that name.
    - With ``form_index``, the form at that position is returned.

    Raises FormNotFound when no form matches.
    """
    if form_id is None and form_index is None:
        forms = _forms_xpath(el)
        for form in forms:
            return form
        raise FormNotFound(
            "No forms in page")
    if form_id is not None:
        # Pass a default so that a missing id yields None instead of a
        # KeyError, which would skip the lookup by name below.
        form = el.get_element_by_id(form_id, None)
        if form is not None:
            return form
        forms = _form_name_xpath(el, name=form_id)
        if forms:
            return forms[0]
        else:
            # Report the requested identifier (previously the builtin
            # ``id`` function was formatted into the message).
            raise FormNotFound(
                "No form with the name or id of %r (forms: %s)"
                % (form_id, ', '.join(_find_form_ids(el))))
    if form_index is not None:
        forms = _forms_xpath(el)
        try:
            return forms[form_index]
        except IndexError:
            raise FormNotFound(
                "There is no form with the index %r (%i forms found)"
                % (form_index, len(forms)))
|
||||
|
||||
def _find_form_ids(el):
    """Yield a short description (id and/or name) of each form in ``el``.

    Yields the single string '(no forms)' when there is no form at all.
    """
    forms = _forms_xpath(el)
    if not forms:
        yield '(no forms)'
        return
    for index, form in enumerate(forms):
        form_id = form.get('id')
        form_name = form.get('name')
        if form_id and form_name:
            yield '%s or %s' % (form_id, form_name)
        elif form_id:
            yield form_id
        elif form_name:
            yield form_name
        else:
            yield '(unnamed form %s)' % index
|
||||
|
||||
############################################################
|
||||
## Error filling
|
||||
############################################################
|
||||
|
||||
class DefaultErrorCreator:
    """Callable that builds an error element and inserts it near a field.

    The class attributes below are the configuration; each of them can be
    overridden per instance through keyword arguments to the constructor.
    """
    # Put the error before (True) or after (False) the field/content.
    insert_before = True
    # For block elements, put the error inside the element rather than
    # next to it.
    block_inside = True
    # Tag name of the generated error element.
    error_container_tag = 'div'
    # CSS class given to every error element (skipped if empty).
    error_message_class = 'error-message'
    # Additional CSS class for errors attached to block elements.
    error_block_class = 'error-block'
    # Text used when no message is supplied.
    default_message = "Invalid"

    def __init__(self, **kw):
        """Override configuration attributes.

        Raises TypeError for a keyword that is not an existing attribute.
        """
        for name, value in kw.items():
            if not hasattr(self, name):
                raise TypeError(
                    "Unexpected keyword argument: %s" % name)
            setattr(self, name, value)

    def __call__(self, el, is_block, message):
        """Create the error element for ``el`` and insert it into the tree.

        ``message`` may be a string, an element, or None/'' to use
        ``default_message``.
        """
        error_el = el.makeelement(self.error_container_tag)
        if self.error_message_class:
            error_el.set('class', self.error_message_class)
        if is_block and self.error_block_class:
            # Join only the non-empty class names, so that an empty
            # error_message_class does not leave a leading space behind.
            existing = error_el.get('class', '')
            error_el.set(
                'class',
                ' '.join(c for c in (existing, self.error_block_class) if c))
        if message is None or message == '':
            message = self.default_message
        if isinstance(message, ElementBase):
            error_el.append(message)
        else:
            assert isinstance(message, basestring), (
                "Bad message; should be a string or element: %r" % message)
            error_el.text = message or self.default_message
        if is_block and self.block_inside:
            if self.insert_before:
                # The error becomes the first child; the element's leading
                # text moves behind it.
                error_el.tail = el.text
                el.text = None
                el.insert(0, error_el)
            else:
                el.append(error_el)
        else:
            parent = el.getparent()
            pos = parent.index(el)
            if self.insert_before:
                parent.insert(pos, error_el)
            else:
                # Keep the text that followed the field after the error.
                error_el.tail = el.tail
                el.tail = None
                parent.insert(pos+1, error_el)

# Shared instance used as the default ``error_creator``.
default_error_creator = DefaultErrorCreator()
|
||||
|
||||
|
||||
def insert_errors(
    el,
    errors,
    form_id=None,
    form_index=None,
    error_class="error",
    error_creator=default_error_creator,
    ):
    """Insert error messages into a form found in ``el``, in place.

    ``errors`` maps field names to messages; entries whose message is
    None are skipped.  See _find_elements_for_name() for how a name is
    resolved to elements.
    """
    form = _find_form(el, form_id=form_id, form_index=form_index)
    for name, error in errors.items():
        if error is None:
            continue
        targets = _find_elements_for_name(form, name, error)
        for error_el, message in targets:
            assert isinstance(message, (basestring, type(None), ElementBase)), (
                "Bad message: %r" % message)
            _insert_error(error_el, message, error_class, error_creator)
|
||||
|
||||
def insert_errors_html(html, values, **kw):
    """Insert errors and return the result in the same type as the input.

    String input is parsed; any other input is deep-copied, so the
    caller's document is left unchanged.  ``values`` and the keyword
    arguments are passed on to insert_errors().
    """
    result_type = type(html)
    doc = fromstring(html) if isinstance(html, basestring) else copy.deepcopy(html)
    insert_errors(doc, values, **kw)
    return _transform_result(result_type, doc)
|
||||
|
||||
def _insert_error(el, error, error_class, error_creator):
    """Mark ``el`` as erroneous and let ``error_creator`` add the message.

    Empty tags and textareas are treated as non-block elements.  Unless
    ``el`` is the form itself, ``error_class`` is added to it, and labels
    pointing at the element's id receive ``error_class`` as well.
    """
    tag = _nons(el.tag)
    is_block = not (tag in defs.empty_tags or tag == 'textarea')
    if tag != 'form' and error_class:
        _add_class(el, error_class)
    el_id = el.get('id')
    if el_id:
        for label in _label_for_xpath(el, for_id=el_id) or ():
            _add_class(label, error_class)
    error_creator(el, is_block, error)
|
||||
|
||||
def _add_class(el, class_name):
|
||||
if el.get('class'):
|
||||
el.set('class', el.get('class')+' '+class_name)
|
||||
else:
|
||||
el.set('class', class_name)
|
||||
|
||||
def _find_elements_for_name(form, name, error):
    """Yield ``(element, message)`` pairs for one entry of an errors mapping.

    - ``name`` None: the error applies to the form itself.
    - ``name`` starting with '#': the element with that id, if any.
    - Otherwise the elements whose ``name`` attribute matches.  A single
      error goes to the first of them; a list or tuple of errors is
      paired with the elements in order, skipping None entries.
    """
    if name is None:
        # An error for the entire form
        yield form, error
        return
    if name.startswith('#'):
        # By id
        # Pass a default so that an unknown id yields nothing instead of
        # raising KeyError, as the None check below intends.
        el = form.get_element_by_id(name[1:], None)
        if el is not None:
            yield el, error
        return
    els = _name_xpath(form, name=name)
    if not els:
        # FIXME: should this raise an exception?
        return
    if not isinstance(error, (list, tuple)):
        yield els[0], error
        return
    # FIXME: if error is longer than els, should it raise an error?
    for el, err in zip(els, error):
        if err is None:
            continue
        yield el, err
|
||||
@@ -1,260 +0,0 @@
|
||||
"""
|
||||
An interface to html5lib that mimics the lxml.html interface.
|
||||
"""
|
||||
import sys
|
||||
import string
|
||||
|
||||
from html5lib import HTMLParser as _HTMLParser
|
||||
from html5lib.treebuilders.etree_lxml import TreeBuilder
|
||||
from lxml import etree
|
||||
from lxml.html import Element, XHTML_NAMESPACE, _contains_block_level_tag
|
||||
|
||||
# python3 compatibility
|
||||
try:
|
||||
_strings = basestring
|
||||
except NameError:
|
||||
_strings = (bytes, str)
|
||||
try:
|
||||
from urllib2 import urlopen
|
||||
except ImportError:
|
||||
from urllib.request import urlopen
|
||||
try:
|
||||
from urlparse import urlparse
|
||||
except ImportError:
|
||||
from urllib.parse import urlparse
|
||||
|
||||
|
||||
class HTMLParser(_HTMLParser):
    """html5lib HTML parser that builds its tree with lxml."""

    def __init__(self, strict=False, **kwargs):
        # The tree builder is fixed to the lxml one.
        super(HTMLParser, self).__init__(
            strict=strict, tree=TreeBuilder, **kwargs)
|
||||
|
||||
|
||||
# html5lib may not provide an XHTMLParser; in that case neither the class
# nor the ``xhtml_parser`` instance below is defined.
try:
    from html5lib import XHTMLParser as _XHTMLParser
except ImportError:
    pass
else:
    class XHTMLParser(_XHTMLParser):
        """html5lib XHTML parser that builds its tree with lxml."""

        def __init__(self, strict=False, **kwargs):
            # The tree builder is fixed to the lxml one.
            super(XHTMLParser, self).__init__(
                strict=strict, tree=TreeBuilder, **kwargs)

    xhtml_parser = XHTMLParser()
|
||||
|
||||
|
||||
def _find_tag(tree, tag):
|
||||
elem = tree.find(tag)
|
||||
if elem is not None:
|
||||
return elem
|
||||
return tree.find('{%s}%s' % (XHTML_NAMESPACE, tag))
|
||||
|
||||
|
||||
def document_fromstring(html, guess_charset=None, parser=None):
    """
    Parse a complete document from a string and return its root element.

    ``parser`` defaults to the module-level ``html_parser``.

    ``guess_charset`` is handed to the parser as its ``useChardet`` option.
    When it is left as ``None`` it is switched on for byte-string input and
    omitted entirely otherwise.

    Raises ``TypeError`` if ``html`` is not a string.
    """
    if not isinstance(html, _strings):
        raise TypeError('string required')

    active_parser = html_parser if parser is None else parser

    if guess_charset is None and isinstance(html, bytes):
        # html5lib does not accept useChardet as an argument, if it
        # detected the html argument would produce unicode objects.
        guess_charset = True

    parse_kwargs = {}
    if guess_charset is not None:
        parse_kwargs['useChardet'] = guess_charset
    return active_parser.parse(html, **parse_kwargs).getroot()
|
||||
|
||||
|
||||
def fragments_fromstring(html, no_leading_text=False,
                         guess_charset=None, parser=None):
    """Parse several HTML elements and return them as a list.

    The parser's ``parseFragment`` is run with a ``div`` container.  The
    first list item may be a string of leading text.  With
    ``no_leading_text`` set, non-whitespace leading text raises
    ``etree.ParserError`` and a whitespace-only leading string is dropped,
    so the result contains no leading string.

    ``guess_charset`` is handed to the parser as ``useChardet``.  Left as
    ``None``, it is passed as ``False`` for byte-string input and omitted
    otherwise.

    Raises ``TypeError`` if ``html`` is not a string.
    """
    if not isinstance(html, _strings):
        raise TypeError('string required')

    active_parser = html_parser if parser is None else parser

    if guess_charset is None and isinstance(html, bytes):
        # html5lib does not accept useChardet as an argument, if it
        # detected the html argument would produce unicode objects.
        guess_charset = False

    parse_kwargs = {}
    if guess_charset is not None:
        parse_kwargs['useChardet'] = guess_charset

    children = active_parser.parseFragment(html, 'div', **parse_kwargs)
    starts_with_text = children and isinstance(children[0], _strings)
    if starts_with_text and no_leading_text:
        if children[0].strip():
            raise etree.ParserError('There is leading text: %r' %
                                    children[0])
        del children[0]
    return children
|
||||
|
||||
|
||||
def fragment_fromstring(html, create_parent=False,
                        guess_charset=None, parser=None):
    """Parse exactly one HTML element.

    Without ``create_parent`` the input must be a single element with only
    whitespace around it; otherwise ``etree.ParserError`` is raised.  The
    returned element has its tail cleared.

    With a true ``create_parent`` the parsed content is wrapped in a new
    element, named by ``create_parent`` if it is a string and ``div``
    otherwise.  Leading text is then allowed and becomes the wrapper's text.

    ``guess_charset`` and ``parser`` are passed on to
    ``fragments_fromstring``.

    Raises ``TypeError`` if ``html`` is not a string.
    """
    if not isinstance(html, _strings):
        raise TypeError('string required')

    wrap_in_parent = bool(create_parent)

    elements = fragments_fromstring(
        html, guess_charset=guess_charset, parser=parser,
        no_leading_text=not wrap_in_parent)

    if create_parent:
        parent_tag = create_parent
        if not isinstance(parent_tag, _strings):
            parent_tag = 'div'
        new_root = Element(parent_tag)
        if elements:
            if isinstance(elements[0], _strings):
                # Leading text becomes the text of the wrapper element.
                new_root.text = elements[0]
                del elements[0]
            new_root.extend(elements)
        return new_root

    if not elements:
        raise etree.ParserError('No elements found')
    if len(elements) > 1:
        raise etree.ParserError('Multiple elements found')

    only_element = elements[0]
    trailing = only_element.tail
    if trailing and trailing.strip():
        raise etree.ParserError('Element followed by text: %r' % trailing)
    only_element.tail = None
    return only_element
|
||||
|
||||
|
||||
def fromstring(html, guess_charset=None, parser=None):
    """Parse the html, returning a single element/document.

    The input is always parsed as a document; the function then decides
    how much of the result to return:

    * the whole document, if the input starts with ``<html`` or
      ``<!doctype``, or if the parsed ``<head>`` has children;
    * the single child of ``<body>``, if that is all the body holds
      apart from whitespace;
    * otherwise the ``<body>`` element itself, renamed to ``div`` when it
      contains block-level tags and to ``span`` when it does not.

    ``guess_charset`` and ``parser`` are passed on to
    ``document_fromstring``.

    Raises ``TypeError`` if ``html`` is not a string.
    """
    if not isinstance(html, _strings):
        raise TypeError('string required')
    doc = document_fromstring(html, parser=parser,
                              guess_charset=guess_charset)

    # document starts with doctype or <html>, full document!
    prefix = html[:50]
    if isinstance(prefix, bytes):
        # Allow text comparison in python3.
        # Decode as ascii, that also covers latin-1 and utf-8 for the
        # characters we need.
        prefix = prefix.decode('ascii', 'replace')
    prefix = prefix.lstrip().lower()
    if prefix.startswith(('<html', '<!doctype')):
        return doc

    # if the head is not empty we have a full document
    head = _find_tag(doc, 'head')
    if len(head):
        return doc

    body = _find_tag(doc, 'body')

    def _is_blank(text):
        return not text or not text.strip()

    # The body has just one element, so it was probably a single
    # element passed in
    if len(body) == 1 and _is_blank(body.text) and _is_blank(body[-1].tail):
        return body[0]

    # Several nodes were passed in: reuse <body> as a neutral container,
    # because a real <body> tag implies too much structure.
    body.tag = 'div' if _contains_block_level_tag(body) else 'span'
    return body
|
||||
|
||||
|
||||
def parse(filename_url_or_file, guess_charset=None, parser=None):
    """Parse a filename, URL, or file-like object into an HTML document
    tree.  Note: this returns a tree, not an element.  Use
    ``parse(...).getroot()`` to get the document root.

    If ``guess_charset`` is true, the ``useChardet`` option is passed into
    html5lib to enable character detection.  This option is on by default
    when parsing from URLs, off by default when parsing from file(-like)
    objects (which tend to return Unicode more often than not), and on by
    default when parsing from a file path (which is read in binary mode).

    A file or URL handle opened by this function is closed before it
    returns.  A file-like object supplied by the caller is left open.
    """
    if parser is None:
        parser = html_parser

    # Only close handles this function created; the caller owns its own.
    opened_here = False
    if not isinstance(filename_url_or_file, _strings):
        fp = filename_url_or_file
        if guess_charset is None:
            # assume that file-like objects return Unicode more often than bytes
            guess_charset = False
    elif _looks_like_url(filename_url_or_file):
        fp = urlopen(filename_url_or_file)
        opened_here = True
        if guess_charset is None:
            # assume that URLs return bytes
            guess_charset = True
    else:
        fp = open(filename_url_or_file, 'rb')
        opened_here = True
        if guess_charset is None:
            guess_charset = True

    options = {}
    # html5lib does not accept useChardet as an argument, if it
    # detected the html argument would produce unicode objects.
    if guess_charset:
        options['useChardet'] = guess_charset

    try:
        return parser.parse(fp, **options)
    finally:
        if opened_here:
            fp.close()
|
||||
|
||||
|
||||
def _looks_like_url(str):
|
||||
scheme = urlparse(str)[0]
|
||||
if not scheme:
|
||||
return False
|
||||
elif (sys.platform == 'win32' and
|
||||
scheme in string.ascii_letters
|
||||
and len(scheme) == 1):
|
||||
# looks like a 'normal' absolute path
|
||||
return False
|
||||
else:
|
||||
return True
|
||||
|
||||
|
||||
html_parser = HTMLParser()
|
||||
@@ -1,314 +0,0 @@
|
||||
"""External interface to the BeautifulSoup HTML parser.
|
||||
"""
|
||||
|
||||
__all__ = ["fromstring", "parse", "convert_tree"]
|
||||
|
||||
import re
|
||||
from lxml import etree, html
|
||||
|
||||
try:
|
||||
from bs4 import (
|
||||
BeautifulSoup, Tag, Comment, ProcessingInstruction, NavigableString,
|
||||
Declaration, Doctype)
|
||||
_DECLARATION_OR_DOCTYPE = (Declaration, Doctype)
|
||||
except ImportError:
|
||||
from BeautifulSoup import (
|
||||
BeautifulSoup, Tag, Comment, ProcessingInstruction, NavigableString,
|
||||
Declaration)
|
||||
_DECLARATION_OR_DOCTYPE = Declaration
|
||||
|
||||
|
||||
def fromstring(data, beautifulsoup=None, makeelement=None, **bsargs):
    """Parse a string of HTML data into an Element tree using the
    BeautifulSoup parser.

    Returns the root ``<html>`` Element of the tree.

    A different BeautifulSoup parser can be supplied through the
    `beautifulsoup` keyword and a different Element factory function
    through the `makeelement` keyword.  By default, the standard
    ``BeautifulSoup`` class and the default factory of `lxml.html` are
    used.  Any further keyword arguments go to the BeautifulSoup parser.
    """
    root = _parse(data, beautifulsoup, makeelement, **bsargs)
    return root
|
||||
|
||||
|
||||
def parse(file, beautifulsoup=None, makeelement=None, **bsargs):
    """Parse a file into an ElementTree using the BeautifulSoup parser.

    ``file`` is either an object with a ``read`` attribute or a path that
    is opened here.  A file opened here is closed once parsing is done.
    A file object supplied by the caller is left open.

    You can pass a different BeautifulSoup parser through the
    `beautifulsoup` keyword, and a different Element factory function
    through the `makeelement` keyword.  By default, the standard
    ``BeautifulSoup`` class and the default factory of `lxml.html` are
    used.
    """
    if hasattr(file, 'read'):
        root = _parse(file, beautifulsoup, makeelement, **bsargs)
    else:
        # We own this handle, so release it instead of leaking it.
        with open(file) as stream:
            root = _parse(stream, beautifulsoup, makeelement, **bsargs)
    return etree.ElementTree(root)
|
||||
|
||||
|
||||
def convert_tree(beautiful_soup_tree, makeelement=None):
    """Convert a BeautifulSoup tree to a list of Element trees.

    Returns a list instead of a single root Element to support
    HTML-like soup with more than one root element.  The children are
    detached from the intermediate root before being returned.

    You can pass a different Element factory through the `makeelement`
    keyword.
    """
    root = _convert_tree(beautiful_soup_tree, makeelement)
    # Snapshot the children first: getchildren() is deprecated, and the
    # loop below mutates the root while iterating.
    children = list(root)
    for child in children:
        root.remove(child)
    return children
|
||||
|
||||
|
||||
# helpers
|
||||
|
||||
def _parse(source, beautifulsoup, makeelement, **bsargs):
    """Run BeautifulSoup over ``source`` and return an ``html`` root element.

    ``beautifulsoup`` defaults to the imported ``BeautifulSoup`` class.
    Defaults are filled into ``bsargs`` unless the caller supplied them:
    ``convertEntities='html'`` for bs3, ``features='html.parser'`` for bs4.
    """
    soup_class = BeautifulSoup if beautifulsoup is None else beautifulsoup
    if hasattr(soup_class, "HTML_ENTITIES"):  # bs3
        bsargs.setdefault('convertEntities', 'html')
    if hasattr(soup_class, "DEFAULT_BUILDER_FEATURES"):  # bs4
        # use Python html parser
        bsargs.setdefault('features', 'html.parser')

    soup = soup_class(source, **bsargs)
    root = _convert_tree(soup, makeelement)
    # from ET: wrap the document in a html root element, if necessary
    if len(root) == 1 and root[0].tag == "html":
        return root[0]
    root.tag = "html"
    return root
|
||||
|
||||
|
||||
_parse_doctype_declaration = re.compile(
|
||||
r'(?:\s|[<!])*DOCTYPE\s*HTML'
|
||||
r'(?:\s+PUBLIC)?(?:\s+(\'[^\']*\'|"[^"]*"))?'
|
||||
r'(?:\s+(\'[^\']*\'|"[^"]*"))?',
|
||||
re.IGNORECASE).match
|
||||
|
||||
|
||||
class _PseudoTag:
|
||||
# Minimal imitation of BeautifulSoup.Tag
|
||||
def __init__(self, contents):
|
||||
self.name = 'html'
|
||||
self.attrs = []
|
||||
self.contents = contents
|
||||
|
||||
def __iter__(self):
|
||||
return self.contents.__iter__()
|
||||
|
||||
|
||||
def _convert_tree(beautiful_soup_tree, makeelement):
    """Convert a BeautifulSoup tree into an lxml tree with one ``html`` root.

    ``makeelement`` defaults to ``html.html_parser.makeelement``.  Nodes
    before and after the root element(s) are attached as siblings of the
    converted root, and a doctype found in the soup is copied into the
    resulting tree's ``docinfo``.  Returns the converted root element.
    """
    if makeelement is None:
        makeelement = html.html_parser.makeelement

    # Split the tree into three parts:
    # i) everything before the root element: document type
    # declaration, comments, processing instructions, whitespace
    # ii) the root(s),
    # iii) everything after the root: comments, processing
    # instructions, whitespace
    first_tag_pos = last_tag_pos = None
    html_root = doctype = None
    for pos, node in enumerate(beautiful_soup_tree):
        if not isinstance(node, Tag):
            # Only the first declaration/doctype is remembered.
            if doctype is None and isinstance(node, _DECLARATION_OR_DOCTYPE):
                doctype = node
            continue
        if first_tag_pos is None:
            first_tag_pos = pos
        last_tag_pos = pos
        if html_root is None and node.name and node.name.lower() == 'html':
            html_root = node

    # For a nice, well-formatted document, the variable roots below is
    # a list consisting of a single <html> element. However, the document
    # may be a soup like '<meta><head><title>Hello</head><body>Hi
    # all<\p>'. In this example roots is a list containing meta, head
    # and body elements.
    top_level = beautiful_soup_tree.contents
    if first_tag_pos is None:
        leading = trailing = []
        roots = top_level
    else:
        leading = top_level[:first_tag_pos]
        roots = top_level[first_tag_pos:last_tag_pos + 1]
        trailing = top_level[last_tag_pos + 1:]

    # Reorganize so that there is one <html> root...
    if html_root is None:
        # ... creating a new one if the soup has none, ...
        html_root = _PseudoTag(roots)
    else:
        # ... otherwise folding the sibling roots into the existing one.
        at = roots.index(html_root)
        html_root.contents = roots[:at] + html_root.contents + roots[at + 1:]

    convert_node = _init_node_converters(makeelement)
    res_root = convert_node(html_root)

    # Attach the leading nodes, walking backwards so each lands just
    # before the previously attached one.
    anchor = res_root
    for node in reversed(leading):
        converted = convert_node(node)
        if converted is not None:
            anchor.addprevious(converted)
            anchor = converted

    # ditto for the trailing nodes, walking forwards
    anchor = res_root
    for node in trailing:
        converted = convert_node(node)
        if converted is not None:
            anchor.addnext(converted)
            anchor = converted

    if doctype is not None:
        try:
            # bs4 provides full Doctype string
            doctype_string = doctype.output_ready()
        except AttributeError:
            doctype_string = doctype.string

        match = _parse_doctype_declaration(doctype_string)
        # An unparsable doctype is ignored: soupparser should tolerate
        # errors rather than raise.
        if match:
            external_id, sys_uri = match.groups()
            docinfo = res_root.getroottree().docinfo
            # strip quotes and update DOCTYPE values (any of None, '', '...')
            docinfo.public_id = external_id and external_id[1:-1]
            docinfo.system_url = sys_uri and sys_uri[1:-1]

    return res_root
|
||||
|
||||
|
||||
def _init_node_converters(makeelement):
    """Build and return ``convert_node(bs_node, parent=None)``.

    The returned function converts one BeautifulSoup node (and, for tags,
    its children) into lxml nodes.  Root elements are created with
    ``makeelement``; nested ones with ``etree.SubElement``.  Handlers are
    registered per node type and cached per concrete type on first use.
    """
    converters = {}
    registration_order = []

    def converter(*types):
        # Decorator factory: register the handler for each given type.
        def register(handler):
            for node_type in types:
                converters[node_type] = handler
                registration_order.append(node_type)
            return handler
        return register

    def find_best_converter(node):
        # First registered type the node is an instance of wins.
        for node_type in registration_order:
            if isinstance(node, node_type):
                return converters[node_type]
        return None

    def convert_node(bs_node, parent=None):
        # duplicated in convert_tag() below
        node_type = type(bs_node)
        if node_type in converters:
            handler = converters[node_type]
        else:
            # Cache the lookup result (possibly None) for this exact type.
            handler = converters[node_type] = find_best_converter(bs_node)
        if handler is None:
            return None
        return handler(bs_node, parent)

    def map_attrs(bs_attrs):
        if not isinstance(bs_attrs, dict):
            # not a dict: an iterable of (key, value) pairs
            return {key: unescape(value) for key, value in bs_attrs}
        # bs4
        attribs = {}
        for key, value in bs_attrs.items():
            if isinstance(value, list):
                value = " ".join(value)
            attribs[key] = unescape(value)
        return attribs

    def append_text(parent, text):
        # Text goes into .text until the parent has children, and into
        # the last child's .tail afterwards.
        if len(parent) == 0:
            parent.text = (parent.text or '') + text
        else:
            last_child = parent[-1]
            last_child.tail = (last_child.tail or '') + text

    # converters are tried in order of their definition

    @converter(Tag, _PseudoTag)
    def convert_tag(bs_node, parent):
        attrs = bs_node.attrs
        if parent is None:
            attribs = map_attrs(attrs) if attrs else {}
            res = makeelement(bs_node.name, attrib=attribs)
        else:
            attribs = map_attrs(attrs) if attrs else None
            res = etree.SubElement(parent, bs_node.name, attrib=attribs)

        for child in bs_node:
            # avoid double recursion by inlining convert_node(), see above
            child_type = type(child)
            if child_type in converters:
                handler = converters[child_type]
                if handler is not None:
                    handler(child, res)
            else:
                convert_node(child, res)
        return res

    @converter(Comment)
    def convert_comment(bs_node, parent):
        res = html.HtmlComment(bs_node)
        if parent is not None:
            parent.append(res)
        return res

    @converter(ProcessingInstruction)
    def convert_pi(bs_node, parent):
        if bs_node.endswith('?'):
            # The PI is of XML style (<?as df?>) but BeautifulSoup
            # interpreted it as being SGML style (<?as df>).  Fix.
            bs_node = bs_node[:-1]
        pi_parts = bs_node.split(' ', 1)
        res = etree.ProcessingInstruction(*pi_parts)
        if parent is not None:
            parent.append(res)
        return res

    @converter(NavigableString)
    def convert_text(bs_node, parent):
        # Text never becomes a node of its own; without a parent it is
        # dropped.
        if parent is not None:
            append_text(parent, unescape(bs_node))
        return None

    return convert_node
|
||||
|
||||
|
||||
# copied from ET's ElementSoup
|
||||
|
||||
try:
|
||||
from html.entities import name2codepoint # Python 3
|
||||
except ImportError:
|
||||
from htmlentitydefs import name2codepoint
|
||||
|
||||
|
||||
handle_entities = re.compile(r"&(\w+);").sub
|
||||
|
||||
|
||||
try:
|
||||
unichr
|
||||
except NameError:
|
||||
# Python 3
|
||||
unichr = chr
|
||||
|
||||
|
||||
def unescape(string):
    """Replace named entities such as ``&amp;`` with their characters.

    Names missing from ``name2codepoint`` are left untouched.  A false
    ``string`` (``None``, ``''``) yields ``''``.
    """
    if not string:
        return ''

    # work around oddities in BeautifulSoup's entity handling
    def _replace(match):
        entity_name = match.group(1)
        if entity_name in name2codepoint:
            return unichr(name2codepoint[entity_name])
        return match.group(0)  # use as is

    return handle_entities(_replace, string)
|
||||
@@ -1,13 +0,0 @@
|
||||
"""Doctest module for HTML comparison.
|
||||
|
||||
Usage::
|
||||
|
||||
>>> import lxml.html.usedoctest
|
||||
>>> # now do your HTML doctests ...
|
||||
|
||||
See `lxml.doctestcompare`.
|
||||
"""
|
||||
|
||||
from lxml import doctestcompare
|
||||
|
||||
doctestcompare.temp_install(html=True, del_module=__name__)
|
||||
@@ -1,25 +0,0 @@
|
||||
from lxml.includes.tree cimport xmlDoc, xmlOutputBuffer, xmlChar
|
||||
from lxml.includes.xpath cimport xmlNodeSet
|
||||
|
||||
cdef extern from "libxml/c14n.h" nogil:
|
||||
cdef int xmlC14NDocDumpMemory(xmlDoc* doc,
|
||||
xmlNodeSet* nodes,
|
||||
int exclusive,
|
||||
xmlChar** inclusive_ns_prefixes,
|
||||
int with_comments,
|
||||
xmlChar** doc_txt_ptr)
|
||||
|
||||
cdef int xmlC14NDocSave(xmlDoc* doc,
|
||||
xmlNodeSet* nodes,
|
||||
int exclusive,
|
||||
xmlChar** inclusive_ns_prefixes,
|
||||
int with_comments,
|
||||
char* filename,
|
||||
int compression)
|
||||
|
||||
cdef int xmlC14NDocSaveTo(xmlDoc* doc,
|
||||
xmlNodeSet* nodes,
|
||||
int exclusive,
|
||||
xmlChar** inclusive_ns_prefixes,
|
||||
int with_comments,
|
||||
xmlOutputBuffer* buffer)
|
||||
@@ -1,3 +0,0 @@
|
||||
cdef extern from "etree_defs.h":
|
||||
cdef bint ENABLE_THREADING
|
||||
cdef bint ENABLE_SCHEMATRON
|
||||
@@ -1,18 +0,0 @@
|
||||
from lxml.includes cimport tree
|
||||
from lxml.includes.tree cimport xmlDoc, xmlDtd
|
||||
|
||||
cdef extern from "libxml/valid.h" nogil:
|
||||
ctypedef void (*xmlValidityErrorFunc)(void * ctx, const char * msg, ...) noexcept
|
||||
ctypedef void (*xmlValidityWarningFunc)(void * ctx, const char * msg, ...) noexcept
|
||||
|
||||
ctypedef struct xmlValidCtxt:
|
||||
void *userData
|
||||
xmlValidityErrorFunc error
|
||||
xmlValidityWarningFunc warning
|
||||
|
||||
cdef xmlValidCtxt* xmlNewValidCtxt()
|
||||
cdef void xmlFreeValidCtxt(xmlValidCtxt* cur)
|
||||
|
||||
cdef int xmlValidateDtd(xmlValidCtxt* ctxt, xmlDoc* doc, xmlDtd* dtd)
|
||||
cdef tree.xmlElement* xmlGetDtdElementDesc(
|
||||
xmlDtd* dtd, tree.const_xmlChar* name)
|
||||
@@ -1,390 +0,0 @@
|
||||
#ifndef HAS_ETREE_DEFS_H
|
||||
#define HAS_ETREE_DEFS_H
|
||||
|
||||
/* quick check for Python/libxml2/libxslt devel setup */
|
||||
#include "Python.h"
|
||||
#ifndef PY_VERSION_HEX
|
||||
# error the development package of Python (header files etc.) is not installed correctly
|
||||
#elif PY_VERSION_HEX < 0x03060000
|
||||
# error this version of lxml requires Python 3.6 or later
|
||||
#endif
|
||||
|
||||
#include "libxml/xmlversion.h"
|
||||
#ifndef LIBXML_VERSION
|
||||
# error the development package of libxml2 (header files etc.) is not installed correctly
|
||||
#elif LIBXML_VERSION < 20700
|
||||
# error minimum required version of libxml2 is 2.7.0
|
||||
#endif
|
||||
|
||||
#include "libxslt/xsltconfig.h"
|
||||
#ifndef LIBXSLT_VERSION
|
||||
# error the development package of libxslt (header files etc.) is not installed correctly
|
||||
#elif LIBXSLT_VERSION < 10123
|
||||
# error minimum required version of libxslt is 1.1.23
|
||||
#endif
|
||||
|
||||
|
||||
/* va_arg helper macros */
|
||||
#define va_int(ap) va_arg(ap, int)
|
||||
#define va_charptr(ap) va_arg(ap, char *)
|
||||
|
||||
#ifdef PYPY_VERSION
|
||||
# define IS_PYPY 1
|
||||
#else
|
||||
# define IS_PYPY 0
|
||||
#endif
|
||||
|
||||
/* unused */
|
||||
#define IS_PYTHON2 0
|
||||
#define IS_PYTHON3 1
|
||||
#undef LXML_UNICODE_STRINGS
|
||||
#define LXML_UNICODE_STRINGS 1
|
||||
|
||||
#if !IS_PYPY
|
||||
# define PyWeakref_LockObject(obj) (NULL)
|
||||
#endif
|
||||
|
||||
/* Threading is not currently supported by PyPy */
|
||||
#if IS_PYPY
|
||||
# ifndef WITHOUT_THREADING
|
||||
# define WITHOUT_THREADING
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#if IS_PYPY
|
||||
# ifndef PyUnicode_FromFormat
|
||||
# define PyUnicode_FromFormat PyString_FromFormat
|
||||
# endif
|
||||
# if !defined(PyBytes_FromFormat)
|
||||
# ifdef PyString_FromFormat
|
||||
# define PyBytes_FromFormat PyString_FromFormat
|
||||
# else
|
||||
#include <stdarg.h>
|
||||
static PyObject* PyBytes_FromFormat(const char* format, ...) {
|
||||
PyObject *string;
|
||||
va_list vargs;
|
||||
#ifdef HAVE_STDARG_PROTOTYPES
|
||||
va_start(vargs, format);
|
||||
#else
|
||||
va_start(vargs);
|
||||
#endif
|
||||
string = PyUnicode_FromFormatV(format, vargs);
|
||||
va_end(vargs);
|
||||
if (string && PyUnicode_Check(string)) {
|
||||
PyObject *bstring = PyUnicode_AsUTF8String(string);
|
||||
Py_DECREF(string);
|
||||
string = bstring;
|
||||
}
|
||||
if (string && !PyBytes_CheckExact(string)) {
|
||||
Py_DECREF(string);
|
||||
string = NULL;
|
||||
PyErr_SetString(PyExc_TypeError, "String formatting and encoding failed to return bytes object");
|
||||
}
|
||||
return string;
|
||||
}
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#if PY_VERSION_HEX >= 0x030B00A1
|
||||
/* Python 3.12 doesn't have wstr Unicode strings any more. */
|
||||
#undef PyUnicode_GET_DATA_SIZE
|
||||
#define PyUnicode_GET_DATA_SIZE(ustr) (0)
|
||||
#undef PyUnicode_AS_DATA
|
||||
#define PyUnicode_AS_DATA(ustr) (NULL)
|
||||
#undef PyUnicode_IS_READY
|
||||
#define PyUnicode_IS_READY(ustr) (1)
|
||||
#endif
|
||||
|
||||
#ifdef WITHOUT_THREADING
|
||||
# undef PyEval_SaveThread
|
||||
# define PyEval_SaveThread() (NULL)
|
||||
# undef PyEval_RestoreThread
|
||||
# define PyEval_RestoreThread(state) if (state); else {}
|
||||
# undef PyGILState_Ensure
|
||||
# define PyGILState_Ensure() (PyGILState_UNLOCKED)
|
||||
# undef PyGILState_Release
|
||||
# define PyGILState_Release(state) if (state); else {}
|
||||
# undef Py_UNBLOCK_THREADS
|
||||
# define Py_UNBLOCK_THREADS _save = NULL;
|
||||
# undef Py_BLOCK_THREADS
|
||||
# define Py_BLOCK_THREADS if (_save); else {}
|
||||
#endif
|
||||
|
||||
#ifdef WITHOUT_THREADING
|
||||
# define ENABLE_THREADING 0
|
||||
#else
|
||||
# define ENABLE_THREADING 1
|
||||
#endif
|
||||
|
||||
#if LIBXML_VERSION < 20704
|
||||
/* FIXME: hack to make new error reporting compile in old libxml2 versions */
|
||||
# define xmlStructuredErrorContext NULL
|
||||
# define xmlXIncludeProcessTreeFlagsData(n,o,d) xmlXIncludeProcessTreeFlags(n,o)
|
||||
#endif
|
||||
|
||||
/* schematron was added in libxml2 2.6.21 */
|
||||
#ifdef LIBXML_SCHEMATRON_ENABLED
|
||||
# define ENABLE_SCHEMATRON 1
|
||||
#else
|
||||
# define ENABLE_SCHEMATRON 0
|
||||
# define XML_SCHEMATRON_OUT_QUIET 0
|
||||
# define XML_SCHEMATRON_OUT_XML 0
|
||||
# define XML_SCHEMATRON_OUT_ERROR 0
|
||||
typedef void xmlSchematron;
|
||||
typedef void xmlSchematronParserCtxt;
|
||||
typedef void xmlSchematronValidCtxt;
|
||||
# define xmlSchematronNewDocParserCtxt(doc) NULL
|
||||
# define xmlSchematronNewParserCtxt(file) NULL
|
||||
# define xmlSchematronParse(ctxt) NULL
|
||||
# define xmlSchematronFreeParserCtxt(ctxt)
|
||||
# define xmlSchematronFree(schema)
|
||||
# define xmlSchematronNewValidCtxt(schema, options) NULL
|
||||
# define xmlSchematronValidateDoc(ctxt, doc) 0
|
||||
# define xmlSchematronFreeValidCtxt(ctxt)
|
||||
# define xmlSchematronSetValidStructuredErrors(ctxt, errorfunc, data)
|
||||
#endif
|
||||
|
||||
#if LIBXML_VERSION < 20708
|
||||
# define HTML_PARSE_NODEFDTD 4
|
||||
#endif
|
||||
#if LIBXML_VERSION < 20900
|
||||
# define XML_PARSE_BIG_LINES 0x400000
|
||||
#endif
|
||||
#if LIBXML_VERSION < 21300
|
||||
# define XML_PARSE_NO_XXE 0x800000
|
||||
#endif
|
||||
#if LIBXML_VERSION < 21400
|
||||
# define XML_PARSE_UNZIP 0x1000000
|
||||
# define XML_PARSE_NO_SYS_CATALOG 0x2000000
|
||||
# define XML_PARSE_CATALOG_PI 0x4000000
|
||||
#endif
|
||||
#if LIBXML_VERSION < 21500
|
||||
# define XML_PARSE_SKIP_IDS 0x8000000
|
||||
#endif
|
||||
|
||||
#include "libxml/tree.h"
|
||||
#ifndef LIBXML2_NEW_BUFFER
|
||||
typedef xmlBuffer xmlBuf;
|
||||
# define xmlBufContent(buf) xmlBufferContent(buf)
|
||||
# define xmlBufUse(buf) xmlBufferLength(buf)
|
||||
#endif
|
||||
|
||||
#if LIBXML_VERSION < 21500
|
||||
# define xmlCtxtIsStopped(p_ctxt) ((p_ctxt)->disableSAX != 0)
|
||||
#endif
|
||||
|
||||
/* libexslt 1.1.25+ support EXSLT functions in XPath */
|
||||
#if LIBXSLT_VERSION < 10125
|
||||
#define exsltDateXpathCtxtRegister(ctxt, prefix)
|
||||
#define exsltSetsXpathCtxtRegister(ctxt, prefix)
|
||||
#define exsltMathXpathCtxtRegister(ctxt, prefix)
|
||||
#define exsltStrXpathCtxtRegister(ctxt, prefix)
|
||||
#endif
|
||||
|
||||
#define LXML_GET_XSLT_ENCODING(result_var, style) XSLT_GET_IMPORT_PTR(result_var, style, encoding)
|
||||
|
||||
/* work around MSDEV 6.0 */
|
||||
#if (_MSC_VER == 1200) && (WINVER < 0x0500)
|
||||
long _ftol( double ); //defined by VC6 C libs
|
||||
long _ftol2( double dblSource ) { return _ftol( dblSource ); }
|
||||
#endif
|
||||
|
||||
#ifdef __GNUC__
|
||||
/* Test for GCC > 2.95 */
|
||||
#if __GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95))
|
||||
#define unlikely_condition(x) __builtin_expect((x), 0)
|
||||
#else /* __GNUC__ > 2 ... */
|
||||
#define unlikely_condition(x) (x)
|
||||
#endif /* __GNUC__ > 2 ... */
|
||||
#else /* __GNUC__ */
|
||||
#define unlikely_condition(x) (x)
|
||||
#endif /* __GNUC__ */
|
||||
|
||||
#ifndef Py_TYPE
|
||||
#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
|
||||
#endif
|
||||
|
||||
#define _fqtypename(o) ((Py_TYPE(o))->tp_name)
|
||||
|
||||
/* Overflow-checked allocation helpers: evaluate to NULL instead of
 * allocating when count * item_size would exceed PY_SSIZE_T_MAX.
 * Both macro arguments are parenthesised so that compound expressions
 * (e.g. "sizeof(a) + 1") expand with the intended precedence. */
#define lxml_malloc(count, item_size) \
    (unlikely_condition((size_t)(count) > (size_t) (PY_SSIZE_T_MAX / (item_size))) ? NULL : \
     (PyMem_Malloc((count) * (item_size))))

#define lxml_realloc(mem, count, item_size) \
    (unlikely_condition((size_t)(count) > (size_t) (PY_SSIZE_T_MAX / (item_size))) ? NULL : \
     (PyMem_Realloc((mem), (count) * (item_size))))
|
||||
|
||||
#define lxml_free(mem) PyMem_Free(mem)
|
||||
|
||||
#define _isString(obj) (PyUnicode_Check(obj) || PyBytes_Check(obj))
|
||||
|
||||
#define _isElement(c_node) \
|
||||
(((c_node)->type == XML_ELEMENT_NODE) || \
|
||||
((c_node)->type == XML_COMMENT_NODE) || \
|
||||
((c_node)->type == XML_ENTITY_REF_NODE) || \
|
||||
((c_node)->type == XML_PI_NODE))
|
||||
|
||||
#define _isElementOrXInclude(c_node) \
|
||||
(_isElement(c_node) || \
|
||||
((c_node)->type == XML_XINCLUDE_START) || \
|
||||
((c_node)->type == XML_XINCLUDE_END))
|
||||
|
||||
#define _getNs(c_node) \
|
||||
(((c_node)->ns == 0) ? 0 : ((c_node)->ns->href))
|
||||
|
||||
|
||||
#include "string.h"
|
||||
/* Extract the xmlDoc from a capsule named "libxml2:xmlDoc".
 *
 * Returns the document pointer, or NULL with a Python exception set.
 * *is_owned is set to 1 only when the capsule context is
 * "destructor:xmlFreeDoc" and ownership was taken over (destructor
 * cleared, capsule name cleared); otherwise it stays 0.
 */
static void* lxml_unpack_xmldoc_capsule(PyObject* capsule, int* is_owned) {
    const char *capsule_name = "libxml2:xmlDoc";
    xmlDoc *doc;
    void *ctx;

    *is_owned = 0;

    if (unlikely_condition(!PyCapsule_IsValid(capsule, capsule_name))) {
        PyErr_SetString(
            PyExc_TypeError,
            "Not a valid capsule. The capsule argument must be a capsule object with name libxml2:xmlDoc");
        return NULL;
    }
    doc = (xmlDoc*) PyCapsule_GetPointer(capsule, capsule_name);
    if (unlikely_condition(!doc)) return NULL;

    if (unlikely_condition(doc->type != XML_DOCUMENT_NODE && doc->type != XML_HTML_DOCUMENT_NODE)) {
        PyErr_Format(
            PyExc_ValueError,
            "Illegal document provided: expected XML or HTML, found %d", (int)doc->type);
        return NULL;
    }

    ctx = PyCapsule_GetContext(capsule);
    if (unlikely_condition(!ctx && PyErr_Occurred())) return NULL;

    /* no ownership marker => hand the document out without owning it */
    if (!ctx || strcmp((const char*) ctx, "destructor:xmlFreeDoc") != 0)
        return doc;

    /* take ownership by setting destructor to NULL */
    if (PyCapsule_SetDestructor(capsule, NULL) != 0)
        return doc;

    /* ownership transferred => invalidate capsule by clearing its name */
    if (unlikely_condition(PyCapsule_SetName(capsule, NULL))) {
        /* this should never happen since everything above succeeded */
        xmlFreeDoc(doc);
        return NULL;
    }
    *is_owned = 1;
    return doc;
}
|
||||
|
||||
/* Macro pair implementation of a depth first tree walker
|
||||
*
|
||||
* Calls the code block between the BEGIN and END macros for all elements
|
||||
* below c_tree_top (exclusively), starting at c_node (inclusively iff
|
||||
* 'inclusive' is 1). The _ELEMENT_ variants will only stop on nodes
|
||||
* that match _isElement(), the normal variant will stop on every node
|
||||
* except text nodes.
|
||||
*
|
||||
* To traverse the node and all of its children and siblings in Pyrex, call
|
||||
* cdef xmlNode* some_node
|
||||
* BEGIN_FOR_EACH_ELEMENT_FROM(some_node.parent, some_node, 1)
|
||||
* # do something with some_node
|
||||
* END_FOR_EACH_ELEMENT_FROM(some_node)
|
||||
*
|
||||
* To traverse only the children and siblings of a node, call
|
||||
* cdef xmlNode* some_node
|
||||
* BEGIN_FOR_EACH_ELEMENT_FROM(some_node.parent, some_node, 0)
|
||||
* # do something with some_node
|
||||
* END_FOR_EACH_ELEMENT_FROM(some_node)
|
||||
*
|
||||
* To traverse only the children, do:
|
||||
* cdef xmlNode* some_node
|
||||
* some_node = parent_node.children
|
||||
* BEGIN_FOR_EACH_ELEMENT_FROM(parent_node, some_node, 1)
|
||||
* # do something with some_node
|
||||
* END_FOR_EACH_ELEMENT_FROM(some_node)
|
||||
*
|
||||
* NOTE: 'some_node' MUST be a plain 'xmlNode*' !
|
||||
*
|
||||
* NOTE: parent modification during the walk can divert the iterator, but
|
||||
* should not segfault !
|
||||
*/
|
||||
|
||||
/* Node filter used by the tree walker.
 *
 * Evaluates to 1 for every node when 'only_elements' is zero; otherwise it
 * evaluates to whatever _isElement(c_node) yields.
 */
#define _LX__ELEMENT_MATCH(c_node, only_elements) \
    (!(only_elements) ? 1 : (_isElement(c_node)))
|
||||
|
||||
/* Statement macro: follows the ->next chain starting at c_node until c_node
 * is NULL or passes _LX__ELEMENT_MATCH.
 *
 * c_node must be an assignable pointer variable, it is re-assigned in place.
 * The expansion is a complete statement, so the call sites in this header
 * invoke it without a trailing semicolon.
 */
#define _LX__ADVANCE_TO_NEXT(c_node, only_elements) \
    for (; (c_node != 0) && (!_LX__ELEMENT_MATCH(c_node, only_elements)); \
         c_node = c_node->next) {}
|
||||
|
||||
/* Statement macro: replaces c_node with the next node of a depth-first walk
 * bounded by c_stop_node, or with NULL once the walk is finished.
 *
 * Order of preference:
 *   1. the first matching child (the children of entity reference nodes and
 *      DTD nodes are never entered),
 *   2. the next matching sibling,
 *   3. the next matching sibling of the closest ancestor that has one.
 *
 * Climbing towards the root ends at a NULL parent, at c_stop_node and, when
 * 'only_elements' is set, at the first ancestor that fails _isElement().
 * No sibling lookup is done at all when c_node is c_stop_node itself.
 *
 * c_node must be an assignable 'xmlNode*' variable; it is read and written
 * several times by the expansion.
 */
#define _LX__TRAVERSE_TO_NEXT(c_stop_node, c_node, only_elements) \
{ \
    xmlNode* _lx__found = 0; \
    /* descend first, but never into entity references or DTDs */ \
    if ((c_node->children != 0) && \
            (c_node->type != XML_ENTITY_REF_NODE) && \
            (c_node->type != XML_DTD_NODE)) { \
        _lx__found = c_node->children; \
        _LX__ADVANCE_TO_NEXT(_lx__found, only_elements) \
    } \
    if ((_lx__found == 0) && (c_node != c_stop_node)) { \
        for (;;) { \
            /* look along the siblings of the current level */ \
            _lx__found = c_node->next; \
            _LX__ADVANCE_TO_NEXT(_lx__found, only_elements) \
            if (_lx__found != 0) \
                break; \
            /* level exhausted: back off to the parent, whose subtree */ \
            /* has been walked already, and retry with its siblings */ \
            c_node = c_node->parent; \
            if ((c_node == 0) || (c_node == c_stop_node) || \
                    ((only_elements) && !_isElement(c_node))) \
                break; \
        } \
    } \
    c_node = _lx__found; \
}
|
||||
|
||||
/* Opening half of the tree walker macro pair.
 *
 * Opens three scopes (outer block, 'if (c_node != 0)', 'while (c_node != 0)')
 * that _LX__END_FOR_EACH_FROM closes again; the user code goes in between
 * and runs once per node with c_node pointing at that node.
 *
 *   c_tree_top     node that bounds the walk, evaluated once
 *   c_node         assignable 'xmlNode*' variable: start node on entry,
 *                  current node inside the loop, NULL after the loop
 *   inclusive      non-zero to visit the start node itself (only relevant
 *                  when the start node passes the filter)
 *   only_elements  non-zero to restrict the walk to _isElement() nodes,
 *                  evaluated once
 */
#define _LX__BEGIN_FOR_EACH_FROM(c_tree_top, c_node, inclusive, only_elements) \
{ \
    if (c_node != 0) { \
        /* _LX__END_FOR_EACH_FROM refers to these two locals by name */ \
        const xmlNode* _lx__tree_top = (c_tree_top); \
        const int _lx__only_elements = (only_elements); \
        if (_LX__ELEMENT_MATCH(c_node, _lx__only_elements)) { \
            if (!(inclusive)) { \
                /* matching start node that must not be visited itself */ \
                _LX__TRAVERSE_TO_NEXT(_lx__tree_top, c_node, _lx__only_elements) \
            } \
        } else if (c_node != _lx__tree_top) { \
            /* non-matching start node: skipped whatever 'inclusive' */ \
            /* says, continue with its next matching sibling */ \
            c_node = c_node->next; \
            _LX__ADVANCE_TO_NEXT(c_node, _lx__only_elements) \
        } else { \
            /* the tree top itself does not match: nothing to traverse */ \
            c_node = 0; \
        } \
        \
        /* run the user code on every node the walk yields */ \
        while (c_node != 0) { \
            /* here goes the code to be run for each element */

/* Closing half of the tree walker macro pair: steps c_node to the next node
 * of the walk and closes the three scopes opened by _LX__BEGIN_FOR_EACH_FROM.
 */
#define _LX__END_FOR_EACH_FROM(c_node) \
            _LX__TRAVERSE_TO_NEXT(_lx__tree_top, c_node, _lx__only_elements) \
        } \
    } \
}
|
||||
|
||||
|
||||
/* Public entry points of the tree walker.
 *
 * The *_ELEMENT_* pair forwards only_elements = 1, so the loop body only
 * sees nodes accepted by _isElement().  The plain pair forwards
 * only_elements = 0, in which case _LX__ELEMENT_MATCH accepts every node.
 * Each BEGIN macro must be closed by the END macro of the same pair, called
 * with the same node variable.
 */
#define BEGIN_FOR_EACH_ELEMENT_FROM(tree_top, start_node, start_node_inclusive) \
    _LX__BEGIN_FOR_EACH_FROM(tree_top, start_node, start_node_inclusive, 1)

#define END_FOR_EACH_ELEMENT_FROM(start_node)  _LX__END_FOR_EACH_FROM(start_node)

#define BEGIN_FOR_EACH_FROM(tree_top, start_node, start_node_inclusive) \
    _LX__BEGIN_FOR_EACH_FROM(tree_top, start_node, start_node_inclusive, 0)

#define END_FOR_EACH_FROM(start_node)  _LX__END_FOR_EACH_FROM(start_node)
|
||||
|
||||
|
||||
#endif /* HAS_ETREE_DEFS_H */
|
||||
@@ -1,237 +0,0 @@
|
||||
# public Cython/C interface to lxml.etree
|
||||
|
||||
from lxml.includes cimport tree
|
||||
from lxml.includes.tree cimport const_xmlChar
|
||||
|
||||
cdef extern from "lxml-version.h":
|
||||
cdef char* LXML_VERSION_STRING
|
||||
|
||||
cdef extern from "etree_defs.h":
|
||||
# test if c_node is considered an Element (i.e. Element, Comment, etc.)
|
||||
cdef bint _isElement(tree.xmlNode* c_node) noexcept nogil
|
||||
|
||||
# return the namespace URI of the node or NULL
|
||||
cdef const_xmlChar* _getNs(tree.xmlNode* node) noexcept nogil
|
||||
|
||||
# pair of macros for tree traversal
|
||||
cdef void BEGIN_FOR_EACH_ELEMENT_FROM(tree.xmlNode* tree_top,
|
||||
tree.xmlNode* start_node,
|
||||
int start_node_inclusive) noexcept nogil
|
||||
cdef void END_FOR_EACH_ELEMENT_FROM(tree.xmlNode* start_node) noexcept nogil
|
||||
|
||||
cdef extern from "etree_api.h":
|
||||
|
||||
# first function to call!
|
||||
cdef int import_lxml__etree() except -1
|
||||
|
||||
##########################################################################
|
||||
# public ElementTree API classes
|
||||
|
||||
cdef class lxml.etree._Document [ object LxmlDocument ]:
|
||||
cdef tree.xmlDoc* _c_doc
|
||||
|
||||
cdef class lxml.etree._Element [ object LxmlElement ]:
|
||||
cdef _Document _doc
|
||||
cdef tree.xmlNode* _c_node
|
||||
|
||||
cdef class lxml.etree.ElementBase(_Element) [ object LxmlElementBase ]:
|
||||
pass
|
||||
|
||||
cdef class lxml.etree._ElementTree [ object LxmlElementTree ]:
|
||||
cdef _Document _doc
|
||||
cdef _Element _context_node
|
||||
|
||||
cdef class lxml.etree.ElementClassLookup [ object LxmlElementClassLookup ]:
|
||||
cdef object (*_lookup_function)(object, _Document, tree.xmlNode*)
|
||||
|
||||
cdef class lxml.etree.FallbackElementClassLookup(ElementClassLookup) \
|
||||
[ object LxmlFallbackElementClassLookup ]:
|
||||
cdef ElementClassLookup fallback
|
||||
cdef object (*_fallback_function)(object, _Document, tree.xmlNode*)
|
||||
|
||||
##########################################################################
|
||||
# creating Element objects
|
||||
|
||||
# create an Element for a C-node in the Document
|
||||
cdef _Element elementFactory(_Document doc, tree.xmlNode* c_node)
|
||||
|
||||
# create an ElementTree for an Element
|
||||
cdef _ElementTree elementTreeFactory(_Element context_node)
|
||||
|
||||
# create an ElementTree subclass for an Element
|
||||
cdef _ElementTree newElementTree(_Element context_node, object subclass)
|
||||
|
||||
# create an ElementTree from an external document
|
||||
cdef _ElementTree adoptExternalDocument(tree.xmlDoc* c_doc, parser, bint is_owned)
|
||||
|
||||
# create a new Element for an existing or new document (doc = None)
|
||||
# builds Python object after setting text, tail, namespaces and attributes
|
||||
cdef _Element makeElement(tag, _Document doc, parser,
|
||||
text, tail, attrib, nsmap)
|
||||
|
||||
# create a new SubElement for an existing parent
|
||||
# builds Python object after setting text, tail, namespaces and attributes
|
||||
cdef _Element makeSubElement(_Element parent, tag, text, tail,
|
||||
attrib, nsmap)
|
||||
|
||||
# deep copy a node to include it in the Document
|
||||
cdef _Element deepcopyNodeToDocument(_Document doc, tree.xmlNode* c_root)
|
||||
|
||||
# set the internal lookup function for Element/Comment/PI classes
|
||||
# use setElementClassLookupFunction(NULL, None) to reset it
|
||||
# note that the lookup function *must always* return an _Element subclass!
|
||||
cdef void setElementClassLookupFunction(
|
||||
object (*function)(object, _Document, tree.xmlNode*), object state)
|
||||
|
||||
# lookup function that always returns the default Element class
|
||||
# note that the first argument is expected to be None!
|
||||
cdef object lookupDefaultElementClass(_1, _Document _2,
|
||||
tree.xmlNode* c_node)
|
||||
|
||||
# lookup function for namespace/tag specific Element classes
|
||||
# note that the first argument is expected to be None!
|
||||
cdef object lookupNamespaceElementClass(_1, _Document _2,
|
||||
tree.xmlNode* c_node)
|
||||
|
||||
# call the fallback lookup function of a FallbackElementClassLookup
|
||||
cdef object callLookupFallback(FallbackElementClassLookup lookup,
|
||||
_Document doc, tree.xmlNode* c_node)
|
||||
|
||||
##########################################################################
|
||||
# XML attribute access
|
||||
|
||||
# return an attribute value for a C attribute on a C element node
|
||||
cdef unicode attributeValue(tree.xmlNode* c_element,
|
||||
tree.xmlAttr* c_attrib_node)
|
||||
|
||||
# return the value of the attribute with 'ns' and 'name' (or None)
|
||||
cdef unicode attributeValueFromNsName(tree.xmlNode* c_element,
|
||||
const_xmlChar* c_ns, const_xmlChar* c_name)
|
||||
|
||||
# return the value of attribute "{ns}name", or the default value
|
||||
cdef object getAttributeValue(_Element element, key, default)
|
||||
|
||||
# return an iterator over attribute names (1), values (2) or items (3)
|
||||
# attributes must not be removed during iteration!
|
||||
cdef object iterattributes(_Element element, int keysvalues)
|
||||
|
||||
# return the list of all attribute names (1), values (2) or items (3)
|
||||
cdef list collectAttributes(tree.xmlNode* c_element, int keysvalues)
|
||||
|
||||
# set an attribute value on an element
|
||||
# on failure, sets an exception and returns -1
|
||||
cdef int setAttributeValue(_Element element, key, value) except -1
|
||||
|
||||
# delete an attribute
|
||||
# on failure, sets an exception and returns -1
|
||||
cdef int delAttribute(_Element element, key) except -1
|
||||
|
||||
# delete an attribute based on name and namespace URI
|
||||
# returns -1 if the attribute was not found (no exception)
|
||||
cdef int delAttributeFromNsName(tree.xmlNode* c_element,
|
||||
const_xmlChar* c_href, const_xmlChar* c_name) noexcept
|
||||
|
||||
##########################################################################
|
||||
# XML node helper functions
|
||||
|
||||
# check if the element has at least one child
|
||||
cdef bint hasChild(tree.xmlNode* c_node) noexcept nogil
|
||||
|
||||
# find child element number 'index' (supports negative indexes)
|
||||
cdef tree.xmlNode* findChild(tree.xmlNode* c_node,
|
||||
Py_ssize_t index) noexcept nogil
|
||||
|
||||
# find child element number 'index' starting at first one
|
||||
cdef tree.xmlNode* findChildForwards(tree.xmlNode* c_node,
|
||||
Py_ssize_t index) nogil
|
||||
|
||||
# find child element number 'index' starting at last one
|
||||
cdef tree.xmlNode* findChildBackwards(tree.xmlNode* c_node,
|
||||
Py_ssize_t index) nogil
|
||||
|
||||
# return next/previous sibling element of the node
|
||||
cdef tree.xmlNode* nextElement(tree.xmlNode* c_node) nogil
|
||||
cdef tree.xmlNode* previousElement(tree.xmlNode* c_node) nogil
|
||||
|
||||
##########################################################################
|
||||
# iterators (DEPRECATED API, don't use in new code!)
|
||||
|
||||
cdef class lxml.etree._ElementTagMatcher [ object LxmlElementTagMatcher ]:
|
||||
cdef char* _href
|
||||
cdef char* _name
|
||||
|
||||
# store "{ns}tag" (or None) filter for this matcher or element iterator
|
||||
# ** unless _href *and* _name are set up 'by hand', this function *must*
|
||||
# ** be called when subclassing the iterator below!
|
||||
cdef void initTagMatch(_ElementTagMatcher matcher, tag)
|
||||
|
||||
cdef class lxml.etree._ElementIterator(_ElementTagMatcher) [
|
||||
object LxmlElementIterator ]:
|
||||
cdef _Element _node
|
||||
cdef tree.xmlNode* (*_next_element)(tree.xmlNode*)
|
||||
|
||||
# store the initial node of the iterator if it matches the required tag
|
||||
# or its next matching sibling if not
|
||||
cdef void iteratorStoreNext(_ElementIterator iterator, _Element node)
|
||||
|
||||
##########################################################################
|
||||
# other helper functions
|
||||
|
||||
# check if a C node matches a tag name and namespace
|
||||
# (NULL allowed for each => always matches)
|
||||
cdef int tagMatches(tree.xmlNode* c_node, const_xmlChar* c_href, const_xmlChar* c_name)
|
||||
|
||||
# convert a UTF-8 char* to a Python unicode string
|
||||
cdef unicode pyunicode(const_xmlChar* s)
|
||||
|
||||
# convert the string to UTF-8 using the normal lxml.etree semantics
|
||||
cdef bytes utf8(object s)
|
||||
|
||||
# split a tag into a (URI, name) tuple, return None as URI for '{}tag'
|
||||
cdef tuple getNsTag(object tag)
|
||||
|
||||
# split a tag into a (URI, name) tuple, return b'' as URI for '{}tag'
|
||||
cdef tuple getNsTagWithEmptyNs(object tag)
|
||||
|
||||
# get the "{ns}tag" string for a C node
|
||||
cdef unicode namespacedName(tree.xmlNode* c_node)
|
||||
|
||||
# get the "{ns}tag" string for a href/tagname pair (c_ns may be NULL)
|
||||
cdef unicode namespacedNameFromNsName(const_xmlChar* c_ns, const_xmlChar* c_tag)
|
||||
|
||||
# check if the node has a text value (which may be '')
|
||||
cdef bint hasText(tree.xmlNode* c_node) nogil
|
||||
|
||||
# check if the node has a tail value (which may be '')
|
||||
cdef bint hasTail(tree.xmlNode* c_node) nogil
|
||||
|
||||
# get the text content of an element (or None)
|
||||
cdef unicode textOf(tree.xmlNode* c_node)
|
||||
|
||||
# get the tail content of an element (or None)
|
||||
cdef unicode tailOf(tree.xmlNode* c_node)
|
||||
|
||||
# set the text value of an element
|
||||
cdef int setNodeText(tree.xmlNode* c_node, text) except -1
|
||||
|
||||
# set the tail text value of an element
|
||||
cdef int setTailText(tree.xmlNode* c_node, text) except -1
|
||||
|
||||
# append an element to the children of a parent element
|
||||
# deprecated: don't use, does not propagate exceptions!
|
||||
# use appendChildToElement() instead
|
||||
cdef void appendChild(_Element parent, _Element child)
|
||||
|
||||
# added in lxml 3.3 as a safe replacement for appendChild()
|
||||
# return -1 for exception, 0 for ok
|
||||
cdef int appendChildToElement(_Element parent, _Element child) except -1
|
||||
|
||||
# recursively look up a namespace in the element or its ancestors, or create it
|
||||
cdef tree.xmlNs* findOrBuildNodeNsPrefix(
|
||||
_Document doc, tree.xmlNode* c_node, const_xmlChar* href, const_xmlChar* prefix)
|
||||
|
||||
# find the Document of an Element, ElementTree or Document (itself!)
|
||||
cdef _Document documentOrRaise(object input)
|
||||
|
||||
# find the root Element of an Element (itself!), ElementTree or Document
|
||||
cdef _Element rootNodeOrRaise(object input)
|
||||
@@ -1,45 +0,0 @@
|
||||
/* Copyright (C) 2003 Free Software Foundation, Inc.
|
||||
This file is part of the GNU CHARSET Library.
|
||||
|
||||
The GNU CHARSET Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public License as
|
||||
published by the Free Software Foundation; either version 2 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
The GNU CHARSET Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with the GNU CHARSET Library; see the file COPYING.LIB. If not,
|
||||
see <https://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef _LIBCHARSET_H
|
||||
#define _LIBCHARSET_H
|
||||
|
||||
#include <localcharset.h>
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
/* Support for relocatable packages. */
|
||||
|
||||
/* Sets the original and the current installation prefix of the package.
|
||||
Relocation simply replaces a pathname starting with the original prefix
|
||||
by the corresponding pathname with the current prefix instead. Both
|
||||
prefixes should be directory names without trailing slash (i.e. use ""
|
||||
instead of "/"). */
|
||||
extern void libcharset_set_relocation_prefix (const char *orig_prefix,
|
||||
const char *curr_prefix);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#endif /* _LIBCHARSET_H */
|
||||
@@ -1,137 +0,0 @@
|
||||
/* Determine a canonical name for the current locale's character encoding.
|
||||
Copyright (C) 2000-2003, 2009-2019 Free Software Foundation, Inc.
|
||||
This file is part of the GNU CHARSET Library.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify it
|
||||
under the terms of the GNU Lesser General Public License as published
|
||||
by the Free Software Foundation; either version 2, or (at your option)
|
||||
any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with this program; if not, see <https://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef _LOCALCHARSET_H
|
||||
#define _LOCALCHARSET_H
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
/* Determine the current locale's character encoding, and canonicalize it
|
||||
into one of the canonical names listed below.
|
||||
The result must not be freed; it is statically allocated. The result
|
||||
becomes invalid when setlocale() is used to change the global locale, or
|
||||
when the value of one of the environment variables LC_ALL, LC_CTYPE, LANG
|
||||
is changed; threads in multithreaded programs should not do this.
|
||||
If the canonical name cannot be determined, the result is a non-canonical
|
||||
name. */
|
||||
extern const char * locale_charset (void);
|
||||
|
||||
/* About GNU canonical names for character encodings:
|
||||
|
||||
Every canonical name must be supported by GNU libiconv. Support by GNU libc
|
||||
is also desirable.
|
||||
|
||||
The name is case insensitive. Usually an upper case MIME charset name is
|
||||
preferred.
|
||||
|
||||
The current list of these GNU canonical names is:
|
||||
|
||||
name MIME? used by which systems
|
||||
(darwin = Mac OS X, windows = native Windows)
|
||||
|
||||
ASCII, ANSI_X3.4-1968 glibc solaris freebsd netbsd darwin minix cygwin
|
||||
ISO-8859-1 Y glibc aix hpux irix osf solaris freebsd netbsd openbsd darwin cygwin zos
|
||||
ISO-8859-2 Y glibc aix hpux irix osf solaris freebsd netbsd openbsd darwin cygwin zos
|
||||
ISO-8859-3 Y glibc solaris cygwin
|
||||
ISO-8859-4 Y hpux osf solaris freebsd netbsd openbsd darwin
|
||||
ISO-8859-5 Y glibc aix hpux irix osf solaris freebsd netbsd openbsd darwin cygwin zos
|
||||
ISO-8859-6 Y glibc aix hpux solaris cygwin
|
||||
ISO-8859-7 Y glibc aix hpux irix osf solaris freebsd netbsd openbsd darwin cygwin zos
|
||||
ISO-8859-8 Y glibc aix hpux osf solaris cygwin zos
|
||||
ISO-8859-9 Y glibc aix hpux irix osf solaris freebsd darwin cygwin zos
|
||||
ISO-8859-13 glibc hpux solaris freebsd netbsd openbsd darwin cygwin
|
||||
ISO-8859-14 glibc cygwin
|
||||
ISO-8859-15 glibc aix irix osf solaris freebsd netbsd openbsd darwin cygwin
|
||||
KOI8-R Y glibc hpux solaris freebsd netbsd openbsd darwin
|
||||
KOI8-U Y glibc freebsd netbsd openbsd darwin cygwin
|
||||
KOI8-T glibc
|
||||
CP437 dos
|
||||
CP775 dos
|
||||
CP850 aix osf dos
|
||||
CP852 dos
|
||||
CP855 dos
|
||||
CP856 aix
|
||||
CP857 dos
|
||||
CP861 dos
|
||||
CP862 dos
|
||||
CP864 dos
|
||||
CP865 dos
|
||||
CP866 freebsd netbsd openbsd darwin dos
|
||||
CP869 dos
|
||||
CP874 windows dos
|
||||
CP922 aix
|
||||
CP932 aix cygwin windows dos
|
||||
CP943 aix zos
|
||||
CP949 osf darwin windows dos
|
||||
CP950 windows dos
|
||||
CP1046 aix
|
||||
CP1124 aix
|
||||
CP1125 dos
|
||||
CP1129 aix
|
||||
CP1131 freebsd darwin
|
||||
CP1250 windows
|
||||
CP1251 glibc hpux solaris freebsd netbsd openbsd darwin cygwin windows
|
||||
CP1252 aix windows
|
||||
CP1253 windows
|
||||
CP1254 windows
|
||||
CP1255 glibc windows
|
||||
CP1256 windows
|
||||
CP1257 windows
|
||||
GB2312 Y glibc aix hpux irix solaris freebsd netbsd darwin cygwin zos
|
||||
EUC-JP Y glibc aix hpux irix osf solaris freebsd netbsd darwin cygwin
|
||||
EUC-KR Y glibc aix hpux irix osf solaris freebsd netbsd darwin cygwin zos
|
||||
EUC-TW glibc aix hpux irix osf solaris netbsd
|
||||
BIG5 Y glibc aix hpux osf solaris freebsd netbsd darwin cygwin zos
|
||||
BIG5-HKSCS glibc hpux solaris netbsd darwin
|
||||
GBK glibc aix osf solaris freebsd darwin cygwin windows dos
|
||||
GB18030 glibc hpux solaris freebsd netbsd darwin
|
||||
SHIFT_JIS Y hpux osf solaris freebsd netbsd darwin
|
||||
JOHAB glibc solaris windows
|
||||
TIS-620 glibc aix hpux osf solaris cygwin zos
|
||||
VISCII Y glibc
|
||||
TCVN5712-1 glibc
|
||||
ARMSCII-8 glibc freebsd netbsd darwin
|
||||
GEORGIAN-PS glibc cygwin
|
||||
PT154 glibc netbsd cygwin
|
||||
HP-ROMAN8 hpux
|
||||
HP-ARABIC8 hpux
|
||||
HP-GREEK8 hpux
|
||||
HP-HEBREW8 hpux
|
||||
HP-TURKISH8 hpux
|
||||
HP-KANA8 hpux
|
||||
DEC-KANJI osf
|
||||
DEC-HANYU osf
|
||||
UTF-8 Y glibc aix hpux osf solaris netbsd darwin cygwin zos
|
||||
|
||||
Note: Names which are not marked as being a MIME name should not be used in
|
||||
Internet protocols for information interchange (mail, news, etc.).
|
||||
|
||||
Note: ASCII and ANSI_X3.4-1968 are synonymous canonical names. Applications
|
||||
must understand both names and treat them as equivalent.
|
||||
*/
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#endif /* _LOCALCHARSET_H */
|
||||
@@ -1,543 +0,0 @@
|
||||
/* zconf.h -- configuration of the zlib compression library
|
||||
* Copyright (C) 1995-2024 Jean-loup Gailly, Mark Adler
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
|
||||
/* @(#) $Id$ */
|
||||
|
||||
#ifndef ZCONF_H
|
||||
#define ZCONF_H
|
||||
|
||||
/*
|
||||
* If you *really* need a unique prefix for all types and library functions,
|
||||
* compile with -DZ_PREFIX. The "standard" zlib should be compiled without it.
|
||||
* Even better than compiling with -DZ_PREFIX would be to use configure to set
|
||||
* this permanently in zconf.h using "./configure --zprefix".
|
||||
*/
|
||||
#ifdef Z_PREFIX /* may be set to #if 1 by ./configure */
|
||||
# define Z_PREFIX_SET
|
||||
|
||||
/* all linked symbols and init macros */
|
||||
# define _dist_code z__dist_code
|
||||
# define _length_code z__length_code
|
||||
# define _tr_align z__tr_align
|
||||
# define _tr_flush_bits z__tr_flush_bits
|
||||
# define _tr_flush_block z__tr_flush_block
|
||||
# define _tr_init z__tr_init
|
||||
# define _tr_stored_block z__tr_stored_block
|
||||
# define _tr_tally z__tr_tally
|
||||
# define adler32 z_adler32
|
||||
# define adler32_combine z_adler32_combine
|
||||
# define adler32_combine64 z_adler32_combine64
|
||||
# define adler32_z z_adler32_z
|
||||
# ifndef Z_SOLO
|
||||
# define compress z_compress
|
||||
# define compress2 z_compress2
|
||||
# define compressBound z_compressBound
|
||||
# endif
|
||||
# define crc32 z_crc32
|
||||
# define crc32_combine z_crc32_combine
|
||||
# define crc32_combine64 z_crc32_combine64
|
||||
# define crc32_combine_gen z_crc32_combine_gen
|
||||
# define crc32_combine_gen64 z_crc32_combine_gen64
|
||||
# define crc32_combine_op z_crc32_combine_op
|
||||
# define crc32_z z_crc32_z
|
||||
# define deflate z_deflate
|
||||
# define deflateBound z_deflateBound
|
||||
# define deflateCopy z_deflateCopy
|
||||
# define deflateEnd z_deflateEnd
|
||||
# define deflateGetDictionary z_deflateGetDictionary
|
||||
# define deflateInit z_deflateInit
|
||||
# define deflateInit2 z_deflateInit2
|
||||
# define deflateInit2_ z_deflateInit2_
|
||||
# define deflateInit_ z_deflateInit_
|
||||
# define deflateParams z_deflateParams
|
||||
# define deflatePending z_deflatePending
|
||||
# define deflatePrime z_deflatePrime
|
||||
# define deflateReset z_deflateReset
|
||||
# define deflateResetKeep z_deflateResetKeep
|
||||
# define deflateSetDictionary z_deflateSetDictionary
|
||||
# define deflateSetHeader z_deflateSetHeader
|
||||
# define deflateTune z_deflateTune
|
||||
# define deflate_copyright z_deflate_copyright
|
||||
# define get_crc_table z_get_crc_table
|
||||
# ifndef Z_SOLO
|
||||
# define gz_error z_gz_error
|
||||
# define gz_intmax z_gz_intmax
|
||||
# define gz_strwinerror z_gz_strwinerror
|
||||
# define gzbuffer z_gzbuffer
|
||||
# define gzclearerr z_gzclearerr
|
||||
# define gzclose z_gzclose
|
||||
# define gzclose_r z_gzclose_r
|
||||
# define gzclose_w z_gzclose_w
|
||||
# define gzdirect z_gzdirect
|
||||
# define gzdopen z_gzdopen
|
||||
# define gzeof z_gzeof
|
||||
# define gzerror z_gzerror
|
||||
# define gzflush z_gzflush
|
||||
# define gzfread z_gzfread
|
||||
# define gzfwrite z_gzfwrite
|
||||
# define gzgetc z_gzgetc
|
||||
# define gzgetc_ z_gzgetc_
|
||||
# define gzgets z_gzgets
|
||||
# define gzoffset z_gzoffset
|
||||
# define gzoffset64 z_gzoffset64
|
||||
# define gzopen z_gzopen
|
||||
# define gzopen64 z_gzopen64
|
||||
# ifdef _WIN32
|
||||
# define gzopen_w z_gzopen_w
|
||||
# endif
|
||||
# define gzprintf z_gzprintf
|
||||
# define gzputc z_gzputc
|
||||
# define gzputs z_gzputs
|
||||
# define gzread z_gzread
|
||||
# define gzrewind z_gzrewind
|
||||
# define gzseek z_gzseek
|
||||
# define gzseek64 z_gzseek64
|
||||
# define gzsetparams z_gzsetparams
|
||||
# define gztell z_gztell
|
||||
# define gztell64 z_gztell64
|
||||
# define gzungetc z_gzungetc
|
||||
# define gzvprintf z_gzvprintf
|
||||
# define gzwrite z_gzwrite
|
||||
# endif
|
||||
# define inflate z_inflate
|
||||
# define inflateBack z_inflateBack
|
||||
# define inflateBackEnd z_inflateBackEnd
|
||||
# define inflateBackInit z_inflateBackInit
|
||||
# define inflateBackInit_ z_inflateBackInit_
|
||||
# define inflateCodesUsed z_inflateCodesUsed
|
||||
# define inflateCopy z_inflateCopy
|
||||
# define inflateEnd z_inflateEnd
|
||||
# define inflateGetDictionary z_inflateGetDictionary
|
||||
# define inflateGetHeader z_inflateGetHeader
|
||||
# define inflateInit z_inflateInit
|
||||
# define inflateInit2 z_inflateInit2
|
||||
# define inflateInit2_ z_inflateInit2_
|
||||
# define inflateInit_ z_inflateInit_
|
||||
# define inflateMark z_inflateMark
|
||||
# define inflatePrime z_inflatePrime
|
||||
# define inflateReset z_inflateReset
|
||||
# define inflateReset2 z_inflateReset2
|
||||
# define inflateResetKeep z_inflateResetKeep
|
||||
# define inflateSetDictionary z_inflateSetDictionary
|
||||
# define inflateSync z_inflateSync
|
||||
# define inflateSyncPoint z_inflateSyncPoint
|
||||
# define inflateUndermine z_inflateUndermine
|
||||
# define inflateValidate z_inflateValidate
|
||||
# define inflate_copyright z_inflate_copyright
|
||||
# define inflate_fast z_inflate_fast
|
||||
# define inflate_table z_inflate_table
|
||||
# ifndef Z_SOLO
|
||||
# define uncompress z_uncompress
|
||||
# define uncompress2 z_uncompress2
|
||||
# endif
|
||||
# define zError z_zError
|
||||
# ifndef Z_SOLO
|
||||
# define zcalloc z_zcalloc
|
||||
# define zcfree z_zcfree
|
||||
# endif
|
||||
# define zlibCompileFlags z_zlibCompileFlags
|
||||
# define zlibVersion z_zlibVersion
|
||||
|
||||
/* all zlib typedefs in zlib.h and zconf.h */
|
||||
# define Byte z_Byte
|
||||
# define Bytef z_Bytef
|
||||
# define alloc_func z_alloc_func
|
||||
# define charf z_charf
|
||||
# define free_func z_free_func
|
||||
# ifndef Z_SOLO
|
||||
# define gzFile z_gzFile
|
||||
# endif
|
||||
# define gz_header z_gz_header
|
||||
# define gz_headerp z_gz_headerp
|
||||
# define in_func z_in_func
|
||||
# define intf z_intf
|
||||
# define out_func z_out_func
|
||||
# define uInt z_uInt
|
||||
# define uIntf z_uIntf
|
||||
# define uLong z_uLong
|
||||
# define uLongf z_uLongf
|
||||
# define voidp z_voidp
|
||||
# define voidpc z_voidpc
|
||||
# define voidpf z_voidpf
|
||||
|
||||
/* all zlib structs in zlib.h and zconf.h */
|
||||
# define gz_header_s z_gz_header_s
|
||||
# define internal_state z_internal_state
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__MSDOS__) && !defined(MSDOS)
|
||||
# define MSDOS
|
||||
#endif
|
||||
#if (defined(OS_2) || defined(__OS2__)) && !defined(OS2)
|
||||
# define OS2
|
||||
#endif
|
||||
#if defined(_WINDOWS) && !defined(WINDOWS)
|
||||
# define WINDOWS
|
||||
#endif
|
||||
#if defined(_WIN32) || defined(_WIN32_WCE) || defined(__WIN32__)
|
||||
# ifndef WIN32
|
||||
# define WIN32
|
||||
# endif
|
||||
#endif
|
||||
#if (defined(MSDOS) || defined(OS2) || defined(WINDOWS)) && !defined(WIN32)
|
||||
# if !defined(__GNUC__) && !defined(__FLAT__) && !defined(__386__)
|
||||
# ifndef SYS16BIT
|
||||
# define SYS16BIT
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Compile with -DMAXSEG_64K if the alloc function cannot allocate more
|
||||
* than 64k bytes at a time (needed on systems with 16-bit int).
|
||||
*/
|
||||
#ifdef SYS16BIT
|
||||
# define MAXSEG_64K
|
||||
#endif
|
||||
#ifdef MSDOS
|
||||
# define UNALIGNED_OK
|
||||
#endif
|
||||
|
||||
#ifdef __STDC_VERSION__
|
||||
# ifndef STDC
|
||||
# define STDC
|
||||
# endif
|
||||
# if __STDC_VERSION__ >= 199901L
|
||||
# ifndef STDC99
|
||||
# define STDC99
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
#if !defined(STDC) && (defined(__STDC__) || defined(__cplusplus))
|
||||
# define STDC
|
||||
#endif
|
||||
#if !defined(STDC) && (defined(__GNUC__) || defined(__BORLANDC__))
|
||||
# define STDC
|
||||
#endif
|
||||
#if !defined(STDC) && (defined(MSDOS) || defined(WINDOWS) || defined(WIN32))
|
||||
# define STDC
|
||||
#endif
|
||||
#if !defined(STDC) && (defined(OS2) || defined(__HOS_AIX__))
|
||||
# define STDC
|
||||
#endif
|
||||
|
||||
#if defined(__OS400__) && !defined(STDC) /* iSeries (formerly AS/400). */
|
||||
# define STDC
|
||||
#endif
|
||||
|
||||
#ifndef STDC
|
||||
# ifndef const /* cannot use !defined(STDC) && !defined(const) on Mac */
|
||||
# define const /* note: need a more gentle solution here */
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#if defined(ZLIB_CONST) && !defined(z_const)
|
||||
# define z_const const
|
||||
#else
|
||||
# define z_const
|
||||
#endif
|
||||
|
||||
#ifdef Z_SOLO
|
||||
# ifdef _WIN64
|
||||
typedef unsigned long long z_size_t;
|
||||
# else
|
||||
typedef unsigned long z_size_t;
|
||||
# endif
|
||||
#else
|
||||
# define z_longlong long long
|
||||
# if defined(NO_SIZE_T)
|
||||
typedef unsigned NO_SIZE_T z_size_t;
|
||||
# elif defined(STDC)
|
||||
# include <stddef.h>
|
||||
typedef size_t z_size_t;
|
||||
# else
|
||||
typedef unsigned long z_size_t;
|
||||
# endif
|
||||
# undef z_longlong
|
||||
#endif
|
||||
|
||||
/* Maximum value for memLevel in deflateInit2 */
|
||||
#ifndef MAX_MEM_LEVEL
|
||||
# ifdef MAXSEG_64K
|
||||
# define MAX_MEM_LEVEL 8
|
||||
# else
|
||||
# define MAX_MEM_LEVEL 9
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* Maximum value for windowBits in deflateInit2 and inflateInit2.
|
||||
* WARNING: reducing MAX_WBITS makes minigzip unable to extract .gz files
|
||||
* created by gzip. (Files created by minigzip can still be extracted by
|
||||
* gzip.)
|
||||
*/
|
||||
#ifndef MAX_WBITS
|
||||
# define MAX_WBITS 15 /* 32K LZ77 window */
|
||||
#endif
|
||||
|
||||
/* The memory requirements for deflate are (in bytes):
|
||||
(1 << (windowBits+2)) + (1 << (memLevel+9))
|
||||
that is: 128K for windowBits=15 + 128K for memLevel = 8 (default values)
|
||||
plus a few kilobytes for small objects. For example, if you want to reduce
|
||||
the default memory requirements from 256K to 128K, compile with
|
||||
make CFLAGS="-O -DMAX_WBITS=14 -DMAX_MEM_LEVEL=7"
|
||||
Of course this will generally degrade compression (there's no free lunch).
|
||||
|
||||
The memory requirements for inflate are (in bytes) 1 << windowBits
|
||||
that is, 32K for windowBits=15 (default value) plus about 7 kilobytes
|
||||
for small objects.
|
||||
*/
|
||||
|
||||
/* Type declarations */
|
||||
|
||||
#ifndef OF /* function prototypes */
|
||||
# ifdef STDC
|
||||
# define OF(args) args
|
||||
# else
|
||||
# define OF(args) ()
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* The following definitions for FAR are needed only for MSDOS mixed
|
||||
* model programming (small or medium model with some far allocations).
|
||||
* This was tested only with MSC; for other MSDOS compilers you may have
|
||||
* to define NO_MEMCPY in zutil.h. If you don't need the mixed model,
|
||||
* just define FAR to be empty.
|
||||
*/
|
||||
#ifdef SYS16BIT
|
||||
# if defined(M_I86SM) || defined(M_I86MM)
|
||||
/* MSC small or medium model */
|
||||
# define SMALL_MEDIUM
|
||||
# ifdef _MSC_VER
|
||||
# define FAR _far
|
||||
# else
|
||||
# define FAR far
|
||||
# endif
|
||||
# endif
|
||||
# if (defined(__SMALL__) || defined(__MEDIUM__))
|
||||
/* Turbo C small or medium model */
|
||||
# define SMALL_MEDIUM
|
||||
# ifdef __BORLANDC__
|
||||
# define FAR _far
|
||||
# else
|
||||
# define FAR far
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#if defined(WINDOWS) || defined(WIN32)
|
||||
/* If building or using zlib as a DLL, define ZLIB_DLL.
|
||||
* This is not mandatory, but it offers a little performance increase.
|
||||
*/
|
||||
# ifdef ZLIB_DLL
|
||||
# if defined(WIN32) && (!defined(__BORLANDC__) || (__BORLANDC__ >= 0x500))
|
||||
# ifdef ZLIB_INTERNAL
|
||||
# define ZEXTERN extern __declspec(dllexport)
|
||||
# else
|
||||
# define ZEXTERN extern __declspec(dllimport)
|
||||
# endif
|
||||
# endif
|
||||
# endif /* ZLIB_DLL */
|
||||
/* If building or using zlib with the WINAPI/WINAPIV calling convention,
|
||||
* define ZLIB_WINAPI.
|
||||
* Caution: the standard ZLIB1.DLL is NOT compiled using ZLIB_WINAPI.
|
||||
*/
|
||||
# ifdef ZLIB_WINAPI
|
||||
# ifdef FAR
|
||||
# undef FAR
|
||||
# endif
|
||||
# ifndef WIN32_LEAN_AND_MEAN
|
||||
# define WIN32_LEAN_AND_MEAN
|
||||
# endif
|
||||
# include <windows.h>
|
||||
/* No need for _export, use ZLIB.DEF instead. */
|
||||
/* For complete Windows compatibility, use WINAPI, not __stdcall. */
|
||||
# define ZEXPORT WINAPI
|
||||
# ifdef WIN32
|
||||
# define ZEXPORTVA WINAPIV
|
||||
# else
|
||||
# define ZEXPORTVA FAR CDECL
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#if defined (__BEOS__)
|
||||
# ifdef ZLIB_DLL
|
||||
# ifdef ZLIB_INTERNAL
|
||||
# define ZEXPORT __declspec(dllexport)
|
||||
# define ZEXPORTVA __declspec(dllexport)
|
||||
# else
|
||||
# define ZEXPORT __declspec(dllimport)
|
||||
# define ZEXPORTVA __declspec(dllimport)
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifndef ZEXTERN
|
||||
# define ZEXTERN extern
|
||||
#endif
|
||||
#ifndef ZEXPORT
|
||||
# define ZEXPORT
|
||||
#endif
|
||||
#ifndef ZEXPORTVA
|
||||
# define ZEXPORTVA
|
||||
#endif
|
||||
|
||||
#ifndef FAR
|
||||
# define FAR
|
||||
#endif
|
||||
|
||||
#if !defined(__MACTYPES__)
|
||||
typedef unsigned char Byte; /* 8 bits */
|
||||
#endif
|
||||
typedef unsigned int uInt; /* 16 bits or more */
|
||||
typedef unsigned long uLong; /* 32 bits or more */
|
||||
|
||||
#ifdef SMALL_MEDIUM
|
||||
/* Borland C/C++ and some old MSC versions ignore FAR inside typedef */
|
||||
# define Bytef Byte FAR
|
||||
#else
|
||||
typedef Byte FAR Bytef;
|
||||
#endif
|
||||
typedef char FAR charf;
|
||||
typedef int FAR intf;
|
||||
typedef uInt FAR uIntf;
|
||||
typedef uLong FAR uLongf;
|
||||
|
||||
#ifdef STDC
|
||||
typedef void const *voidpc;
|
||||
typedef void FAR *voidpf;
|
||||
typedef void *voidp;
|
||||
#else
|
||||
typedef Byte const *voidpc;
|
||||
typedef Byte FAR *voidpf;
|
||||
typedef Byte *voidp;
|
||||
#endif
|
||||
|
||||
#if !defined(Z_U4) && !defined(Z_SOLO) && defined(STDC)
|
||||
# include <limits.h>
|
||||
# if (UINT_MAX == 0xffffffffUL)
|
||||
# define Z_U4 unsigned
|
||||
# elif (ULONG_MAX == 0xffffffffUL)
|
||||
# define Z_U4 unsigned long
|
||||
# elif (USHRT_MAX == 0xffffffffUL)
|
||||
# define Z_U4 unsigned short
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifdef Z_U4
|
||||
typedef Z_U4 z_crc_t;
|
||||
#else
|
||||
typedef unsigned long z_crc_t;
|
||||
#endif
|
||||
|
||||
#if 1 /* was set to #if 1 by ./configure */
|
||||
# define Z_HAVE_UNISTD_H
|
||||
#endif
|
||||
|
||||
#if 1 /* was set to #if 1 by ./configure */
|
||||
# define Z_HAVE_STDARG_H
|
||||
#endif
|
||||
|
||||
#ifdef STDC
|
||||
# ifndef Z_SOLO
|
||||
# include <sys/types.h> /* for off_t */
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#if defined(STDC) || defined(Z_HAVE_STDARG_H)
|
||||
# ifndef Z_SOLO
|
||||
# include <stdarg.h> /* for va_list */
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifdef _WIN32
|
||||
# ifndef Z_SOLO
|
||||
# include <stddef.h> /* for wchar_t */
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* a little trick to accommodate both "#define _LARGEFILE64_SOURCE" and
|
||||
* "#define _LARGEFILE64_SOURCE 1" as requesting 64-bit operations, (even
|
||||
* though the former does not conform to the LFS document), but considering
|
||||
* both "#undef _LARGEFILE64_SOURCE" and "#define _LARGEFILE64_SOURCE 0" as
|
||||
* equivalently requesting no 64-bit operations
|
||||
*/
|
||||
#if defined(_LARGEFILE64_SOURCE) && -_LARGEFILE64_SOURCE - -1 == 1
|
||||
# undef _LARGEFILE64_SOURCE
|
||||
#endif
|
||||
|
||||
#ifndef Z_HAVE_UNISTD_H
|
||||
# ifdef __WATCOMC__
|
||||
# define Z_HAVE_UNISTD_H
|
||||
# endif
|
||||
#endif
|
||||
#ifndef Z_HAVE_UNISTD_H
|
||||
# if defined(_LARGEFILE64_SOURCE) && !defined(_WIN32)
|
||||
# define Z_HAVE_UNISTD_H
|
||||
# endif
|
||||
#endif
|
||||
#ifndef Z_SOLO
|
||||
# if defined(Z_HAVE_UNISTD_H)
|
||||
# include <unistd.h> /* for SEEK_*, off_t, and _LFS64_LARGEFILE */
|
||||
# ifdef VMS
|
||||
# include <unixio.h> /* for off_t */
|
||||
# endif
|
||||
# ifndef z_off_t
|
||||
# define z_off_t off_t
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#if defined(_LFS64_LARGEFILE) && _LFS64_LARGEFILE-0
|
||||
# define Z_LFS64
|
||||
#endif
|
||||
|
||||
#if defined(_LARGEFILE64_SOURCE) && defined(Z_LFS64)
|
||||
# define Z_LARGE64
|
||||
#endif
|
||||
|
||||
#if defined(_FILE_OFFSET_BITS) && _FILE_OFFSET_BITS-0 == 64 && defined(Z_LFS64)
|
||||
# define Z_WANT64
|
||||
#endif
|
||||
|
||||
#if !defined(SEEK_SET) && !defined(Z_SOLO)
|
||||
# define SEEK_SET 0 /* Seek from beginning of file. */
|
||||
# define SEEK_CUR 1 /* Seek from current position. */
|
||||
# define SEEK_END 2 /* Set file pointer to EOF plus "offset" */
|
||||
#endif
|
||||
|
||||
#ifndef z_off_t
|
||||
# define z_off_t long
|
||||
#endif
|
||||
|
||||
#if !defined(_WIN32) && defined(Z_LARGE64)
|
||||
# define z_off64_t off64_t
|
||||
#else
|
||||
# if defined(_WIN32) && !defined(__GNUC__)
|
||||
# define z_off64_t __int64
|
||||
# else
|
||||
# define z_off64_t z_off_t
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* MVS linker does not support external names larger than 8 bytes */
|
||||
#if defined(__MVS__)
|
||||
#pragma map(deflateInit_,"DEIN")
|
||||
#pragma map(deflateInit2_,"DEIN2")
|
||||
#pragma map(deflateEnd,"DEEND")
|
||||
#pragma map(deflateBound,"DEBND")
|
||||
#pragma map(inflateInit_,"ININ")
|
||||
#pragma map(inflateInit2_,"ININ2")
|
||||
#pragma map(inflateEnd,"INEND")
|
||||
#pragma map(inflateSync,"INSY")
|
||||
#pragma map(inflateSetDictionary,"INSEDI")
|
||||
#pragma map(compressBound,"CMBND")
|
||||
#pragma map(inflate_table,"INTABL")
|
||||
#pragma map(inflate_fast,"INFA")
|
||||
#pragma map(inflate_copyright,"INCOPY")
|
||||
#endif
|
||||
|
||||
#endif /* ZCONF_H */
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,56 +0,0 @@
|
||||
from libc.string cimport const_char
|
||||
|
||||
from lxml.includes.tree cimport xmlDoc
|
||||
from lxml.includes.tree cimport xmlInputReadCallback, xmlInputCloseCallback
|
||||
from lxml.includes.xmlparser cimport xmlParserCtxt, xmlSAXHandler, xmlSAXHandlerV1
|
||||
|
||||
cdef extern from "libxml/HTMLparser.h" nogil:
|
||||
ctypedef enum htmlParserOption:
|
||||
HTML_PARSE_NOERROR # suppress error reports
|
||||
HTML_PARSE_NOWARNING # suppress warning reports
|
||||
HTML_PARSE_PEDANTIC # pedantic error reporting
|
||||
HTML_PARSE_NOBLANKS # remove blank nodes
|
||||
HTML_PARSE_NONET # Forbid network access
|
||||
# libxml2 2.6.21+ only:
|
||||
HTML_PARSE_RECOVER # Relaxed parsing
|
||||
HTML_PARSE_COMPACT # compact small text nodes
|
||||
# libxml2 2.7.7+ only:
|
||||
HTML_PARSE_NOIMPLIED # Do not add implied html/body... elements
|
||||
# libxml2 2.7.8+ only:
|
||||
HTML_PARSE_NODEFDTD # do not default a doctype if not found
|
||||
# libxml2 2.8.0+ only:
|
||||
XML_PARSE_IGNORE_ENC # ignore internal document encoding hint
|
||||
|
||||
xmlSAXHandlerV1 htmlDefaultSAXHandler
|
||||
|
||||
cdef xmlParserCtxt* htmlCreateMemoryParserCtxt(
|
||||
char* buffer, int size)
|
||||
cdef xmlParserCtxt* htmlCreateFileParserCtxt(
|
||||
char* filename, char* encoding)
|
||||
cdef xmlParserCtxt* htmlCreatePushParserCtxt(xmlSAXHandler* sax,
|
||||
void* user_data,
|
||||
char* chunk, int size,
|
||||
char* filename, int enc)
|
||||
cdef void htmlFreeParserCtxt(xmlParserCtxt* ctxt)
|
||||
cdef void htmlCtxtReset(xmlParserCtxt* ctxt)
|
||||
cdef int htmlCtxtUseOptions(xmlParserCtxt* ctxt, int options)
|
||||
cdef int htmlParseDocument(xmlParserCtxt* ctxt)
|
||||
cdef int htmlParseChunk(xmlParserCtxt* ctxt,
|
||||
char* chunk, int size, int terminate)
|
||||
|
||||
cdef xmlDoc* htmlCtxtReadFile(xmlParserCtxt* ctxt,
|
||||
char* filename, const_char* encoding,
|
||||
int options)
|
||||
cdef xmlDoc* htmlCtxtReadDoc(xmlParserCtxt* ctxt,
|
||||
char* buffer, char* URL, const_char* encoding,
|
||||
int options)
|
||||
cdef xmlDoc* htmlCtxtReadIO(xmlParserCtxt* ctxt,
|
||||
xmlInputReadCallback ioread,
|
||||
xmlInputCloseCallback ioclose,
|
||||
void* ioctx,
|
||||
char* URL, const_char* encoding,
|
||||
int options)
|
||||
cdef xmlDoc* htmlCtxtReadMemory(xmlParserCtxt* ctxt,
|
||||
char* buffer, int size,
|
||||
char* filename, const_char* encoding,
|
||||
int options)
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user