Add blueprints and fake content

This commit is contained in:
Paul Nicoué 2021-11-18 17:44:47 +01:00
parent 1ff19bf38f
commit 8235816462
592 changed files with 22385 additions and 31535 deletions

View file

@ -0,0 +1,165 @@
<?php
namespace Kirby\Sane;
use DOMAttr;
use DOMDocumentType;
use DOMElement;
use Kirby\Toolkit\Dom;
/**
* Base class for Sane handlers with DOM file types
* @since 3.5.8
*
* @package Kirby Sane
* @author Lukas Bestle <lukas@getkirby.com>
* @link https://getkirby.com
* @copyright Bastian Allgeier GmbH
* @license https://opensource.org/licenses/MIT
*/
class DomHandler extends Handler
{
    /**
     * Data URI prefixes that are accepted;
     * passed to the sanitizer as the `allowedDataUris` option
     *
     * @var array
     */
    public static $allowedDataUris = [
        'data:image/png',
        'data:image/gif',
        'data:image/jpg',
        'data:image/jpe',
        'data:image/pjp',
        'data:img/png',
        'data:img/gif',
        'data:img/jpg',
        'data:img/jpe',
        'data:img/pjp',
    ];

    /**
     * Hostnames that HTTP(S) URLs may point to;
     * passed to the sanitizer as the `allowedDomains` option
     *
     * @var array
     */
    public static $allowedDomains = [];

    /**
     * Names of the XML processing instructions that are accepted;
     * passed to the sanitizer as the `allowedPIs` option
     *
     * @var array
     */
    public static $allowedPIs = [];

    /**
     * Document type that is handed to the DOM parser
     * (`'HTML'` or `'XML'`); child classes override this
     *
     * @var string
     */
    protected static $type = 'XML';

    /**
     * Parses the string, runs the sanitizer on the
     * resulting DOM and returns the serialized document
     *
     * @param string $string
     * @return string
     *
     * @throws \Kirby\Exception\InvalidArgumentException If the file couldn't be parsed
     */
    public static function sanitize(string $string): string
    {
        $document = static::parse($string);

        // the list of reported errors is not needed when sanitizing
        $document->sanitize(static::options());

        return $document->toString();
    }

    /**
     * Parses the string, runs the sanitizer on the resulting DOM
     * and throws the first reported error (if there is any)
     *
     * @param string $string
     * @return void
     *
     * @throws \Kirby\Exception\InvalidArgumentException If the file couldn't be parsed
     * @throws \Kirby\Exception\InvalidArgumentException If the file didn't pass validation
     */
    public static function validate(string $string): void
    {
        $problems = static::parse($string)->sanitize(static::options());

        if (count($problems) === 0) {
            return;
        }

        // only one exception can be thrown at a time,
        // so the first reported problem wins
        throw $problems[0];
    }

    /**
     * Hook for handler-specific attribute sanitization;
     * registered as the `attrCallback` option
     * @internal
     *
     * @param \DOMAttr $attr
     * @return array Array with exception objects for each modification
     */
    public static function sanitizeAttr(DOMAttr $attr): array
    {
        // nothing to do by default; child classes add their own rules
        return [];
    }

    /**
     * Hook for handler-specific element sanitization;
     * registered as the `elementCallback` option
     * @internal
     *
     * @param \DOMElement $element
     * @return array Array with exception objects for each modification
     */
    public static function sanitizeElement(DOMElement $element): array
    {
        // nothing to do by default; child classes add their own rules
        return [];
    }

    /**
     * Hook for handler-specific doctype validation;
     * registered as the `doctypeCallback` option
     * @internal
     *
     * @param \DOMDocumentType $doctype
     * @return void
     */
    public static function validateDoctype(DOMDocumentType $doctype): void
    {
        // nothing to do by default; child classes add their own rules
    }

    /**
     * Collects the options that are passed to the DOM sanitizer
     * (child classes extend the returned array)
     *
     * @return array
     */
    protected static function options(): array
    {
        // late static binding ensures that the callbacks
        // of the concrete handler class get registered
        $handler = static::class;

        return [
            'allowedDataUris' => static::$allowedDataUris,
            'allowedDomains'  => static::$allowedDomains,
            'allowedPIs'      => static::$allowedPIs,
            'attrCallback'    => [$handler, 'sanitizeAttr'],
            'doctypeCallback' => [$handler, 'validateDoctype'],
            'elementCallback' => [$handler, 'sanitizeElement'],
        ];
    }

    /**
     * Creates the `Toolkit\Dom` object for the given string
     * using the document type of the handler
     *
     * @param string $string
     * @return \Kirby\Toolkit\Dom
     *
     * @throws \Kirby\Exception\InvalidArgumentException If the file couldn't be parsed
     */
    protected static function parse(string $string)
    {
        $type = static::$type;

        return new Dom($string, $type);
    }
}

View file

@ -3,7 +3,7 @@
namespace Kirby\Sane;
use Kirby\Exception\Exception;
use Kirby\Toolkit\F;
use Kirby\Filesystem\F;
/**
* Base handler abstract,
@ -19,6 +19,30 @@ use Kirby\Toolkit\F;
*/
abstract class Handler
{
/**
 * Sanitizes the given string
 *
 * Every concrete handler has to implement this method;
 * `sanitizeFile()` passes the file contents to it and
 * writes the returned string back to the file.
 *
 * @param string $string Contents to sanitize
 * @return string Sanitized contents
 */
abstract public static function sanitize(string $string): string;
/**
 * Sanitizes the contents of a file by overwriting
 * the file with the sanitized version
 *
 * @param string $file
 * @return void
 *
 * @throws \Kirby\Exception\Exception If the file does not exist
 * @throws \Kirby\Exception\Exception If the sanitized contents could not be written
 * @throws \Kirby\Exception\Exception On other errors
 */
public static function sanitizeFile(string $file): void
{
    $sanitized = static::sanitize(static::readFile($file));

    // a failed write would silently leave the unsanitized original
    // in place, so it must be reported to the caller
    // NOTE(review): relies on `F::write()` signalling failure with `false`
    if (F::write($file, $sanitized) === false) {
        throw new Exception('The file "' . $file . '" could not be written');
    }
}
/**
* Validates file contents
*
@ -37,15 +61,31 @@ abstract class Handler
* @return void
*
* @throws \Kirby\Exception\InvalidArgumentException If the file didn't pass validation
* @throws \Kirby\Exception\Exception If the file does not exist
* @throws \Kirby\Exception\Exception On other errors
*/
public static function validateFile(string $file): void
{
    // read first (throws if the file is missing), then hand
    // the contents over to the handler's string validation
    $contents = static::readFile($file);

    static::validate($contents);
}
/**
 * Reads the contents of a file
 * for sanitization or validation
 *
 * This method only reads; it deliberately does not validate
 * the contents, because `sanitizeFile()` has to be able to
 * read files that would not pass validation.
 *
 * @param string $file
 * @return string
 *
 * @throws \Kirby\Exception\Exception If the file does not exist
 */
protected static function readFile(string $file): string
{
    $contents = F::read($file);

    if ($contents === false) {
        throw new Exception('The file "' . $file . '" does not exist');
    }

    // no `static::validate()` call here: validating while reading would
    // make `sanitizeFile()` throw on exactly the files it is supposed
    // to clean up and would run the validation twice in `validateFile()`
    return $contents;
}
}

144
kirby/src/Sane/Html.php Normal file
View file

@ -0,0 +1,144 @@
<?php
namespace Kirby\Sane;
/**
* Sane handler for HTML files
* @since 3.5.8
*
* @package Kirby Sane
* @author Bastian Allgeier <bastian@getkirby.com>,
* Lukas Bestle <lukas@getkirby.com>
* @link https://getkirby.com
* @copyright Bastian Allgeier GmbH
* @license https://opensource.org/licenses/MIT
*/
class Html extends DomHandler
{
    /**
     * Attribute name prefixes that are accepted on every tag;
     * passed to the sanitizer as the `allowedAttrPrefixes` option
     *
     * @var array
     */
    public static $allowedAttrPrefixes = [
        'aria-',
        'data-',
    ];

    /**
     * Attribute names that are accepted globally;
     * passed to the sanitizer as the `allowedAttrs` option
     *
     * @var array
     */
    public static $allowedAttrs = [
        'class',
        'id',
    ];

    /**
     * Allowed hostnames for HTTP(S) URLs
     *
     * NOTE(review): the value `true` (instead of a list of hostnames)
     * is assumed to disable the hostname restriction; confirm against
     * the URL check in `Kirby\Toolkit\Dom`
     *
     * @var array|true
     */
    public static $allowedDomains = true;

    /**
     * Map of the accepted tag names; each value is either
     * - an array listing the attributes allowed for that tag,
     * - `true` to allow any attribute from the `allowedAttrs` list or
     * - `false` to allow the tag without any attributes
     *
     * @var array
     */
    public static $allowedTags = [
        'a'          => ['href', 'rel', 'title', 'target'],
        'abbr'       => ['title'],
        'b'          => true,
        'body'       => true,
        'blockquote' => true,
        'br'         => true,
        'code'       => true,
        'dl'         => true,
        'dd'         => true,
        'del'        => true,
        'div'        => true,
        'dt'         => true,
        'em'         => true,
        'footer'     => true,
        'h1'         => true,
        'h2'         => true,
        'h3'         => true,
        'h4'         => true,
        'h5'         => true,
        'h6'         => true,
        'hr'         => true,
        'html'       => true,
        'i'          => true,
        'ins'        => true,
        'li'         => true,
        'small'      => true,
        'span'       => true,
        'strong'     => true,
        'sub'        => true,
        'sup'        => true,
        'ol'         => true,
        'p'          => true,
        'pre'        => true,
        's'          => true,
        'u'          => true,
        'ul'         => true,
    ];

    /**
     * Tags that are explicitly rejected
     *
     * IMPORTANT: The names need to be lower-case
     * as they are matched case-insensitively
     *
     * @var array
     */
    public static $disallowedTags = [
        'iframe',
        'meta',
        'object',
        'script',
        'style',
    ];

    /**
     * Names of the attributes whose values may be URLs;
     * passed to the sanitizer as the `urlAttrs` option
     *
     * @var array
     */
    public static $urlAttrs = [
        'href',
        'src',
        'xlink:href',
    ];

    /**
     * Document type that is handed to the DOM parser
     * (`'HTML'` or `'XML'`)
     *
     * @var string
     */
    protected static $type = 'HTML';

    /**
     * Extends the base sanitization options
     * with the HTML-specific allow and block lists
     *
     * @return array
     */
    protected static function options(): array
    {
        $options = parent::options();

        // HTML-specific lists
        $options['allowedAttrPrefixes'] = static::$allowedAttrPrefixes;
        $options['allowedAttrs']        = static::$allowedAttrs;

        // neither namespaces nor processing instructions
        // are accepted, no matter what the base class configures
        $options['allowedNamespaces'] = [];
        $options['allowedPIs']        = [];

        $options['allowedTags']    = static::$allowedTags;
        $options['disallowedTags'] = static::$disallowedTags;
        $options['urlAttrs']       = static::$urlAttrs;

        return $options;
    }
}

View file

@ -2,8 +2,9 @@
namespace Kirby\Sane;
use Kirby\Exception\LogicException;
use Kirby\Exception\NotFoundException;
use Kirby\Toolkit\F;
use Kirby\Filesystem\F;
/**
* The `Sane` class validates that files
@ -26,8 +27,10 @@ class Sane
* @var array
*/
public static $aliases = [
'image/svg+xml' => 'svg',
'application/xml' => 'xml',
'image/svg' => 'svg',
'image/svg+xml' => 'svg',
'text/html' => 'html',
'text/xml' => 'xml',
];
@ -37,6 +40,7 @@ class Sane
* @var array
*/
public static $handlers = [
'html' => 'Kirby\Sane\Html',
'svg' => 'Kirby\Sane\Svg',
'svgz' => 'Kirby\Sane\Svgz',
'xml' => 'Kirby\Sane\Xml',
@ -72,10 +76,68 @@ class Sane
throw new NotFoundException('Missing handler for type: "' . $type . '"');
}
/**
 * Passes the string to the handler for the given type
 * and returns the sanitized result
 * @since 3.6.0
 *
 * @param string $string
 * @param string $type
 * @return string
 */
public static function sanitize(string $string, string $type): string
{
    $handler = static::handler($type);

    return $handler->sanitize($string);
}
/**
 * Overwrites a file with its sanitized version;
 * unless a handler type is passed explicitly, the
 * handler is detected from the extension and MIME type
 * @since 3.6.0
 *
 * @param string $file
 * @param string|bool $typeLazy Explicit handler type string,
 *                              `true` for lazy autodetection or
 *                              `false` for normal autodetection
 * @return void
 *
 * @throws \Kirby\Exception\InvalidArgumentException If the file didn't pass validation
 * @throws \Kirby\Exception\LogicException If more than one handler applies
 * @throws \Kirby\Exception\NotFoundException If the handler was not found
 * @throws \Kirby\Exception\Exception On other errors
 */
public static function sanitizeFile(string $file, $typeLazy = false): void
{
    // an explicit type bypasses the autodetection
    if (is_string($typeLazy) === true) {
        static::handler($typeLazy)->sanitizeFile($file);
        return;
    }

    // the file can only be sanitized if exactly one handler matches
    $matches = static::handlersForFile($file, $typeLazy === true);
    $total   = count($matches);

    if ($total === 0) {
        // lazy autodetection came up empty, nothing to do
        return;
    }

    if ($total === 1) {
        $matches[0]->sanitizeFile($file);
        return;
    }

    // running the file through several handlers
    // would strip nearly everything from the output
    throw new LogicException(
        'Cannot sanitize file as more than one handler applies: ' .
        implode(', ', array_map('get_class', $matches))
    );
}
/**
* Validates file contents with the specified handler
*
* @param mixed $string
* @param string $string
* @param string $type
* @return void
*
@ -110,20 +172,38 @@ class Sane
return;
}
$options = [F::extension($file), F::mime($file)];
// execute all handlers, but each class only once for performance;
// filter out all empty options
$usedHandlers = [];
foreach (array_filter($options) as $option) {
$handler = static::handler($option, $typeLazy === true);
$handlerClass = $handler ? get_class($handler) : null;
if ($handler && in_array($handlerClass, $usedHandlers) === false) {
$handler->validateFile($file);
$usedHandlers[] = $handlerClass;
}
foreach (static::handlersForFile($file, $typeLazy === true) as $handler) {
$handler->validateFile($file);
}
}
/**
 * Looks up the handlers for the extension and the MIME type
 * of the file and returns each handler class at most once
 *
 * @param string $file
 * @param bool $lazy If set to `true`, undefined handlers are skipped
 * @return array<\Kirby\Sane\Handler>
 */
protected static function handlersForFile(string $file, bool $lazy = false): array
{
    // lookup keys for the handler search: extension first, then MIME type
    $candidates = [F::extension($file), F::mime($file)];

    // handler objects indexed by their class name,
    // which keeps the first handler of every class
    $byClass = [];

    // empty lookup keys are dropped before the search
    foreach (array_filter($candidates) as $candidate) {
        $handler = static::handler($candidate, $lazy);

        if (!$handler) {
            continue;
        }

        $class = get_class($handler);

        if (isset($byClass[$class]) === false) {
            $byClass[$class] = $handler;
        }
    }

    // callers expect a plain list in the order of discovery
    return array_values($byClass);
}
}

View file

@ -2,11 +2,12 @@
namespace Kirby\Sane;
use DOMAttr;
use DOMDocumentType;
use DOMNode;
use DOMNodeList;
use DOMElement;
use DOMXPath;
use Kirby\Exception\InvalidArgumentException;
use Kirby\Toolkit\Dom;
use Kirby\Toolkit\Str;
/**
@ -29,7 +30,23 @@ class Svg extends Xml
* @copyright 2015 Mario Heiderich
* @license https://www.apache.org/licenses/LICENSE-2.0
*/
public static $allowedAttributes = [
/**
* Global list of allowed attribute prefixes
*
* @var array
*/
public static $allowedAttrPrefixes = [
'aria-',
'data-',
];
/**
* Global list of allowed attributes
*
* @var array
*/
public static $allowedAttrs = [
'accent-height',
'accumulate',
'additive',
@ -213,90 +230,108 @@ class Svg extends Xml
'zoomAndPan',
];
public static $allowedElements = [
'svg',
'a',
'altGlyph',
'altGlyphDef',
'altGlyphItem',
'animateColor',
'animateMotion',
'animateTransform',
'circle',
'clipPath',
'defs',
'desc',
'ellipse',
'filter',
'font',
'g',
'glyph',
'glyphRef',
'hkern',
'image',
'line',
'linearGradient',
'marker',
'mask',
'metadata',
'mpath',
'path',
'pattern',
'polygon',
'polyline',
'radialGradient',
'rect',
'stop',
'style',
'switch',
'symbol',
'text',
'textPath',
'title',
'tref',
'tspan',
'use',
'view',
'vkern',
];
public static $allowedFilters = [
'feBlend',
'feColorMatrix',
'feComponentTransfer',
'feComposite',
'feConvolveMatrix',
'feDiffuseLighting',
'feDisplacementMap',
'feDistantLight',
'feFlood',
'feFuncA',
'feFuncB',
'feFuncG',
'feFuncR',
'feGaussianBlur',
'feMerge',
'feMergeNode',
'feMorphology',
'feOffset',
'fePointLight',
'feSpecularLighting',
'feSpotLight',
'feTile',
'feTurbulence',
];
/**
* Associative array of all allowed namespace URIs
*
* @var array
*/
public static $allowedNamespaces = [
'xmlns' => 'http://www.w3.org/2000/svg',
'xmlns:svg' => 'http://www.w3.org/2000/svg',
'xmlns:xlink' => 'http://www.w3.org/1999/xlink'
'' => 'http://www.w3.org/2000/svg',
'xlink' => 'http://www.w3.org/1999/xlink'
];
/**
* Associative array of all allowed tag names with the value
* of either an array with the list of all allowed attributes
* for this tag, `true` to allow any attribute from the
* `allowedAttrs` list or `false` to allow the tag without
* any attributes
*
* @todo Move attributes from the global list to their tags
*
* @var array
*/
public static $allowedTags = [
'svg' => true,
'a' => true,
'altGlyph' => true,
'altGlyphDef' => true,
'altGlyphItem' => true,
'animateColor' => true,
'animateMotion' => true,
'animateTransform' => true,
'circle' => true,
'clipPath' => true,
'defs' => true,
'desc' => true,
'ellipse' => true,
'filter' => true,
'font' => true,
'g' => true,
'glyph' => true,
'glyphRef' => true,
'hkern' => true,
'image' => true,
'line' => true,
'linearGradient' => true,
'marker' => true,
'mask' => true,
'metadata' => true,
'mpath' => true,
'path' => true,
'pattern' => true,
'polygon' => true,
'polyline' => true,
'radialGradient' => true,
'rect' => true,
'stop' => true,
'style' => true,
'switch' => true,
'symbol' => true,
'text' => true,
'textPath' => true,
'title' => true,
'tref' => true,
'tspan' => true,
'use' => true,
'view' => true,
'vkern' => true,
// filters
'feBlend' => true,
'feColorMatrix' => true,
'feComponentTransfer' => true,
'feComposite' => true,
'feConvolveMatrix' => true,
'feDiffuseLighting' => true,
'feDisplacementMap' => true,
'feDistantLight' => true,
'feFlood' => true,
'feFuncA' => true,
'feFuncB' => true,
'feFuncG' => true,
'feFuncR' => true,
'feGaussianBlur' => true,
'feMerge' => true,
'feMergeNode' => true,
'feMorphology' => true,
'feOffset' => true,
'fePointLight' => true,
'feSpecularLighting' => true,
'feSpotLight' => true,
'feTile' => true,
'feTurbulence' => true,
];
/**
* Array of explicitly disallowed tags
*
* IMPORTANT: Use lower-case names here because
* of the case-insensitive matching
*
* @var array
*/
public static $disallowedElements = [
public static $disallowedTags = [
'animate',
'color-profile',
'cursor',
@ -323,164 +358,120 @@ class Svg extends Xml
];
/**
* Validates file contents
* Custom callback for additional attribute sanitization
* @internal
*
* @param string $string
* @return void
*
* @throws \Kirby\Exception\InvalidArgumentException If the file didn't pass validation
* @param \DOMAttr $attr
* @return array Array with exception objects for each modification
*/
public static function validate(string $string): void
public static function sanitizeAttr(DOMAttr $attr): array
{
$svg = static::parse($string);
$element = $attr->ownerElement;
$name = $attr->name;
$value = $attr->value;
$errors = [];
$rootName = $svg->documentElement->nodeName;
if ($rootName !== 'svg') {
throw new InvalidArgumentException('The file is not a SVG (got <' . $rootName . '>)');
}
// block nested <use> elements ("Billion Laughs" DoS attack)
if (
$element->localName === 'use' &&
Str::contains($name, 'href') !== false &&
Str::startsWith($value, '#') === true
) {
// find the target (used element)
$id = str_replace('"', '', mb_substr($value, 1));
$target = (new DOMXPath($attr->ownerDocument))->query('//*[@id="' . $id . '"]')->item(0);
parent::validateDom($svg);
}
/**
* Validates the attributes of an element
*
* @param \DOMXPath $xPath
* @param \DOMNode $element
* @return void
*
* @throws \Kirby\Exception\InvalidArgumentException If any of the attributes is not valid
*/
protected static function validateAttrs(DOMXPath $xPath, DOMNode $element): void
{
$elementName = $element->nodeName;
foreach ($element->attributes ?? [] as $attr) {
$attrName = $attr->nodeName;
$attrValue = $attr->nodeValue;
// allow all aria and data attributes
$beginning = mb_substr($attrName, 0, 5);
if ($beginning === 'aria-' || $beginning === 'data-') {
continue;
}
if (in_array($attrName, static::$allowedAttributes) !== true) {
throw new InvalidArgumentException(
'The "' . $attrName . '" attribute (line ' .
$attr->getLineNo() . ') is not allowed in SVGs'
);
}
// block nested <use> elements ("Billion Laughs" DoS attack)
// the target must not contain any other <use> elements
if (
$elementName === 'use' &&
Str::contains($attrName, 'href') !== false &&
Str::startsWith($attrValue, '#') === true
is_a($target, 'DOMElement') === true &&
$target->getElementsByTagName('use')->count() > 0
) {
// find the target (used element)
$id = str_replace('"', '', mb_substr($attrValue, 1));
$target = $xPath->query('//*[@id="' . $id . '"]')->item(0);
// the target must not contain any other <use> elements
if (
is_a($target, 'DOMElement') === true &&
$target->getElementsByTagName('use')->count() > 0
) {
throw new InvalidArgumentException(
'Nested "use" elements are not allowed in SVGs (used in line ' .
$element->getLineNo() . ')'
);
}
$errors[] = new InvalidArgumentException(
'Nested "use" elements are not allowed' .
' (used in line ' . $element->getLineNo() . ')'
);
$element->removeAttributeNode($attr);
}
}
// validate `xmlns` attributes as well, which can only
// be properly extracted using SimpleXML
if (is_a($element, 'DOMElement') === true) {
$simpleXmlElement = simplexml_import_dom($element);
foreach ($simpleXmlElement->getDocNamespaces(false, false) as $namespace => $value) {
$namespace = 'xmlns' . ($namespace ? ':' . $namespace : '');
// check if the namespace is allowlisted
if (
isset(static::$allowedNamespaces[$namespace]) !== true ||
static::$allowedNamespaces[$namespace] !== $value
) {
throw new InvalidArgumentException(
'The namespace "' . $namespace . '" (around line ' .
$element->getLineNo() . ') is not allowed or has an invalid value'
);
}
}
}
parent::validateAttrs($xPath, $element);
return $errors;
}
/**
* Validates the doctype if present
* Custom callback for additional element sanitization
* @internal
*
* @param \DOMElement $element
* @return array Array with exception objects for each modification
*/
public static function sanitizeElement(DOMElement $element): array
{
    // Additional sanitization for <style> elements: if the CSS
    // references a URL that is not allowed, the whole element gets
    // removed from the document and one error is reported for it.
    // All other elements are returned untouched with an empty list.
    $errors = [];

    if ($element->tagName !== 'style') {
        return $errors;
    }

    // the options don't change between the URLs
    $options = static::options();

    foreach (Dom::extractUrls($element->textContent) as $url) {
        if (Dom::isAllowedUrl($url, $options) === true) {
            continue;
        }

        $errors[] = new InvalidArgumentException(
            'The URL is not allowed in the "style" element' .
            ' (around line ' . $element->getLineNo() . ')'
        );
        Dom::remove($element);

        // the element is detached from the document now; checking the
        // remaining URLs would try to remove the same element again
        break;
    }

    return $errors;
}
/**
* Custom callback for additional doctype validation
* @internal
*
* @param \DOMDocumentType $doctype
* @return void
*
* @throws \Kirby\Exception\InvalidArgumentException If the doctype is not valid
*/
protected static function validateDoctype(DOMDocumentType $doctype): void
public static function validateDoctype(DOMDocumentType $doctype): void
{
if (mb_strtolower($doctype->name) !== 'svg') {
throw new InvalidArgumentException('Invalid doctype');
}
parent::validateDoctype($doctype);
}
/**
* Validates all given DOM elements and their attributes
* Returns the sanitization options for the handler
*
* @param \DOMXPath $xPath
* @param \DOMNodeList $elements
* @return void
*
* @throws \Kirby\Exception\InvalidArgumentException If any of the elements is not valid
* @return array
*/
protected static function validateElements(DOMXPath $xPath, DOMNodeList $elements): void
protected static function options(): array
{
$allowedElements = array_merge(static::$allowedElements, static::$allowedFilters);
return array_merge(parent::options(), [
'allowedAttrPrefixes' => static::$allowedAttrPrefixes,
'allowedAttrs' => static::$allowedAttrs,
'allowedNamespaces' => static::$allowedNamespaces,
'allowedTags' => static::$allowedTags,
'disallowedTags' => static::$disallowedTags,
]);
}
foreach ($elements as $element) {
$elementName = $element->nodeName;
$elementNameLower = mb_strtolower($elementName);
/**
* Parses the given string into a `Toolkit\Dom` object
*
* @param string $string
* @return \Kirby\Toolkit\Dom
*
* @throws \Kirby\Exception\InvalidArgumentException If the file couldn't be parsed
*/
protected static function parse(string $string)
{
$svg = parent::parse($string);
// check for block-listed elements
if (in_array($elementNameLower, static::$disallowedElements) === true) {
throw new InvalidArgumentException(
'The "' . $elementName . '" element (line ' .
$element->getLineNo() . ') is not allowed in SVGs'
);
}
// check for allow-listed elements
if (in_array($elementName, $allowedElements) === false) {
throw new InvalidArgumentException(
'The "' . $elementName . '" element (line ' .
$element->getLineNo() . ') is not allowed in SVGs'
);
}
// check for URLs inside <style> elements
if ($elementName === 'style') {
foreach (static::extractUrls($element->textContent) as $url) {
if (static::isAllowedUrl($url) !== true) {
throw new InvalidArgumentException(
'The URL is not allowed in the <style> element' .
' (around line ' . $element->getLineNo() . ')'
);
}
}
}
// basic validation before we continue sanitizing/validating
$rootName = $svg->document()->documentElement->nodeName;
if ($rootName !== 'svg') {
throw new InvalidArgumentException('The file is not a SVG (got <' . $rootName . '>)');
}
parent::validateElements($xPath, $elements);
return $svg;
}
}

View file

@ -16,24 +16,57 @@ use Kirby\Exception\InvalidArgumentException;
*/
class Svgz extends Svg
{
/**
 * Uncompresses the gzip data, sanitizes the contained
 * SVG and returns the recompressed result
 *
 * @param string $string
 * @return string
 *
 * @throws \Kirby\Exception\InvalidArgumentException If the file couldn't be parsed or recompressed
 */
public static function sanitize(string $string): string
{
    $sanitized  = parent::sanitize(static::uncompress($string));
    $compressed = @gzencode($sanitized);

    if (is_string($compressed) === true) {
        return $compressed;
    }

    throw new InvalidArgumentException('Could not recompress gzip data'); // @codeCoverageIgnore
}
/**
 * Uncompresses the gzip data and validates
 * the contained SVG with the parent handler
 *
 * @param string $string
 * @return void
 *
 * @throws \Kirby\Exception\InvalidArgumentException If the file couldn't be parsed
 * @throws \Kirby\Exception\InvalidArgumentException If the file didn't pass validation
 */
public static function validate(string $string): void
{
    $svg = static::uncompress($string);

    parent::validate($svg);
}
/**
* Uncompresses the SVGZ data
*
* @param string $string
* @return string
*/
protected static function uncompress(string $string): string
{
// only support uncompressed files up to 10 MB to
// prevent gzip bombs from crashing the process
$uncompressed = @gzdecode($string, 10000000);
$string = @gzdecode($string, 10000000);
if (is_string($uncompressed) !== true) {
if (is_string($string) !== true) {
throw new InvalidArgumentException('Could not uncompress gzip data');
}
parent::validate($uncompressed);
return $string;
}
}

View file

@ -2,11 +2,8 @@
namespace Kirby\Sane;
use DOMDocument;
use DOMDocumentType;
use DOMNode;
use DOMNodeList;
use DOMXPath;
use DOMElement;
use Kirby\Exception\InvalidArgumentException;
use Kirby\Toolkit\Str;
@ -21,237 +18,49 @@ use Kirby\Toolkit\Str;
* @copyright Bastian Allgeier GmbH
* @license https://opensource.org/licenses/MIT
*/
class Xml extends Handler
class Xml extends DomHandler
{
public static $allowedDataAttrs = [
'data:image/png',
'data:image/gif',
'data:image/jpg',
'data:image/jpe',
'data:image/pjp',
'data:img/png',
'data:img/gif',
'data:img/jpg',
'data:img/jpe',
'data:img/pjp',
];
public static $allowedDomains = [];
public static $allowedPIs = [];
/**
* Validates file contents
* Custom callback for additional element sanitization
* @internal
*
* @param string $string
* @return void
*
* @throws \Kirby\Exception\InvalidArgumentException If the file didn't pass validation
* @param \DOMElement $element
* @return array Array with exception objects for each modification
*/
public static function validate(string $string): void
public static function sanitizeElement(DOMElement $element): array
{
$xml = static::parse($string);
$errors = [];
static::validateDom($xml);
}
/**
* Extracts all URLs wrapped in a url() wrapper. E.g. for style attributes.
*
* @param string $value
* @return array
*/
protected static function extractUrls(string $value): array
{
$count = preg_match_all(
'!url\(\s*[\'"]?(.*?)[\'"]?\s*\)!i',
static::trim($value),
$matches,
PREG_PATTERN_ORDER
);
if (is_int($count) === true && $count > 0) {
return $matches[1];
}
return [];
}
/**
* Checks if the URL is acceptable for href attributes
*
* @param string $url
* @return bool
*/
protected static function isAllowedUrl(string $url): bool
{
$url = mb_strtolower($url);
// allow empty URL values
if (empty($url) === true) {
return true;
}
// allow URLs that point to fragments inside the file
// as well as site-internal URLs
if (in_array(mb_substr($url, 0, 1), ['#', '/']) === true) {
return true;
}
// allow specific HTTP(S) URLs
if (
Str::startsWith($url, 'http://') === true ||
Str::startsWith($url, 'https://') === true
) {
$hostname = parse_url($url, PHP_URL_HOST);
if (in_array($hostname, static::$allowedDomains) === true) {
return true;
}
}
// allow listed data URIs
foreach (static::$allowedDataAttrs as $dataAttr) {
if (Str::startsWith($url, $dataAttr) === true) {
return true;
}
}
return false;
}
/**
* Tries to parse an XML string
*
* @param string $string
* @return \DOMDocument
*
* @throws \Kirby\Exception\InvalidArgumentException If the file couldn't be parsed
*/
protected static function parse(string $string)
{
$xml = new DOMDocument();
$xml->preserveWhiteSpace = false;
$xml->strictErrorChecking = false;
$loaderSetting = null;
if (\PHP_VERSION_ID < 80000) {
// prevent loading external entities to protect against XXE attacks;
// only needed for PHP versions before 8.0 (the function was deprecated
// as the disabled state is the new default in PHP 8.0+)
$loaderSetting = libxml_disable_entity_loader(true);
}
// switch to "user error handling"
$intErrorsSetting = libxml_use_internal_errors(true);
$load = $xml->loadXML($string);
if (\PHP_VERSION_ID < 80000) {
// ensure that we don't alter global state by
// resetting the original value
libxml_disable_entity_loader($loaderSetting);
}
// get one error for use below and reset the global state
$error = libxml_get_last_error();
libxml_clear_errors();
libxml_use_internal_errors($intErrorsSetting);
if ($load !== true) {
$message = 'The file could not be parsed';
if ($error !== false) {
$message .= ': ' . $error->message;
}
throw new InvalidArgumentException([
'fallback' => $message,
'details' => compact('error')
]);
}
return $xml;
}
/**
* Removes invisible ASCII characters from the value
*
* @param string $value
* @return string
*/
protected static function trim(string $value): string
{
return trim(preg_replace('/[^ -~]/u', '', $value));
}
/**
* Validates the attributes of an element
*
* @param \DOMXPath $xPath
* @param \DOMNode $element
* @return void
*
* @throws \Kirby\Exception\InvalidArgumentException If any of the attributes is not valid
*/
protected static function validateAttrs(DOMXPath $xPath, DOMNode $element): void
{
$elementName = $element->nodeName;
foreach ($element->attributes ?? [] as $attr) {
$attrName = $attr->nodeName;
$attrValue = $attr->nodeValue;
if (Str::contains($attrName, 'href') !== false) {
if (static::isAllowedUrl($attrValue) !== true) {
throw new InvalidArgumentException(
'The URL is not allowed in attribute: ' . $attrName .
' (line ' . $attr->getLineNo() . ')'
);
}
} else {
// check for unwanted URLs in other attributes
foreach (static::extractUrls($attrValue) as $url) {
if (static::isAllowedUrl($url) !== true) {
throw new InvalidArgumentException(
'The URL is not allowed in attribute: ' . $attrName .
' (line ' . $attr->getLineNo() . ')'
);
}
}
}
}
// if we are validating an XML file, block
// all SVG and HTML namespaces
if (static::class === self::class && is_a($element, 'DOMElement') === true) {
// if we are validating an XML file, block all SVG and HTML namespaces
if (static::class === self::class) {
$simpleXmlElement = simplexml_import_dom($element);
foreach ($simpleXmlElement->getDocNamespaces(false, false) as $namespace => $value) {
if (
Str::contains($value, 'html', true) === true ||
Str::contains($value, 'svg', true) === true
) {
throw new InvalidArgumentException(
'The namespace is not allowed in XML files' .
$element->removeAttributeNS($value, $namespace);
$errors[] = new InvalidArgumentException(
'The namespace "' . $value . '" is not allowed' .
' (around line ' . $element->getLineNo() . ')'
);
}
}
}
return $errors;
}
/**
* Validates the doctype if present
* Custom callback for additional doctype validation
* @internal
*
* @param \DOMDocumentType $doctype
* @return void
*
* @throws \Kirby\Exception\InvalidArgumentException If the doctype is not valid
*/
protected static function validateDoctype(DOMDocumentType $doctype): void
public static function validateDoctype(DOMDocumentType $doctype): void
{
// if we are validating an XML file, block
// all SVG and HTML doctypes
// if we are validating an XML file, block all SVG and HTML doctypes
if (
static::class === self::class &&
(
@ -261,79 +70,5 @@ class Xml extends Handler
) {
throw new InvalidArgumentException('The doctype is not allowed in XML files');
}
if (empty($doctype->publicId) === false || empty($doctype->systemId) === false) {
throw new InvalidArgumentException('The doctype must not reference external files');
}
if (empty($doctype->internalSubset) === false) {
throw new InvalidArgumentException('The doctype must not define a subset');
}
}
/**
* Validates a DOMDocument tree
*
* @param \DOMDocument $string
* @return void
*
* @throws \Kirby\Exception\InvalidArgumentException If the document didn't pass validation
*/
protected static function validateDom(DOMDocument $xml): void
{
foreach ($xml->childNodes as $child) {
if (is_a($child, 'DOMDocumentType') === true) {
static::validateDoctype($child);
}
}
// validate all processing instructions like <?xml-stylesheet
$xPath = new DOMXPath($xml);
$pis = $xPath->query('//processing-instruction()');
static::validateProcessingInstructions($pis);
// validate all elements in the document tree
$elements = $xml->getElementsByTagName('*');
static::validateElements($xPath, $elements);
}
/**
* Validates all given DOM elements and their attributes
*
* @param \DOMXPath $xPath
* @param \DOMNodeList $elements
* @return void
*
* @throws \Kirby\Exception\InvalidArgumentException If any of the elements is not valid
*/
protected static function validateElements(DOMXPath $xPath, DOMNodeList $elements): void
{
foreach ($elements as $element) {
// check for allow-listed attributes
static::validateAttrs($xPath, $element);
}
}
/**
* Validates the values of all given processing instructions
*
* @param \DOMNodeList $elements
* @return void
*
* @throws \Kirby\Exception\InvalidArgumentException If any of the processing instructions is not valid
*/
protected static function validateProcessingInstructions(DOMNodeList $elements): void
{
foreach ($elements as $element) {
$elementName = $element->nodeName;
// check for allow-listed processing instructions
if (in_array($elementName, static::$allowedPIs) === false) {
throw new InvalidArgumentException(
'The "' . $elementName . '" processing instruction (line ' .
$element->getLineNo() . ') is not allowed'
);
}
}
}
}