mirror of
https://github.com/morpheus65535/bazarr.git
synced 2025-04-24 14:47:16 -04:00
Replaced imghdr with filetype for image detection
This commit is contained in:
parent
2c7294de0d
commit
9825a3a109
28 changed files with 2894 additions and 0 deletions
11
custom_libs/imghdr.py
Normal file
11
custom_libs/imghdr.py
Normal file
|
@ -0,0 +1,11 @@
|
|||
import filetype
|
||||
|
||||
# Maps the MIME types reported by ``filetype`` to the short format names
# that the standard-library ``imghdr.what`` returned for the same formats.
# Any other detected type is reported as ``None`` by ``what``.
_IMG_MIME = {
    'image/jpeg': 'jpeg',
    'image/png': 'png',
    'image/gif': 'gif'
}


def what(_, img=None):
    """
    Drop-in replacement for the standard-library ``imghdr.what``.

    Args:
        _: file argument of the original API. It is only used when
            ``img`` is None, mirroring ``imghdr.what(file)``.
        img: image data (or anything else ``filetype.guess`` accepts).
            Optional, like the ``h`` argument of ``imghdr.what``.

    Returns:
        'jpeg', 'png' or 'gif' when the input is detected as one of
        those formats. Otherwise None.
    """
    # Fall back to the file argument only when no data was supplied at
    # all; an explicitly passed empty buffer is still treated as data.
    source = _ if img is None else img
    img_type = filetype.guess(source)
    return _IMG_MIME.get(img_type.mime) if img_type else None
|
8
libs/bin/filetype
Executable file
8
libs/bin/filetype
Executable file
|
@ -0,0 +1,8 @@
|
|||
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Console entry point for the ``filetype`` package.
#
# The interpreter is resolved through ``env`` instead of an absolute,
# machine-specific path so the script can run on any host.
import re
import sys
from filetype.__main__ import main
if __name__ == '__main__':
    # Drop a trailing "-script.pyw" or ".exe" from the program name
    # before handing control to the real entry point.
    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    sys.exit(main())
|
1
libs/filetype-1.2.0.dist-info/INSTALLER
Normal file
1
libs/filetype-1.2.0.dist-info/INSTALLER
Normal file
|
@ -0,0 +1 @@
|
|||
pip
|
21
libs/filetype-1.2.0.dist-info/LICENSE
Normal file
21
libs/filetype-1.2.0.dist-info/LICENSE
Normal file
|
@ -0,0 +1,21 @@
|
|||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2016 Tomás Aparicio
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
212
libs/filetype-1.2.0.dist-info/METADATA
Normal file
212
libs/filetype-1.2.0.dist-info/METADATA
Normal file
|
@ -0,0 +1,212 @@
|
|||
Metadata-Version: 2.1
|
||||
Name: filetype
|
||||
Version: 1.2.0
|
||||
Summary: Infer file type and MIME type of any file/buffer. No external dependencies.
|
||||
Home-page: https://github.com/h2non/filetype.py
|
||||
Download-URL: https://github.com/h2non/filetype.py/tarball/master
|
||||
Author: Tomas Aparicio
|
||||
Author-email: tomas@aparicio.me
|
||||
License: MIT
|
||||
Keywords: file libmagic magic infer numbers magicnumbers discovery mime type kind
|
||||
Platform: any
|
||||
Classifier: Development Status :: 5 - Production/Stable
|
||||
Classifier: Environment :: Console
|
||||
Classifier: Environment :: Web Environment
|
||||
Classifier: Intended Audience :: Developers
|
||||
Classifier: Intended Audience :: System Administrators
|
||||
Classifier: License :: OSI Approved :: MIT License
|
||||
Classifier: Operating System :: OS Independent
|
||||
Classifier: Programming Language :: Python :: 3
|
||||
Classifier: Programming Language :: Python :: 3.5
|
||||
Classifier: Programming Language :: Python :: 3.6
|
||||
Classifier: Programming Language :: Python :: 3.7
|
||||
Classifier: Programming Language :: Python :: 3.8
|
||||
Classifier: Programming Language :: Python :: 3.9
|
||||
Classifier: Topic :: System
|
||||
Classifier: Topic :: System :: Filesystems
|
||||
Classifier: Topic :: Utilities
|
||||
License-File: LICENSE
|
||||
|
||||
filetype.py |Build Status| |PyPI| |Pyversions| |API|
|
||||
====================================================
|
||||
|
||||
Small and dependency free `Python`_ package to infer file type and MIME
|
||||
type by checking the `magic numbers`_ signature of a file or buffer.
|
||||
|
||||
This is a Python port of the `filetype`_ Go package.
|
||||
|
||||
Features
|
||||
--------
|
||||
|
||||
- Simple and friendly API
|
||||
- Supports a `wide range`_ of file types
|
||||
- Provides file extension and MIME type inference
|
||||
- File discovery by extension or MIME type
|
||||
- File discovery by kind (image, video, audio…)
|
||||
- `Pluggable`_: add new custom type matchers
|
||||
- `Fast`_, even processing large files
|
||||
- Only the first 261 bytes, representing the max file header, are required, so
|
||||
you can just `pass a list of bytes`_
|
||||
- Dependency free (just Python code, no C extensions, no libmagic
|
||||
bindings)
|
||||
- Cross-platform file recognition
|
||||
|
||||
Installation
|
||||
------------
|
||||
|
||||
::
|
||||
|
||||
pip install filetype
|
||||
|
||||
API
|
||||
---
|
||||
|
||||
See `annotated API reference`_.
|
||||
|
||||
Examples
|
||||
--------
|
||||
|
||||
Simple file type checking
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
import filetype
|
||||
|
||||
def main():
|
||||
kind = filetype.guess('tests/fixtures/sample.jpg')
|
||||
if kind is None:
|
||||
print('Cannot guess file type!')
|
||||
return
|
||||
|
||||
print('File extension: %s' % kind.extension)
|
||||
print('File MIME type: %s' % kind.mime)
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
||||
Supported types
|
||||
---------------
|
||||
|
||||
Image
|
||||
^^^^^
|
||||
|
||||
- **dwg** - ``image/vnd.dwg``
|
||||
- **xcf** - ``image/x-xcf``
|
||||
- **jpg** - ``image/jpeg``
|
||||
- **jpx** - ``image/jpx``
|
||||
- **png** - ``image/png``
|
||||
- **apng** - ``image/apng``
|
||||
- **gif** - ``image/gif``
|
||||
- **webp** - ``image/webp``
|
||||
- **cr2** - ``image/x-canon-cr2``
|
||||
- **tif** - ``image/tiff``
|
||||
- **bmp** - ``image/bmp``
|
||||
- **jxr** - ``image/vnd.ms-photo``
|
||||
- **psd** - ``image/vnd.adobe.photoshop``
|
||||
- **ico** - ``image/x-icon``
|
||||
- **heic** - ``image/heic``
|
||||
- **avif** - ``image/avif``
|
||||
|
||||
Video
|
||||
^^^^^
|
||||
|
||||
- **3gp** - ``video/3gpp``
|
||||
- **mp4** - ``video/mp4``
|
||||
- **m4v** - ``video/x-m4v``
|
||||
- **mkv** - ``video/x-matroska``
|
||||
- **webm** - ``video/webm``
|
||||
- **mov** - ``video/quicktime``
|
||||
- **avi** - ``video/x-msvideo``
|
||||
- **wmv** - ``video/x-ms-wmv``
|
||||
- **mpg** - ``video/mpeg``
|
||||
- **flv** - ``video/x-flv``
|
||||
|
||||
Audio
|
||||
^^^^^
|
||||
|
||||
- **aac** - ``audio/aac``
|
||||
- **mid** - ``audio/midi``
|
||||
- **mp3** - ``audio/mpeg``
|
||||
- **m4a** - ``audio/mp4``
|
||||
- **ogg** - ``audio/ogg``
|
||||
- **flac** - ``audio/x-flac``
|
||||
- **wav** - ``audio/x-wav``
|
||||
- **amr** - ``audio/amr``
|
||||
- **aiff** - ``audio/x-aiff``
|
||||
|
||||
Archive
|
||||
^^^^^^^
|
||||
|
||||
- **br** - ``application/x-brotli``
|
||||
- **rpm** - ``application/x-rpm``
|
||||
- **dcm** - ``application/dicom``
|
||||
- **epub** - ``application/epub+zip``
|
||||
- **zip** - ``application/zip``
|
||||
- **tar** - ``application/x-tar``
|
||||
- **rar** - ``application/x-rar-compressed``
|
||||
- **gz** - ``application/gzip``
|
||||
- **bz2** - ``application/x-bzip2``
|
||||
- **7z** - ``application/x-7z-compressed``
|
||||
- **xz** - ``application/x-xz``
|
||||
- **pdf** - ``application/pdf``
|
||||
- **exe** - ``application/x-msdownload``
|
||||
- **swf** - ``application/x-shockwave-flash``
|
||||
- **rtf** - ``application/rtf``
|
||||
- **eot** - ``application/octet-stream``
|
||||
- **ps** - ``application/postscript``
|
||||
- **sqlite** - ``application/x-sqlite3``
|
||||
- **nes** - ``application/x-nintendo-nes-rom``
|
||||
- **crx** - ``application/x-google-chrome-extension``
|
||||
- **cab** - ``application/vnd.ms-cab-compressed``
|
||||
- **deb** - ``application/x-deb``
|
||||
- **ar** - ``application/x-unix-archive``
|
||||
- **Z** - ``application/x-compress``
|
||||
- **lzo** - ``application/x-lzop``
|
||||
- **lz** - ``application/x-lzip``
|
||||
- **lz4** - ``application/x-lz4``
|
||||
- **zstd** - ``application/zstd``
|
||||
|
||||
Document
|
||||
^^^^^^^^
|
||||
|
||||
- **doc** - ``application/msword``
|
||||
- **docx** - ``application/vnd.openxmlformats-officedocument.wordprocessingml.document``
|
||||
- **odt** - ``application/vnd.oasis.opendocument.text``
|
||||
- **xls** - ``application/vnd.ms-excel``
|
||||
- **xlsx** - ``application/vnd.openxmlformats-officedocument.spreadsheetml.sheet``
|
||||
- **ods** - ``application/vnd.oasis.opendocument.spreadsheet``
|
||||
- **ppt** - ``application/vnd.ms-powerpoint``
|
||||
- **pptx** - ``application/vnd.openxmlformats-officedocument.presentationml.presentation``
|
||||
- **odp** - ``application/vnd.oasis.opendocument.presentation``
|
||||
|
||||
Font
|
||||
^^^^
|
||||
|
||||
- **woff** - ``application/font-woff``
|
||||
- **woff2** - ``application/font-woff``
|
||||
- **ttf** - ``application/font-sfnt``
|
||||
- **otf** - ``application/font-sfnt``
|
||||
|
||||
Application
|
||||
^^^^^^^^^^^
|
||||
|
||||
- **wasm** - ``application/wasm``
|
||||
|
||||
.. _Python: http://python.org
|
||||
.. _magic numbers: https://en.wikipedia.org/wiki/Magic_number_(programming)#Magic_numbers_in_files
|
||||
.. _filetype: https://github.com/h2non/filetype
|
||||
.. _wide range: #supported-types
|
||||
.. _Pluggable: #add-additional-file-type-matchers
|
||||
.. _Fast: #benchmarks
|
||||
.. _pass a list of bytes: #file-header
|
||||
.. _annotated API reference: https://h2non.github.io/filetype.py/
|
||||
|
||||
.. |Build Status| image:: https://travis-ci.org/h2non/filetype.py.svg?branch=master
|
||||
:target: https://travis-ci.org/h2non/filetype.py
|
||||
.. |PyPI| image:: https://img.shields.io/pypi/v/filetype.svg?maxAge=2592000&style=flat-square
|
||||
:target: https://pypi.python.org/pypi/filetype
|
||||
.. |Pyversions| image:: https://img.shields.io/pypi/pyversions/filetype.svg?style=flat-square
|
||||
:target: https://pypi.python.org/pypi/filetype
|
||||
.. |API| image:: https://img.shields.io/badge/api-docs-green.svg
|
||||
:target: https://h2non.github.io/filetype.py
|
26
libs/filetype-1.2.0.dist-info/RECORD
Normal file
26
libs/filetype-1.2.0.dist-info/RECORD
Normal file
|
@ -0,0 +1,26 @@
|
|||
../../bin/filetype,sha256=qhZzWIjpierkU7BFhcUqdo8jpXcpw4t1BOBkR_UK1fY,239
|
||||
filetype-1.2.0.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
|
||||
filetype-1.2.0.dist-info/LICENSE,sha256=jkTiqjWzcb3MhWvPDSRCpBDdVf3maw38L83wdtl5Rqw,1082
|
||||
filetype-1.2.0.dist-info/METADATA,sha256=IukENsJLniEMUy5Kauljm-JAi2RR1th0YD3E7hXw8UM,6532
|
||||
filetype-1.2.0.dist-info/RECORD,,
|
||||
filetype-1.2.0.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
||||
filetype-1.2.0.dist-info/WHEEL,sha256=P2T-6epvtXQ2cBOE_U1K4_noqlJFN3tj15djMgEu4NM,110
|
||||
filetype-1.2.0.dist-info/entry_points.txt,sha256=FW9vQKv-y3mEcT51mUaPeIu3vixzcr6WdLfjD4SFUVM,52
|
||||
filetype-1.2.0.dist-info/top_level.txt,sha256=9E4F1bIRPoq5TGtC-BHwM1_svcsWYRiC0N_qAGrlW0Y,9
|
||||
filetype-1.2.0.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
||||
filetype/__init__.py,sha256=7c1C2XIbB7md1oI-0nwBzxoD52he_7NYry3YQV5OXa8,223
|
||||
filetype/__main__.py,sha256=4-2VK-0hB2mLL_HukB8cOa0jsQKLq95gG4UhCPqF0rg,803
|
||||
filetype/filetype.py,sha256=SBYUBugfBQSO9z7zyWaXOak6UpLUlmZZ--5FpN0fybM,2122
|
||||
filetype/helpers.py,sha256=O0hofWlmG8J_X81IuQ8KszvjgnUb-O6BzO-wUJRTLV0,2947
|
||||
filetype/match.py,sha256=XUHst4XDmYlJtfYAMlGuySl2IWia2UoCb8NIDYiCRgI,3288
|
||||
filetype/types/__init__.py,sha256=baH8xCYyatykxtCUccgGGIwNdit6x5jGsXxWTvpo4t8,2085
|
||||
filetype/types/application.py,sha256=6Knc4Y38GbfuoSjdPl29vIsusjNIPjLWVk22nxCnS9I,498
|
||||
filetype/types/archive.py,sha256=kZWEHJmJ1NmQT0Hm-JmEHI1TXg5NrzxJ4YCbfV-6y8c,17006
|
||||
filetype/types/audio.py,sha256=oOAS-cdA175rELcK_17w-gylJkmSh8FTrAoVAOwsfUA,4960
|
||||
filetype/types/base.py,sha256=dvvqVjuSqwtbh2qyP7QnmeWUWUsfrHwJ_rOEgJmDQZ8,647
|
||||
filetype/types/document.py,sha256=mxOhuymNIpsqMWCgy-Fm8vkSgDoSeCXYADxmO1JPx6Q,7513
|
||||
filetype/types/font.py,sha256=nP5Ey-EcKMU4phGYtIlQ08I5cecWnr5vzDLVbiPOiyY,2924
|
||||
filetype/types/image.py,sha256=r8pINANPJZbCEmZKn8F74fFffk4INtDin_GtQtQImZs,9130
|
||||
filetype/types/isobmff.py,sha256=zLXCbTET6wp_9yq8jE3bhBRTaCdSAKma5ElyHVGd2Sk,958
|
||||
filetype/types/video.py,sha256=DfkFd5ofnEK25r_n71LxjX3nAAgO8xJ7Op_lL9uEbNc,5371
|
||||
filetype/utils.py,sha256=sjZCMfYawZ6RWN1Dr3jDmsqIjLSEBFubNgi8HROjaPQ,2089
|
0
libs/filetype-1.2.0.dist-info/REQUESTED
Normal file
0
libs/filetype-1.2.0.dist-info/REQUESTED
Normal file
6
libs/filetype-1.2.0.dist-info/WHEEL
Normal file
6
libs/filetype-1.2.0.dist-info/WHEEL
Normal file
|
@ -0,0 +1,6 @@
|
|||
Wheel-Version: 1.0
|
||||
Generator: bdist_wheel (0.41.3)
|
||||
Root-Is-Purelib: true
|
||||
Tag: py2-none-any
|
||||
Tag: py3-none-any
|
||||
|
2
libs/filetype-1.2.0.dist-info/entry_points.txt
Normal file
2
libs/filetype-1.2.0.dist-info/entry_points.txt
Normal file
|
@ -0,0 +1,2 @@
|
|||
[console_scripts]
|
||||
filetype = filetype.__main__:main
|
1
libs/filetype-1.2.0.dist-info/top_level.txt
Normal file
1
libs/filetype-1.2.0.dist-info/top_level.txt
Normal file
|
@ -0,0 +1 @@
|
|||
filetype
|
1
libs/filetype-1.2.0.dist-info/zip-safe
Normal file
1
libs/filetype-1.2.0.dist-info/zip-safe
Normal file
|
@ -0,0 +1 @@
|
|||
|
10
libs/filetype/__init__.py
Normal file
10
libs/filetype/__init__.py
Normal file
|
@ -0,0 +1,10 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import absolute_import
|
||||
|
||||
from .filetype import * # noqa
|
||||
from .helpers import * # noqa
|
||||
from .match import * # noqa
|
||||
|
||||
# Current package semver version
|
||||
__version__ = version = '1.2.0'
|
37
libs/filetype/__main__.py
Normal file
37
libs/filetype/__main__.py
Normal file
|
@ -0,0 +1,37 @@
|
|||
import sys
|
||||
|
||||
import filetype
|
||||
|
||||
|
||||
def guess(path):
    """
    Print the detected type of a single file to stdout.

    Args:
        path: path of the file to inspect, as given on the command line.

    Returns:
        None. One line is printed: either the extension and MIME type,
        or a failure notice when the type cannot be determined.
    """
    detected = filetype.guess(path)
    if detected is not None:
        report = '{}: {} ({})'.format(path, detected.extension, detected.mime)
    else:
        report = '{}: File type determination failure.'.format(path)
    print(report)
|
||||
|
||||
|
||||
def main():
    """
    Command line entry point: print the detected type of each FILE.

    Parses ``-f/--file FILE [FILE ...]`` and ``-v/--version``. When no
    file was supplied the usage help is printed and the process exits
    with status 1.
    """
    import argparse

    parser = argparse.ArgumentParser(
        prog='filetype', description='Determine type of FILEs.'
    )
    parser.add_argument('-f', '--file', nargs='+')
    parser.add_argument(
        '-v', '--version', action='version',
        version='%(prog)s ' + filetype.version,
        help='output version information and exit'
    )

    args = parser.parse_args()
    # ``args.file`` stays None whenever -f was not given. That covers the
    # plain "no arguments" case and also invocations such as
    # ``filetype --``, which would otherwise crash when iterating None.
    if not args.file:
        parser.print_help()
        sys.exit(1)

    for i in args.file:
        guess(i)


if __name__ == '__main__':
    main()
|
98
libs/filetype/filetype.py
Normal file
98
libs/filetype/filetype.py
Normal file
|
@ -0,0 +1,98 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import absolute_import
|
||||
|
||||
from .match import match
|
||||
from .types import TYPES, Type
|
||||
|
||||
# Expose supported matchers types
|
||||
types = TYPES
|
||||
|
||||
|
||||
def guess(obj):
    """
    Infers the type of the given input.

    Function is overloaded to accept multiple types in input
    and perform the needed type inference based on it.

    Args:
        obj: path to file, bytes or bytearray.

    Returns:
        The matched type instance. Otherwise None. Falsy input
        (None, empty string, empty buffer) yields None without
        running any matcher.

    Raises:
        TypeError: if obj is not a supported type.
    """
    if not obj:
        return None
    return match(obj)
|
||||
|
||||
|
||||
def guess_mime(obj):
    """
    Infers the file type of the given input
    and returns its MIME type.

    Args:
        obj: path to file, bytes or bytearray.

    Returns:
        The matched MIME type as string. Otherwise None.

    Raises:
        TypeError: if obj is not a supported type.
    """
    kind = guess(obj)
    if kind:
        return kind.mime
    # No match: hand back the falsy result of guess() unchanged.
    return kind
|
||||
|
||||
|
||||
def guess_extension(obj):
    """
    Infers the file type of the given input
    and returns its RFC file extension.

    Args:
        obj: path to file, bytes or bytearray.

    Returns:
        The matched file extension as string. Otherwise None.

    Raises:
        TypeError: if obj is not a supported type.
    """
    kind = guess(obj)
    if kind:
        return kind.extension
    # No match: hand back the falsy result of guess() unchanged.
    return kind
|
||||
|
||||
|
||||
def get_type(mime=None, ext=None):
    """
    Returns the file type instance searching by
    MIME type or file extension.

    The registered types are scanned in order and the first one whose
    extension equals ``ext`` or whose MIME type equals ``mime`` wins.

    Args:
        ext: file extension string. E.g: jpg, png, mp4, mp3
        mime: MIME string. E.g: image/jpeg, video/mpeg

    Returns:
        The matched file type instance. Otherwise None.
    """
    candidates = (
        kind for kind in types
        if kind.extension == ext or kind.mime == mime
    )
    return next(candidates, None)
|
||||
|
||||
|
||||
def add_type(instance):
    """
    Adds a new type matcher instance to the supported types.

    The instance is placed at the front of the list of supported types.

    Args:
        instance: Type inherited instance.

    Returns:
        None

    Raises:
        TypeError: if instance does not inherit from Type.
    """
    if isinstance(instance, Type):
        types.insert(0, instance)
        return

    raise TypeError('instance must inherit from filetype.types.Type')
|
140
libs/filetype/helpers.py
Normal file
140
libs/filetype/helpers.py
Normal file
|
@ -0,0 +1,140 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import absolute_import
|
||||
from .types import TYPES
|
||||
from .match import (
|
||||
image_match, font_match, document_match,
|
||||
video_match, audio_match, archive_match
|
||||
)
|
||||
|
||||
|
||||
def is_extension_supported(ext):
    """
    Checks if the given extension string is
    one of the supported by the file matchers.

    Args:
        ext (str): file extension string. E.g: jpg, png, mp4, mp3

    Returns:
        True if the file extension is supported.
        Otherwise False.
    """
    return any(kind.extension == ext for kind in TYPES)
|
||||
|
||||
|
||||
def is_mime_supported(mime):
    """
    Checks if the given MIME type string is
    one of the supported by the file matchers.

    Args:
        mime (str): MIME string. E.g: image/jpeg, video/mpeg

    Returns:
        True if the MIME type is supported.
        Otherwise False.
    """
    return any(kind.mime == mime for kind in TYPES)
|
||||
|
||||
|
||||
def is_image(obj):
    """
    Checks if a given input is a supported type image.

    Args:
        obj: path to file, bytes or bytearray.

    Returns:
        True if obj is a valid image. Otherwise False.

    Raises:
        TypeError: if obj is not a supported type.
    """
    matched = image_match(obj)
    return matched is not None
|
||||
|
||||
|
||||
def is_archive(obj):
    """
    Checks if a given input is a supported type archive.

    Args:
        obj: path to file, bytes or bytearray.

    Returns:
        True if obj is a valid archive. Otherwise False.

    Raises:
        TypeError: if obj is not a supported type.
    """
    matched = archive_match(obj)
    return matched is not None
|
||||
|
||||
|
||||
def is_audio(obj):
    """
    Checks if a given input is a supported type audio.

    Args:
        obj: path to file, bytes or bytearray.

    Returns:
        True if obj is a valid audio. Otherwise False.

    Raises:
        TypeError: if obj is not a supported type.
    """
    matched = audio_match(obj)
    return matched is not None
|
||||
|
||||
|
||||
def is_video(obj):
    """
    Checks if a given input is a supported type video.

    Args:
        obj: path to file, bytes or bytearray.

    Returns:
        True if obj is a valid video. Otherwise False.

    Raises:
        TypeError: if obj is not a supported type.
    """
    matched = video_match(obj)
    return matched is not None
|
||||
|
||||
|
||||
def is_font(obj):
    """
    Checks if a given input is a supported type font.

    Args:
        obj: path to file, bytes or bytearray.

    Returns:
        True if obj is a valid font. Otherwise False.

    Raises:
        TypeError: if obj is not a supported type.
    """
    matched = font_match(obj)
    return matched is not None
|
||||
|
||||
|
||||
def is_document(obj):
    """
    Checks if a given input is a supported type document.

    Args:
        obj: path to file, bytes or bytearray.

    Returns:
        True if obj is a valid document. Otherwise False.

    Raises:
        TypeError: if obj is not a supported type.
    """
    matched = document_match(obj)
    return matched is not None
|
155
libs/filetype/match.py
Normal file
155
libs/filetype/match.py
Normal file
|
@ -0,0 +1,155 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import absolute_import
|
||||
|
||||
from .types import ARCHIVE as archive_matchers
|
||||
from .types import AUDIO as audio_matchers
|
||||
from .types import APPLICATION as application_matchers
|
||||
from .types import DOCUMENT as document_matchers
|
||||
from .types import FONT as font_matchers
|
||||
from .types import IMAGE as image_matchers
|
||||
from .types import VIDEO as video_matchers
|
||||
from .types import TYPES
|
||||
from .utils import get_bytes
|
||||
|
||||
|
||||
def match(obj, matchers=TYPES):
    """
    Matches the given input against the available
    file type matchers.

    Matchers are tried in order and the first one that accepts the
    input's leading bytes is returned.

    Args:
        obj: path to file, bytes or bytearray.
        matchers: iterable of type matchers to try. Defaults to every
            supported type.

    Returns:
        Type instance if type matches. Otherwise None.

    Raises:
        TypeError: if obj is not a supported type.
    """
    buf = get_bytes(obj)
    hits = (matcher for matcher in matchers if matcher.match(buf))
    return next(hits, None)
|
||||
|
||||
|
||||
def image_match(obj):
    """
    Matches the given input against the available
    image type matchers.

    Args:
        obj: path to file, bytes or bytearray.

    Returns:
        Type instance if matches. Otherwise None.

    Raises:
        TypeError: if obj is not a supported type.
    """
    return match(obj, matchers=image_matchers)
|
||||
|
||||
|
||||
def font_match(obj):
    """
    Matches the given input against the available
    font type matchers.

    Args:
        obj: path to file, bytes or bytearray.

    Returns:
        Type instance if matches. Otherwise None.

    Raises:
        TypeError: if obj is not a supported type.
    """
    return match(obj, matchers=font_matchers)
|
||||
|
||||
|
||||
def video_match(obj):
    """
    Matches the given input against the available
    video type matchers.

    Args:
        obj: path to file, bytes or bytearray.

    Returns:
        Type instance if matches. Otherwise None.

    Raises:
        TypeError: if obj is not a supported type.
    """
    return match(obj, matchers=video_matchers)
|
||||
|
||||
|
||||
def audio_match(obj):
    """
    Matches the given input against the available
    audio type matchers.

    Args:
        obj: path to file, bytes or bytearray.

    Returns:
        Type instance if matches. Otherwise None.

    Raises:
        TypeError: if obj is not a supported type.
    """
    return match(obj, matchers=audio_matchers)
|
||||
|
||||
|
||||
def archive_match(obj):
    """
    Matches the given input against the available
    archive type matchers.

    Args:
        obj: path to file, bytes or bytearray.

    Returns:
        Type instance if matches. Otherwise None.

    Raises:
        TypeError: if obj is not a supported type.
    """
    return match(obj, matchers=archive_matchers)
|
||||
|
||||
|
||||
def application_match(obj):
    """
    Matches the given input against the available
    application type matchers.

    Args:
        obj: path to file, bytes or bytearray.

    Returns:
        Type instance if matches. Otherwise None.

    Raises:
        TypeError: if obj is not a supported type.
    """
    return match(obj, matchers=application_matchers)
|
||||
|
||||
|
||||
def document_match(obj):
    """
    Matches the given input against the available
    document type matchers.

    Args:
        obj: path to file, bytes or bytearray.

    Returns:
        Type instance if matches. Otherwise None.

    Raises:
        TypeError: if obj is not a supported type.
    """
    return match(obj, matchers=document_matchers)
|
118
libs/filetype/types/__init__.py
Normal file
118
libs/filetype/types/__init__.py
Normal file
|
@ -0,0 +1,118 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import absolute_import
|
||||
|
||||
from . import archive
|
||||
from . import audio
|
||||
from . import application
|
||||
from . import document
|
||||
from . import font
|
||||
from . import image
|
||||
from . import video
|
||||
from .base import Type # noqa
|
||||
|
||||
# Each tuple below lists one matcher instance per supported format.
# The order of the entries is kept exactly as declared.

# Supported image types
IMAGE = (
    image.Dwg(), image.Xcf(), image.Jpeg(), image.Jpx(),
    image.Apng(), image.Png(), image.Gif(), image.Webp(),
    image.Tiff(), image.Cr2(), image.Bmp(), image.Jxr(),
    image.Psd(), image.Ico(), image.Heic(), image.Dcm(),
    image.Avif(),
)

# Supported video types
VIDEO = (
    video.M3gp(), video.Mp4(), video.M4v(), video.Mkv(),
    video.Mov(), video.Avi(), video.Wmv(), video.Mpeg(),
    video.Webm(), video.Flv(),
)

# Supported audio types
AUDIO = (
    audio.Aac(), audio.Midi(), audio.Mp3(), audio.M4a(),
    audio.Ogg(), audio.Flac(), audio.Wav(), audio.Amr(),
    audio.Aiff(),
)

# Supported font types
FONT = (
    font.Woff(),
    font.Woff2(),
    font.Ttf(),
    font.Otf(),
)

# Supported archive container types
ARCHIVE = (
    archive.Br(), archive.Rpm(), archive.Dcm(), archive.Epub(),
    archive.Zip(), archive.Tar(), archive.Rar(), archive.Gz(),
    archive.Bz2(), archive.SevenZ(), archive.Pdf(), archive.Exe(),
    archive.Swf(), archive.Rtf(), archive.Nes(), archive.Crx(),
    archive.Cab(), archive.Eot(), archive.Ps(), archive.Xz(),
    archive.Sqlite(), archive.Deb(), archive.Ar(), archive.Z(),
    archive.Lzop(), archive.Lz(), archive.Elf(), archive.Lz4(),
    archive.Zstd(),
)

# Supported application types
APPLICATION = (application.Wasm(),)

# Supported document types
DOCUMENT = (
    document.Doc(), document.Docx(), document.Odt(),
    document.Xls(), document.Xlsx(), document.Ods(),
    document.Ppt(), document.Pptx(), document.Odp(),
)


# Expose supported type matchers as one mutable list, concatenated
# category by category.
TYPES = list(
    IMAGE + AUDIO + VIDEO + FONT + DOCUMENT + ARCHIVE + APPLICATION
)
|
22
libs/filetype/types/application.py
Normal file
22
libs/filetype/types/application.py
Normal file
|
@ -0,0 +1,22 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import absolute_import
|
||||
|
||||
from .base import Type
|
||||
|
||||
|
||||
class Wasm(Type):
    """Implements the Wasm application type matcher."""

    MIME = 'application/wasm'
    EXTENSION = 'wasm'

    def __init__(self):
        super(Wasm, self).__init__(mime=Wasm.MIME, extension=Wasm.EXTENSION)

    def match(self, buf):
        """Return True when buf starts with the 8-byte Wasm signature."""
        # "\0asm" followed by the bytes 01 00 00 00.
        signature = bytearray([0x00, 0x61, 0x73, 0x6d,
                               0x01, 0x00, 0x00, 0x00])
        return buf[:8] == signature
|
687
libs/filetype/types/archive.py
Normal file
687
libs/filetype/types/archive.py
Normal file
|
@ -0,0 +1,687 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import absolute_import
|
||||
|
||||
import struct
|
||||
|
||||
from .base import Type
|
||||
|
||||
|
||||
class Epub(Type):
    """
    Implements the EPUB archive type matcher.
    """
    MIME = 'application/epub+zip'
    EXTENSION = 'epub'

    def __init__(self):
        super(Epub, self).__init__(mime=Epub.MIME, extension=Epub.EXTENSION)

    def match(self, buf):
        """
        Return True when buf starts with "PK\\x03\\x04" and bytes 30-57
        spell "mimetypeapplication/epub+zip".
        """
        if not len(buf) > 57:
            return False

        # (offset, expected byte values) pairs, checked left to right.
        expected = (
            (0, (0x50, 0x4B, 0x3, 0x4)),
            (30, (0x6D, 0x69, 0x6D, 0x65, 0x74, 0x79, 0x70, 0x65,
                  0x61, 0x70, 0x70, 0x6C, 0x69, 0x63, 0x61, 0x74,
                  0x69, 0x6F, 0x6E, 0x2F, 0x65, 0x70, 0x75, 0x62,
                  0x2B, 0x7A, 0x69, 0x70)),
        )
        return all(
            buf[offset + index] == value
            for offset, values in expected
            for index, value in enumerate(values)
        )
|
||||
|
||||
|
||||
class Zip(Type):
    """
    Implements the Zip archive type matcher.
    """
    MIME = 'application/zip'
    EXTENSION = 'zip'

    def __init__(self):
        super(Zip, self).__init__(mime=Zip.MIME, extension=Zip.EXTENSION)

    def match(self, buf):
        """
        Return True when buf starts with "PK", followed by a byte in
        (3, 5, 7) and then a byte in (4, 6, 8).
        """
        if not len(buf) > 3:
            return False
        if not (buf[0] == 0x50 and buf[1] == 0x4B):
            return False
        return buf[2] in (0x3, 0x5, 0x7) and buf[3] in (0x4, 0x6, 0x8)
|
||||
|
||||
|
||||
class Tar(Type):
    """
    Implements the Tar archive type matcher.
    """
    MIME = 'application/x-tar'
    EXTENSION = 'tar'

    def __init__(self):
        super(Tar, self).__init__(mime=Tar.MIME, extension=Tar.EXTENSION)

    def match(self, buf):
        """Return True when bytes 257-261 of buf spell "ustar"."""
        if not len(buf) > 261:
            return False
        ustar = (0x75, 0x73, 0x74, 0x61, 0x72)
        return tuple(buf[257:262]) == ustar
|
||||
|
||||
|
||||
class Rar(Type):
    """
    Implements the RAR archive type matcher.
    """
    MIME = 'application/x-rar-compressed'
    EXTENSION = 'rar'

    def __init__(self):
        super(Rar, self).__init__(mime=Rar.MIME, extension=Rar.EXTENSION)

    def match(self, buf):
        """
        Return True when buf starts with "Rar!\\x1a\\x07" and the
        seventh byte is 0 or 1.
        """
        if not len(buf) > 6:
            return False
        prefix = (0x52, 0x61, 0x72, 0x21, 0x1A, 0x7)
        return tuple(buf[:6]) == prefix and buf[6] in (0x0, 0x1)
|
||||
|
||||
|
||||
class Gz(Type):
    """
    Implements the GZ archive type matcher.
    """
    MIME = 'application/gzip'
    EXTENSION = 'gz'

    def __init__(self):
        super(Gz, self).__init__(mime=Gz.MIME, extension=Gz.EXTENSION)

    def match(self, buf):
        """Return True when buf starts with the bytes 1F 8B 08."""
        if not len(buf) > 2:
            return False
        return tuple(buf[:3]) == (0x1F, 0x8B, 0x8)
|
||||
|
||||
|
||||
class Bz2(Type):
    """
    Matcher for BZ2 archives.

    A buffer matches when its first three bytes spell ``BZh``.
    """
    MIME = 'application/x-bzip2'
    EXTENSION = 'bz2'

    def __init__(self):
        super(Bz2, self).__init__(mime=Bz2.MIME, extension=Bz2.EXTENSION)

    def match(self, buf):
        if len(buf) <= 2:
            return False
        return (buf[0], buf[1], buf[2]) == (0x42, 0x5A, 0x68)
|
||||
|
||||
|
||||
class SevenZ(Type):
    """
    Matcher for SevenZ (7z) archives.

    A buffer matches when it starts with 0x37 0x7A 0xBC 0xAF 0x27 0x1C.
    """
    MIME = 'application/x-7z-compressed'
    EXTENSION = '7z'

    def __init__(self):
        super(SevenZ, self).__init__(
            mime=SevenZ.MIME, extension=SevenZ.EXTENSION)

    def match(self, buf):
        if len(buf) <= 5:
            return False
        return tuple(buf[:6]) == (0x37, 0x7A, 0xBC, 0xAF, 0x27, 0x1C)
|
||||
|
||||
|
||||
class Pdf(Type):
    """
    Matcher for PDF documents.

    A buffer matches when it starts with ``%PDF``, optionally preceded
    by the three-byte sequence 0xEF 0xBB 0xBF.
    """
    MIME = 'application/pdf'
    EXTENSION = 'pdf'

    def __init__(self):
        super(Pdf, self).__init__(mime=Pdf.MIME, extension=Pdf.EXTENSION)

    def match(self, buf):
        # Drop a leading BOM so the header check below sees the real start.
        if len(buf) > 3 and tuple(buf[:3]) == (0xEF, 0xBB, 0xBF):
            buf = buf[3:]

        if len(buf) <= 3:
            return False
        return tuple(buf[:4]) == (0x25, 0x50, 0x44, 0x46)
|
||||
|
||||
|
||||
class Exe(Type):
    """
    Matcher for EXE files.

    A buffer matches when its first two bytes spell ``MZ``.
    """
    MIME = 'application/x-msdownload'
    EXTENSION = 'exe'

    def __init__(self):
        super(Exe, self).__init__(mime=Exe.MIME, extension=Exe.EXTENSION)

    def match(self, buf):
        if len(buf) <= 1:
            return False
        return (buf[0], buf[1]) == (0x4D, 0x5A)
|
||||
|
||||
|
||||
class Swf(Type):
    """
    Matcher for SWF files.

    A buffer matches when it starts with ``CWS`` or ``FWS``.
    """
    MIME = 'application/x-shockwave-flash'
    EXTENSION = 'swf'

    def __init__(self):
        super(Swf, self).__init__(mime=Swf.MIME, extension=Swf.EXTENSION)

    def match(self, buf):
        if len(buf) <= 2:
            return False
        # First byte may be 'C' or 'F'; the next two are always 'WS'.
        return buf[0] in (0x43, 0x46) and (buf[1], buf[2]) == (0x57, 0x53)
|
||||
|
||||
|
||||
class Rtf(Type):
    """
    Matcher for RTF documents.

    A buffer matches when its first five bytes spell ``{\\rtf``.
    """
    MIME = 'application/rtf'
    EXTENSION = 'rtf'

    def __init__(self):
        super(Rtf, self).__init__(mime=Rtf.MIME, extension=Rtf.EXTENSION)

    def match(self, buf):
        if len(buf) <= 4:
            return False
        return tuple(buf[:5]) == (0x7B, 0x5C, 0x72, 0x74, 0x66)
|
||||
|
||||
|
||||
class Nes(Type):
    """
    Matcher for NES ROM files.

    A buffer matches when it starts with ``NES`` followed by 0x1A.
    """
    MIME = 'application/x-nintendo-nes-rom'
    EXTENSION = 'nes'

    def __init__(self):
        super(Nes, self).__init__(mime=Nes.MIME, extension=Nes.EXTENSION)

    def match(self, buf):
        if len(buf) <= 3:
            return False
        return tuple(buf[:4]) == (0x4E, 0x45, 0x53, 0x1A)
|
||||
|
||||
|
||||
class Crx(Type):
    """
    Matcher for CRX files.

    A buffer matches when its first four bytes spell ``Cr24``.
    """
    MIME = 'application/x-google-chrome-extension'
    EXTENSION = 'crx'

    def __init__(self):
        super(Crx, self).__init__(mime=Crx.MIME, extension=Crx.EXTENSION)

    def match(self, buf):
        if len(buf) <= 3:
            return False
        return tuple(buf[:4]) == (0x43, 0x72, 0x32, 0x34)
|
||||
|
||||
|
||||
class Cab(Type):
    """
    Matcher for CAB archives.

    A buffer matches when its first four bytes are either ``MSCF`` or
    ``ISc(``.
    """
    MIME = 'application/vnd.ms-cab-compressed'
    EXTENSION = 'cab'

    def __init__(self):
        super(Cab, self).__init__(mime=Cab.MIME, extension=Cab.EXTENSION)

    def match(self, buf):
        if len(buf) <= 3:
            return False
        accepted = (
            (0x4D, 0x53, 0x43, 0x46),
            (0x49, 0x53, 0x63, 0x28),
        )
        return tuple(buf[:4]) in accepted
|
||||
|
||||
|
||||
class Eot(Type):
    """
    Matcher for EOT files.

    A buffer matches when bytes 34-35 spell ``LP`` and bytes 8-10 hold
    one of three accepted triples.
    """
    MIME = 'application/octet-stream'
    EXTENSION = 'eot'

    def __init__(self):
        super(Eot, self).__init__(mime=Eot.MIME, extension=Eot.EXTENSION)

    def match(self, buf):
        if len(buf) <= 35:
            return False
        if (buf[34], buf[35]) != (0x4C, 0x50):
            return False
        accepted = (
            (0x02, 0x00, 0x01),
            (0x01, 0x00, 0x00),
            (0x02, 0x00, 0x02),
        )
        return tuple(buf[8:11]) in accepted
|
||||
|
||||
|
||||
class Ps(Type):
    """
    Matcher for PS (PostScript) files.

    A buffer matches when its first two bytes spell ``%!``.
    """
    MIME = 'application/postscript'
    EXTENSION = 'ps'

    def __init__(self):
        super(Ps, self).__init__(mime=Ps.MIME, extension=Ps.EXTENSION)

    def match(self, buf):
        if len(buf) <= 1:
            return False
        return (buf[0], buf[1]) == (0x25, 0x21)
|
||||
|
||||
|
||||
class Xz(Type):
    """
    Matcher for XZ archives.

    A buffer matches when it starts with 0xFD ``7zXZ`` 0x00.
    """
    MIME = 'application/x-xz'
    EXTENSION = 'xz'

    def __init__(self):
        super(Xz, self).__init__(mime=Xz.MIME, extension=Xz.EXTENSION)

    def match(self, buf):
        if len(buf) <= 5:
            return False
        return tuple(buf[:6]) == (0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00)
|
||||
|
||||
|
||||
class Sqlite(Type):
    """
    Matcher for Sqlite database files.

    A buffer matches when its first four bytes spell ``SQLi``.
    """
    MIME = 'application/x-sqlite3'
    EXTENSION = 'sqlite'

    def __init__(self):
        super(Sqlite, self).__init__(
            mime=Sqlite.MIME, extension=Sqlite.EXTENSION)

    def match(self, buf):
        if len(buf) <= 3:
            return False
        return tuple(buf[:4]) == (0x53, 0x51, 0x4C, 0x69)
|
||||
|
||||
|
||||
class Deb(Type):
    """
    Matcher for DEB archives.

    A buffer matches when its first 21 bytes spell ``!<arch>``, a
    newline, then ``debian-binary``.
    """
    MIME = 'application/x-deb'
    EXTENSION = 'deb'

    def __init__(self):
        super(Deb, self).__init__(mime=Deb.MIME, extension=Deb.EXTENSION)

    def match(self, buf):
        if len(buf) <= 20:
            return False
        expected = (
            0x21, 0x3C, 0x61, 0x72, 0x63, 0x68, 0x3E, 0x0A,  # !<arch>\n
            0x64, 0x65, 0x62, 0x69, 0x61, 0x6E, 0x2D,        # debian-
            0x62, 0x69, 0x6E, 0x61, 0x72, 0x79,              # binary
        )
        return tuple(buf[:21]) == expected
|
||||
|
||||
|
||||
class Ar(Type):
    """
    Matcher for AR archives.

    A buffer matches when its first seven bytes spell ``!<arch>``.
    """
    MIME = 'application/x-unix-archive'
    EXTENSION = 'ar'

    def __init__(self):
        super(Ar, self).__init__(mime=Ar.MIME, extension=Ar.EXTENSION)

    def match(self, buf):
        if len(buf) <= 6:
            return False
        return tuple(buf[:7]) == (0x21, 0x3C, 0x61, 0x72, 0x63, 0x68, 0x3E)
|
||||
|
||||
|
||||
class Z(Type):
    """
    Matcher for Z archives.

    A buffer matches when it starts with 0x1F followed by 0xA0 or 0x9D.
    """
    MIME = 'application/x-compress'
    EXTENSION = 'Z'

    def __init__(self):
        super(Z, self).__init__(mime=Z.MIME, extension=Z.EXTENSION)

    def match(self, buf):
        if len(buf) <= 1:
            return False
        return buf[0] == 0x1F and buf[1] in (0xA0, 0x9D)
|
||||
|
||||
|
||||
class Lzop(Type):
    """
    Matcher for Lzop archives.

    A buffer matches when it starts with the eight bytes
    0x89 ``LZO`` 0x00 0x0D 0x0A 0x1A.
    """
    MIME = 'application/x-lzop'
    EXTENSION = 'lzo'

    def __init__(self):
        super(Lzop, self).__init__(mime=Lzop.MIME, extension=Lzop.EXTENSION)

    def match(self, buf):
        if len(buf) <= 7:
            return False
        expected = (0x89, 0x4C, 0x5A, 0x4F, 0x00, 0x0D, 0x0A, 0x1A)
        return tuple(buf[:8]) == expected
|
||||
|
||||
|
||||
class Lz(Type):
    """
    Matcher for Lz archives.

    A buffer matches when its first four bytes spell ``LZIP``.
    """
    MIME = 'application/x-lzip'
    EXTENSION = 'lz'

    def __init__(self):
        super(Lz, self).__init__(mime=Lz.MIME, extension=Lz.EXTENSION)

    def match(self, buf):
        if len(buf) <= 3:
            return False
        return tuple(buf[:4]) == (0x4C, 0x5A, 0x49, 0x50)
|
||||
|
||||
|
||||
class Elf(Type):
    """
    Matcher for Elf files.

    A buffer matches when it is longer than 52 bytes and starts with
    0x7F ``ELF``.
    """
    MIME = 'application/x-executable'
    EXTENSION = 'elf'

    def __init__(self):
        super(Elf, self).__init__(mime=Elf.MIME, extension=Elf.EXTENSION)

    def match(self, buf):
        # The length requirement exceeds the four bytes inspected.
        if len(buf) <= 52:
            return False
        return tuple(buf[:4]) == (0x7F, 0x45, 0x4C, 0x46)
|
||||
|
||||
|
||||
class Lz4(Type):
    """
    Matcher for Lz4 archives.

    A buffer matches when it starts with 0x04 0x22 0x4D 0x18.
    """
    MIME = 'application/x-lz4'
    EXTENSION = 'lz4'

    def __init__(self):
        super(Lz4, self).__init__(mime=Lz4.MIME, extension=Lz4.EXTENSION)

    def match(self, buf):
        if len(buf) <= 3:
            return False
        return tuple(buf[:4]) == (0x04, 0x22, 0x4D, 0x18)
|
||||
|
||||
|
||||
class Br(Type):
    """Matcher for Br (MIME ``application/x-brotli``) files."""

    MIME = 'application/x-brotli'
    EXTENSION = 'br'

    def __init__(self):
        super(Br, self).__init__(mime=Br.MIME, extension=Br.EXTENSION)

    def match(self, buf):
        # Compare the leading four bytes against the expected signature.
        magic = bytearray([0xce, 0xb2, 0xcf, 0x81])
        return buf[:4] == magic
|
||||
|
||||
|
||||
class Dcm(Type):
    """
    Implements the Dcm (DICOM) type matcher.

    A buffer matches when the four bytes at offsets 128-131 spell
    ``DICM``.
    """

    MIME = 'application/dicom'
    EXTENSION = 'dcm'

    def __init__(self):
        super(Dcm, self).__init__(
            mime=Dcm.MIME,
            extension=Dcm.EXTENSION
        )

    def match(self, buf):
        # The slice must cover all four signature bytes (128..131);
        # a three-byte slice can never equal the four-byte signature.
        return buf[128:132] == bytearray([0x44, 0x49, 0x43, 0x4d])
|
||||
|
||||
|
||||
class Rpm(Type):
    """Matcher for Rpm (MIME ``application/x-rpm``) files."""

    MIME = 'application/x-rpm'
    EXTENSION = 'rpm'

    def __init__(self):
        super(Rpm, self).__init__(mime=Rpm.MIME, extension=Rpm.EXTENSION)

    def match(self, buf):
        # Compare the leading four bytes against the expected signature.
        magic = bytearray([0xed, 0xab, 0xee, 0xdb])
        return buf[:4] == magic
|
||||
|
||||
|
||||
class Zstd(Type):
    """
    Implements the Zstd archive type matcher.
    https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md

    A buffer matches when a Zstandard frame magic number is found at the
    start of the buffer, or directly after one or more complete
    skippable frames.
    """
    MIME = 'application/zstd'
    EXTENSION = 'zst'
    MAGIC_SKIPPABLE_START = 0x184D2A50
    MAGIC_SKIPPABLE_MASK = 0xFFFFFFF0

    def __init__(self):
        super(Zstd, self).__init__(
            mime=Zstd.MIME,
            extension=Zstd.EXTENSION
        )

    @staticmethod
    def _to_little_endian_int(buf):
        """Decode a 4-byte buffer as an unsigned little-endian integer."""
        return struct.unpack('<L', buf)[0]

    def match(self, buf):
        """
        Return True when ``buf`` holds a Zstandard frame.

        Zstandard compressed data is made of one or more frames.
        There are two frame formats defined by Zstandard:
        Zstandard frames and Skippable frames.
        See more details from
        https://tools.ietf.org/id/draft-kucherawy-dispatch-zstd-00.html#rfc.section.2

        Skippable frames are walked with an offset in a loop rather than
        by recursing on a re-sliced buffer, so a buffer packed with many
        tiny skippable frames cannot exhaust the recursion limit.
        """
        offset = 0
        while True:
            remaining = len(buf) - offset

            # Zstandard frame magic at the current position.
            if (remaining > 3 and
                    buf[offset] in (0x22, 0x23, 0x24, 0x25,
                                    0x26, 0x27, 0x28) and
                    buf[offset + 1] == 0xb5 and
                    buf[offset + 2] == 0x2f and
                    buf[offset + 3] == 0xfd):
                return True

            # A skippable frame header is 8 bytes: magic + payload length.
            if remaining < 8:
                return False

            magic = (self._to_little_endian_int(buf[offset:offset + 4]) &
                     Zstd.MAGIC_SKIPPABLE_MASK)
            if magic != Zstd.MAGIC_SKIPPABLE_START:
                return False

            user_data_len = self._to_little_endian_int(
                buf[offset + 4:offset + 8])
            # The whole skippable frame must be present in the buffer.
            if remaining < 8 + user_data_len:
                return False

            # Advance past this frame and examine whatever follows it.
            offset += 8 + user_data_len
|
212
libs/filetype/types/audio.py
Normal file
212
libs/filetype/types/audio.py
Normal file
|
@ -0,0 +1,212 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import absolute_import
|
||||
|
||||
from .base import Type
|
||||
|
||||
|
||||
class Midi(Type):
    """
    Matcher for Midi audio.

    A buffer matches when its first four bytes spell ``MThd``.
    """
    MIME = 'audio/midi'
    EXTENSION = 'midi'

    def __init__(self):
        super(Midi, self).__init__(mime=Midi.MIME, extension=Midi.EXTENSION)

    def match(self, buf):
        if len(buf) <= 3:
            return False
        return tuple(buf[:4]) == (0x4D, 0x54, 0x68, 0x64)
|
||||
|
||||
|
||||
class Mp3(Type):
    """
    Matcher for MP3 audio.

    A buffer matches when it starts with ``ID3``, or with 0xFF followed
    by 0xF2, 0xF3 or 0xFB.
    """
    MIME = 'audio/mpeg'
    EXTENSION = 'mp3'

    def __init__(self):
        super(Mp3, self).__init__(mime=Mp3.MIME, extension=Mp3.EXTENSION)

    def match(self, buf):
        if len(buf) <= 2:
            return False
        if (buf[0], buf[1], buf[2]) == (0x49, 0x44, 0x33):
            return True
        return buf[0] == 0xFF and buf[1] in (0xF2, 0xF3, 0xFB)
|
||||
|
||||
|
||||
class M4a(Type):
    """
    Matcher for M4A audio.

    A buffer matches when bytes 4-10 spell ``ftypM4A`` or when the
    first four bytes spell ``M4A`` followed by a space.
    """
    MIME = 'audio/mp4'
    EXTENSION = 'm4a'

    def __init__(self):
        super(M4a, self).__init__(mime=M4a.MIME, extension=M4a.EXTENSION)

    def match(self, buf):
        if len(buf) <= 10:
            return False
        if tuple(buf[4:11]) == (0x66, 0x74, 0x79, 0x70, 0x4D, 0x34, 0x41):
            return True
        return tuple(buf[:4]) == (0x4D, 0x34, 0x41, 0x20)
|
||||
|
||||
|
||||
class Ogg(Type):
    """
    Matcher for OGG audio.

    A buffer matches when its first four bytes spell ``OggS``.
    """
    MIME = 'audio/ogg'
    EXTENSION = 'ogg'

    def __init__(self):
        super(Ogg, self).__init__(mime=Ogg.MIME, extension=Ogg.EXTENSION)

    def match(self, buf):
        if len(buf) <= 3:
            return False
        return tuple(buf[:4]) == (0x4F, 0x67, 0x67, 0x53)
|
||||
|
||||
|
||||
class Flac(Type):
    """
    Matcher for FLAC audio.

    A buffer matches when its first four bytes spell ``fLaC``.
    """
    MIME = 'audio/x-flac'
    EXTENSION = 'flac'

    def __init__(self):
        super(Flac, self).__init__(mime=Flac.MIME, extension=Flac.EXTENSION)

    def match(self, buf):
        if len(buf) <= 3:
            return False
        return tuple(buf[:4]) == (0x66, 0x4C, 0x61, 0x43)
|
||||
|
||||
|
||||
class Wav(Type):
    """
    Matcher for WAV audio.

    A buffer matches when bytes 0-3 spell ``RIFF`` and bytes 8-11 spell
    ``WAVE``.
    """
    MIME = 'audio/x-wav'
    EXTENSION = 'wav'

    def __init__(self):
        super(Wav, self).__init__(mime=Wav.MIME, extension=Wav.EXTENSION)

    def match(self, buf):
        if len(buf) <= 11:
            return False
        if tuple(buf[:4]) != (0x52, 0x49, 0x46, 0x46):
            return False
        # Bytes 4-7 are not inspected.
        return tuple(buf[8:12]) == (0x57, 0x41, 0x56, 0x45)
|
||||
|
||||
|
||||
class Amr(Type):
    """
    Matcher for AMR audio.

    A buffer matches when it is longer than 11 bytes and its first six
    bytes spell ``#!AMR`` followed by a newline.
    """
    MIME = 'audio/amr'
    EXTENSION = 'amr'

    def __init__(self):
        super(Amr, self).__init__(mime=Amr.MIME, extension=Amr.EXTENSION)

    def match(self, buf):
        # The length requirement exceeds the six bytes inspected.
        if len(buf) <= 11:
            return False
        return tuple(buf[:6]) == (0x23, 0x21, 0x41, 0x4D, 0x52, 0x0A)
|
||||
|
||||
|
||||
class Aac(Type):
    """
    Matcher for Aac audio.

    A buffer matches when its first two bytes are 0xFF 0xF1 or
    0xFF 0xF9.
    """

    MIME = 'audio/aac'
    EXTENSION = 'aac'

    def __init__(self):
        super(Aac, self).__init__(mime=Aac.MIME, extension=Aac.EXTENSION)

    def match(self, buf):
        head = buf[:2]
        if head == bytearray([0xff, 0xf1]):
            return True
        return head == bytearray([0xff, 0xf9])
|
||||
|
||||
|
||||
class Aiff(Type):
    """
    Matcher for AIFF audio.

    A buffer matches when bytes 0-3 spell ``FORM`` and bytes 8-11 spell
    ``AIFF``.
    """
    MIME = 'audio/x-aiff'
    EXTENSION = 'aiff'

    def __init__(self):
        super(Aiff, self).__init__(mime=Aiff.MIME, extension=Aiff.EXTENSION)

    def match(self, buf):
        if len(buf) <= 11:
            return False
        if tuple(buf[:4]) != (0x46, 0x4F, 0x52, 0x4D):
            return False
        # Bytes 4-7 are not inspected.
        return tuple(buf[8:12]) == (0x41, 0x49, 0x46, 0x46)
|
29
libs/filetype/types/base.py
Normal file
29
libs/filetype/types/base.py
Normal file
|
@ -0,0 +1,29 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
|
||||
class Type(object):
    """
    Represents the file type object inherited by
    specific file type matchers.
    Provides convenient accessor and helper methods.
    """
    def __init__(self, mime, extension):
        """Store the MIME type and file extension of this type."""
        self.__mime = mime
        self.__extension = extension

    @property
    def mime(self):
        """The MIME type given at construction."""
        return self.__mime

    @property
    def extension(self):
        """The file extension given at construction."""
        return self.__extension

    def is_extension(self, extension):
        """Return True when ``extension`` equals this type's extension."""
        # Compare by value: identity (``is``) only holds for strings that
        # happen to be the same object, so equal strings built at runtime
        # would be rejected.
        return self.__extension == extension

    def is_mime(self, mime):
        """Return True when ``mime`` equals this type's MIME type."""
        # Value comparison, for the same reason as in is_extension.
        return self.__mime == mime

    def match(self, buf):
        """Report whether ``buf`` is of this type; subclasses implement."""
        raise NotImplementedError
|
256
libs/filetype/types/document.py
Normal file
256
libs/filetype/types/document.py
Normal file
|
@ -0,0 +1,256 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import absolute_import
|
||||
|
||||
from .base import Type
|
||||
|
||||
|
||||
class ZippedDocumentBase(Type):
    """
    Shared base for document matchers whose files are ZIP containers.

    ``match`` checks for the ZIP local file header signature at the
    start of the buffer and then defers to ``match_document``, which
    subclasses must implement.
    """

    def match(self, buf):
        # Only buffers that begin with the ZIP signature are considered.
        if self.search_signature(buf, 0, 6000) == 0:
            return self.match_document(buf)
        return None

    def match_document(self, buf):
        raise NotImplementedError

    def compare_bytes(self, buf, subslice, start_offset):
        """Return whether ``subslice`` occurs in ``buf`` at ``start_offset``."""
        stop = start_offset + len(subslice)
        if stop > len(buf):
            return False
        return buf[start_offset:stop] == subslice

    def search_signature(self, buf, start, rangeNum):
        """
        Return the index of the first ZIP local file header signature in
        ``buf`` between ``start`` and ``start + rangeNum`` (clamped to
        the buffer length), or -1 when there is none.
        """
        signature = b"PK\x03\x04"
        end = min(start + rangeNum, len(buf))
        if start >= end:
            return -1

        try:
            found = buf.index(signature, start, end)
        except ValueError:
            found = -1
        return found
|
||||
|
||||
|
||||
class OpenDocument(ZippedDocumentBase):
    """
    Base matcher for document types identified by a ``mimetype`` entry
    whose content equals the matcher's own MIME type.
    """

    def match_document(self, buf):
        # The name at offset 0x1E must be "mimetype".
        if self.compare_bytes(buf, b"mimetype", 0x1E):
            # The bytes at offset 0x26 must equal this matcher's MIME.
            expected = self.mime.encode("ASCII")
            return self.compare_bytes(buf, expected, 0x26)
        return None
|
||||
|
||||
|
||||
class OfficeOpenXml(ZippedDocumentBase):
    """
    Base matcher for Office Open XML documents.

    The type is decided from the directory prefix (``word/``, ``ppt/``
    or ``xl/``) of one of the first ZIP entries.
    """

    def match_document(self, buf):
        # The first entry name (offset 0x1E) may already identify the type.
        verdict = self.match_filename(buf, 0x1E)
        if verdict:
            return verdict

        # Otherwise the first entry has to be one of the known names.
        known_first_entries = (
            b"[Content_Types].xml",
            b"_rels/.rels",
            b"docProps",
        )
        if not any(self.compare_bytes(buf, name, 0x1E)
                   for name in known_first_entries):
            return None

        # Inspect up to four further entries.
        # NOTE: OpenOffice/Libreoffice orders ZIP entry differently, so check the 4th file
        # https://github.com/h2non/filetype/blob/d730d98ad5c990883148485b6fd5adbdd378364a/matchers/document.go#L134
        header = 0
        for _ in range(4):
            header = self.search_signature(buf, header + 4, 6000)
            if header == -1:
                return None

            # The entry name starts 30 bytes after the header signature.
            verdict = self.match_filename(buf, header + 30)
            if verdict:
                return verdict
        return None

    def match_filename(self, buf, offset):
        """
        Compare the entry name at ``offset`` with the known directory
        prefixes. Returns whether this matcher's MIME equals the one
        tied to the prefix found, or None when no prefix is present.
        """
        prefix_to_mime = (
            (b"word/",
             "application/vnd.openxmlformats-officedocument.wordprocessingml.document"),
            (b"ppt/",
             "application/vnd.openxmlformats-officedocument.presentationml.presentation"),
            (b"xl/",
             "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"),
        )
        for prefix, mime in prefix_to_mime:
            if self.compare_bytes(buf, prefix, offset):
                return self.mime == mime
        return None
|
||||
|
||||
|
||||
class Doc(Type):
    """
    Matcher for Microsoft Word (Office 97-2003) documents.
    """

    MIME = "application/msword"
    EXTENSION = "doc"

    def __init__(self):
        super(Doc, self).__init__(
            mime=Doc.MIME,
            extension=Doc.EXTENSION,
        )

    def match(self, buf):
        # Require more than 515 bytes and the 8-byte container signature.
        if len(buf) <= 515:
            return False
        if buf[0:8] != b"\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1":
            return False

        # Marker at offset 512.
        if buf[512:516] == b"\xEC\xA5\xC1\x00":
            return True

        # Fallback: look for the Word marker inside bytes 2075-2141.
        return (
            len(buf) > 2142
            and b"\x00\x0A\x00\x00\x00MSWordDoc\x00\x10\x00\x00\x00Word.Document.8\x00\xF49\xB2q"
            in buf[2075:2142]
        )
|
||||
|
||||
|
||||
class Docx(OfficeOpenXml):
    """
    Matcher for Microsoft Word OOXML (Office 2007+) documents.

    Defines only the MIME type and extension; no match logic of its own.
    """

    MIME = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
    EXTENSION = "docx"

    def __init__(self):
        super(Docx, self).__init__(
            mime=Docx.MIME,
            extension=Docx.EXTENSION,
        )
|
||||
|
||||
|
||||
class Odt(OpenDocument):
    """
    Matcher for OpenDocument Text documents.

    Defines only the MIME type and extension; no match logic of its own.
    """

    MIME = "application/vnd.oasis.opendocument.text"
    EXTENSION = "odt"

    def __init__(self):
        super(Odt, self).__init__(
            mime=Odt.MIME,
            extension=Odt.EXTENSION,
        )
|
||||
|
||||
|
||||
class Xls(Type):
    """
    Matcher for Microsoft Excel (Office 97-2003) documents.
    """

    MIME = "application/vnd.ms-excel"
    EXTENSION = "xls"

    def __init__(self):
        super(Xls, self).__init__(
            mime=Xls.MIME,
            extension=Xls.EXTENSION,
        )

    def match(self, buf):
        # Require more than 520 bytes and the 8-byte container signature.
        if len(buf) <= 520:
            return False
        if buf[0:8] != b"\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1":
            return False

        # Marker at offset 512, qualified by the byte at offset 518.
        if buf[512:516] == b"\xFD\xFF\xFF\xFF" and buf[518] in (0x00, 0x02):
            return True

        # Alternative eight-byte marker at offset 512.
        if buf[512:520] == b"\x09\x08\x10\x00\x00\x06\x05\x00":
            return True

        # Fallback: look for the "Calc" marker inside bytes 1568-2094.
        return (
            len(buf) > 2095
            and b"\xE2\x00\x00\x00\x5C\x00\x70\x00\x04\x00\x00Calc"
            in buf[1568:2095]
        )
|
||||
|
||||
|
||||
class Xlsx(OfficeOpenXml):
    """
    Matcher for Microsoft Excel OOXML (Office 2007+) documents.

    Defines only the MIME type and extension; no match logic of its own.
    """

    MIME = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
    EXTENSION = "xlsx"

    def __init__(self):
        super(Xlsx, self).__init__(
            mime=Xlsx.MIME,
            extension=Xlsx.EXTENSION,
        )
|
||||
|
||||
|
||||
class Ods(OpenDocument):
    """
    Matcher for OpenDocument Spreadsheet documents.

    Defines only the MIME type and extension; no match logic of its own.
    """

    MIME = "application/vnd.oasis.opendocument.spreadsheet"
    EXTENSION = "ods"

    def __init__(self):
        super(Ods, self).__init__(
            mime=Ods.MIME,
            extension=Ods.EXTENSION,
        )
|
||||
|
||||
|
||||
class Ppt(Type):
    """
    Matcher for Microsoft PowerPoint (Office 97-2003) documents.
    """

    MIME = "application/vnd.ms-powerpoint"
    EXTENSION = "ppt"

    def __init__(self):
        super(Ppt, self).__init__(
            mime=Ppt.MIME,
            extension=Ppt.EXTENSION,
        )

    def match(self, buf):
        # Require more than 524 bytes and the 8-byte container signature.
        if len(buf) <= 524:
            return False
        if buf[0:8] != b"\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1":
            return False

        marker = buf[512:516]

        # Any of these four-byte markers at offset 512 is sufficient.
        direct_markers = (
            b"\xA0\x46\x1D\xF0",
            b"\x00\x6E\x1E\xF0",
            b"\x0F\x00\xE8\x03",
        )
        if any(marker == candidate for candidate in direct_markers):
            return True

        # This marker additionally needs two zero bytes at offset 522.
        if marker == b"\xFD\xFF\xFF\xFF" and buf[522:524] == b"\x00\x00":
            return True

        # Fallback: fixed-position PowerPoint 97 marker.
        return (
            len(buf) > 2096
            and buf[2072:2096]
            == b"\x00\xB9\x29\xE8\x11\x00\x00\x00MS PowerPoint 97"
        )
|
||||
|
||||
|
||||
class Pptx(OfficeOpenXml):
    """
    Matcher for Microsoft PowerPoint OOXML (Office 2007+) documents.

    Defines only the MIME type and extension; no match logic of its own.
    """

    MIME = "application/vnd.openxmlformats-officedocument.presentationml.presentation"
    EXTENSION = "pptx"

    def __init__(self):
        super(Pptx, self).__init__(
            mime=Pptx.MIME,
            extension=Pptx.EXTENSION,
        )
|
||||
|
||||
|
||||
class Odp(OpenDocument):
    """
    Matcher for OpenDocument Presentation documents.

    Defines only the MIME type and extension; no match logic of its own.
    """

    MIME = "application/vnd.oasis.opendocument.presentation"
    EXTENSION = "odp"

    def __init__(self):
        super(Odp, self).__init__(
            mime=Odp.MIME,
            extension=Odp.EXTENSION,
        )
|
115
libs/filetype/types/font.py
Normal file
115
libs/filetype/types/font.py
Normal file
|
@ -0,0 +1,115 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import absolute_import
|
||||
|
||||
from .base import Type
|
||||
|
||||
|
||||
class Woff(Type):
    """
    Matcher for WOFF fonts.

    A buffer matches when it starts with ``wOFF`` and bytes 4-7 hold
    0x00 0x01 0x00 0x00, ``OTTO`` or ``true``.
    """
    MIME = 'application/font-woff'
    EXTENSION = 'woff'

    def __init__(self):
        super(Woff, self).__init__(mime=Woff.MIME, extension=Woff.EXTENSION)

    def match(self, buf):
        if len(buf) <= 7:
            return False
        if tuple(buf[:4]) != (0x77, 0x4F, 0x46, 0x46):
            return False
        accepted = (
            (0x00, 0x01, 0x00, 0x00),
            (0x4F, 0x54, 0x54, 0x4F),
            (0x74, 0x72, 0x75, 0x65),
        )
        return tuple(buf[4:8]) in accepted
|
||||
|
||||
|
||||
class Woff2(Type):
    """
    Matcher for WOFF2 fonts.

    A buffer matches when it starts with ``wOF2`` and bytes 4-7 hold
    0x00 0x01 0x00 0x00, ``OTTO`` or ``true``.
    """
    MIME = 'application/font-woff'
    EXTENSION = 'woff2'

    def __init__(self):
        super(Woff2, self).__init__(
            mime=Woff2.MIME, extension=Woff2.EXTENSION)

    def match(self, buf):
        if len(buf) <= 7:
            return False
        if tuple(buf[:4]) != (0x77, 0x4F, 0x46, 0x32):
            return False
        accepted = (
            (0x00, 0x01, 0x00, 0x00),
            (0x4F, 0x54, 0x54, 0x4F),
            (0x74, 0x72, 0x75, 0x65),
        )
        return tuple(buf[4:8]) in accepted
|
||||
|
||||
|
||||
class Ttf(Type):
    """
    Matcher for TTF fonts.

    A buffer matches when its first five bytes are
    0x00 0x01 0x00 0x00 0x00.
    """
    MIME = 'application/font-sfnt'
    EXTENSION = 'ttf'

    def __init__(self):
        super(Ttf, self).__init__(mime=Ttf.MIME, extension=Ttf.EXTENSION)

    def match(self, buf):
        if len(buf) <= 4:
            return False
        return tuple(buf[:5]) == (0x00, 0x01, 0x00, 0x00, 0x00)
|
||||
|
||||
|
||||
class Otf(Type):
    """
    Matcher for OTF fonts.

    A buffer matches when it starts with ``OTTO`` followed by 0x00.
    """
    MIME = 'application/font-sfnt'
    EXTENSION = 'otf'

    def __init__(self):
        super(Otf, self).__init__(mime=Otf.MIME, extension=Otf.EXTENSION)

    def match(self, buf):
        if len(buf) <= 4:
            return False
        return tuple(buf[:5]) == (0x4F, 0x54, 0x54, 0x4F, 0x00)
|
383
libs/filetype/types/image.py
Normal file
383
libs/filetype/types/image.py
Normal file
|
@ -0,0 +1,383 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import absolute_import
|
||||
|
||||
from .base import Type
|
||||
from .isobmff import IsoBmff
|
||||
|
||||
|
||||
class Jpeg(Type):
    """
    Matcher for JPEG images.

    A buffer matches when its first three bytes are 0xFF 0xD8 0xFF.
    """
    MIME = 'image/jpeg'
    EXTENSION = 'jpg'

    def __init__(self):
        super(Jpeg, self).__init__(mime=Jpeg.MIME, extension=Jpeg.EXTENSION)

    def match(self, buf):
        if len(buf) <= 2:
            return False
        return (buf[0], buf[1], buf[2]) == (0xFF, 0xD8, 0xFF)
|
||||
|
||||
|
||||
class Jpx(Type):
    """
    Matcher for JPEG2000 images.

    A buffer matches when it is longer than 50 bytes, starts with
    0x00 0x00 0x00 0x0C, and bytes 16-23 spell ``ftypjp2`` plus a space.
    """

    MIME = "image/jpx"
    EXTENSION = "jpx"

    def __init__(self):
        super(Jpx, self).__init__(
            mime=Jpx.MIME,
            extension=Jpx.EXTENSION,
        )

    def match(self, buf):
        if len(buf) <= 50:
            return False
        if tuple(buf[:4]) != (0x00, 0x00, 0x00, 0x0C):
            return False
        return buf[16:24] == b"ftypjp2 "
|
||||
|
||||
|
||||
class Apng(Type):
    """
    Matcher for APNG images.

    After the eight-byte PNG signature the chunks are walked in order;
    the buffer matches when an ``acTL`` chunk is seen before any
    ``IDAT`` or ``IEND`` chunk.
    """
    MIME = 'image/apng'
    EXTENSION = 'apng'

    def __init__(self):
        super(Apng, self).__init__(mime=Apng.MIME, extension=Apng.EXTENSION)

    def match(self, buf):
        png_signature = bytearray([0x89, 0x50, 0x4e, 0x47,
                                   0x0d, 0x0a, 0x1a, 0x0a])
        if not (len(buf) > 8 and buf[:8] == png_signature):
            return False

        # Position of the current chunk; starts right after the signature.
        pos = 8
        while pos < len(buf):
            # Chunk layout: 4-byte big-endian length, 4-byte type,
            # data, then a 4-byte crc.
            data_length = int.from_bytes(buf[pos:pos + 4], byteorder="big")
            chunk_type = buf[pos + 4:pos + 8].decode("ascii", errors='ignore')

            # acTL chunk in APNG should appear before IDAT;
            # IEND is the end of the PNG.
            if chunk_type in ("IDAT", "IEND"):
                return False
            if chunk_type == "acTL":
                return True

            # Skip length + type (8 bytes), the data and the crc (4 bytes).
            pos += 8 + data_length + 4

        return False
|
||||
|
||||
|
||||
class Png(Type):
    """
    Matcher for the PNG image type.

    A buffer matches when it is at least four bytes long and begins
    with ``89 50 4E 47`` (``\\x89PNG``).
    """
    MIME = 'image/png'
    EXTENSION = 'png'

    def __init__(self):
        # Hand the class-level MIME type and extension to the base type.
        super(Png, self).__init__(mime=Png.MIME, extension=Png.EXTENSION)

    def match(self, buf):
        """Return True if ``buf`` starts with the PNG signature prefix."""
        if len(buf) < 4:
            return False
        return (buf[0], buf[1], buf[2], buf[3]) == (0x89, 0x50, 0x4E, 0x47)
|
||||
|
||||
|
||||
class Gif(Type):
    """
    Matcher for the GIF image type.

    A buffer matches when it is at least three bytes long and begins
    with ``47 49 46`` (ASCII ``GIF``).
    """
    MIME = 'image/gif'
    EXTENSION = 'gif'

    def __init__(self):
        # Hand the class-level MIME type and extension to the base type.
        super(Gif, self).__init__(mime=Gif.MIME, extension=Gif.EXTENSION)

    def match(self, buf):
        """Return True if ``buf`` starts with ``GIF``."""
        if len(buf) < 3:
            return False
        return (buf[0], buf[1], buf[2]) == (0x47, 0x49, 0x46)
|
||||
|
||||
|
||||
class Webp(Type):
    """
    Matcher for the WEBP image type.

    A buffer matches when it is at least 14 bytes long, begins with
    ``RIFF`` and carries ``WEBPVP`` at offsets 8..13. Bytes 4..7 are not
    inspected.
    """
    MIME = 'image/webp'
    EXTENSION = 'webp'

    def __init__(self):
        # Hand the class-level MIME type and extension to the base type.
        super(Webp, self).__init__(mime=Webp.MIME, extension=Webp.EXTENSION)

    def match(self, buf):
        """Return True if ``buf`` carries the RIFF/WEBPVP markers."""
        if len(buf) < 14:
            return False
        container = (buf[0], buf[1], buf[2], buf[3])
        form = (buf[8], buf[9], buf[10], buf[11], buf[12], buf[13])
        return (container == (0x52, 0x49, 0x46, 0x46) and
                form == (0x57, 0x45, 0x42, 0x50, 0x56, 0x50))
|
||||
|
||||
|
||||
class Cr2(Type):
    """
    Matcher for the CR2 image type.

    A buffer matches when it is at least ten bytes long, begins with
    either ``49 49 2A 00`` or ``4D 4D 00 2A`` and carries ``43 52``
    (ASCII ``CR``) at offsets 8..9.
    """
    MIME = 'image/x-canon-cr2'
    EXTENSION = 'cr2'

    def __init__(self):
        # Hand the class-level MIME type and extension to the base type.
        super(Cr2, self).__init__(mime=Cr2.MIME, extension=Cr2.EXTENSION)

    def match(self, buf):
        """Return True if ``buf`` has one of the two headers plus ``CR``."""
        if len(buf) < 10:
            return False
        header = (buf[0], buf[1], buf[2], buf[3])
        known_headers = ((0x49, 0x49, 0x2A, 0x0), (0x4D, 0x4D, 0x0, 0x2A))
        return header in known_headers and (buf[8], buf[9]) == (0x43, 0x52)
|
||||
|
||||
|
||||
class Tiff(Type):
    """
    Matcher for the TIFF image type.

    A buffer matches when it is at least ten bytes long and begins with
    either ``49 49 2A 00`` or ``4D 4D 00 2A``, unless offsets 8..9 hold
    ``43 52`` (ASCII ``CR``), in which case it is rejected.
    """
    MIME = 'image/tiff'
    EXTENSION = 'tif'

    def __init__(self):
        # Hand the class-level MIME type and extension to the base type.
        super(Tiff, self).__init__(mime=Tiff.MIME, extension=Tiff.EXTENSION)

    def match(self, buf):
        """Return True for a TIFF header that is not tagged ``CR``."""
        if len(buf) < 10:
            return False
        header = (buf[0], buf[1], buf[2], buf[3])
        known_headers = ((0x49, 0x49, 0x2A, 0x0), (0x4D, 0x4D, 0x0, 0x2A))
        # Buffers carrying "CR" at offsets 8..9 are deliberately excluded.
        return header in known_headers and (buf[8], buf[9]) != (0x43, 0x52)
|
||||
|
||||
|
||||
class Bmp(Type):
    """
    Matcher for the BMP image type.

    A buffer matches when it is at least two bytes long and begins with
    ``42 4D`` (ASCII ``BM``).
    """
    MIME = 'image/bmp'
    EXTENSION = 'bmp'

    def __init__(self):
        # Hand the class-level MIME type and extension to the base type.
        super(Bmp, self).__init__(mime=Bmp.MIME, extension=Bmp.EXTENSION)

    def match(self, buf):
        """Return True if ``buf`` starts with ``BM``."""
        if len(buf) < 2:
            return False
        return (buf[0], buf[1]) == (0x42, 0x4D)
|
||||
|
||||
|
||||
class Jxr(Type):
    """
    Matcher for the JXR image type.

    A buffer matches when it is at least three bytes long and begins
    with ``49 49 BC``.
    """
    MIME = 'image/vnd.ms-photo'
    EXTENSION = 'jxr'

    def __init__(self):
        # Hand the class-level MIME type and extension to the base type.
        super(Jxr, self).__init__(mime=Jxr.MIME, extension=Jxr.EXTENSION)

    def match(self, buf):
        """Return True if ``buf`` starts with the JXR signature."""
        if len(buf) < 3:
            return False
        return (buf[0], buf[1], buf[2]) == (0x49, 0x49, 0xBC)
|
||||
|
||||
|
||||
class Psd(Type):
    """
    Matcher for the PSD image type.

    A buffer matches when it is at least four bytes long and begins
    with ``38 42 50 53`` (ASCII ``8BPS``).
    """
    MIME = 'image/vnd.adobe.photoshop'
    EXTENSION = 'psd'

    def __init__(self):
        # Hand the class-level MIME type and extension to the base type.
        super(Psd, self).__init__(mime=Psd.MIME, extension=Psd.EXTENSION)

    def match(self, buf):
        """Return True if ``buf`` starts with ``8BPS``."""
        if len(buf) < 4:
            return False
        return (buf[0], buf[1], buf[2], buf[3]) == (0x38, 0x42, 0x50, 0x53)
|
||||
|
||||
|
||||
class Ico(Type):
    """
    Matcher for the ICO image type.

    A buffer matches when it is at least four bytes long and begins
    with ``00 00 01 00``.
    """
    MIME = 'image/x-icon'
    EXTENSION = 'ico'

    def __init__(self):
        # Hand the class-level MIME type and extension to the base type.
        super(Ico, self).__init__(mime=Ico.MIME, extension=Ico.EXTENSION)

    def match(self, buf):
        """Return True if ``buf`` starts with the ICO signature."""
        if len(buf) < 4:
            return False
        return (buf[0], buf[1], buf[2], buf[3]) == (0x00, 0x00, 0x01, 0x00)
|
||||
|
||||
|
||||
class Heic(IsoBmff):
    """
    Matcher for the HEIC image type.

    The buffer must pass the ISO-BMFF check. It then matches when the
    major brand is ``heic``, or when the major brand is ``mif1``/``msf1``
    and ``heic`` is listed among the compatible brands.
    """
    MIME = 'image/heic'
    EXTENSION = 'heic'

    def __init__(self):
        # Hand the class-level MIME type and extension to the base type.
        super(Heic, self).__init__(mime=Heic.MIME, extension=Heic.EXTENSION)

    def match(self, buf):
        """Return True if the ftyp brands identify ``buf`` as HEIC."""
        if not self._is_isobmff(buf):
            return False

        # The minor version is part of the ftyp tuple but plays no role here.
        major_brand, _minor_version, compatible_brands = self._get_ftyp(buf)
        generic_container = major_brand in ['mif1', 'msf1']
        return (major_brand == 'heic' or
                (generic_container and 'heic' in compatible_brands))
|
||||
|
||||
|
||||
class Dcm(Type):
    """
    Implements the DICOM type matcher.

    A buffer matches when the four bytes starting at ``OFFSET`` are
    ``44 49 43 4D`` (ASCII ``DICM``).
    """

    MIME = 'application/dicom'
    EXTENSION = 'dcm'
    # Position of the "DICM" marker, i.e. the number of bytes skipped
    # before the marker is looked for.
    OFFSET = 128

    def __init__(self):
        # Hand the class-level MIME type and extension to the base type.
        super(Dcm, self).__init__(
            mime=Dcm.MIME,
            extension=Dcm.EXTENSION
        )

    def match(self, buf):
        """
        Return True if ``buf`` carries ``DICM`` at offset ``OFFSET``.

        Only indices ``OFFSET`` .. ``OFFSET + 3`` are read, so a buffer of
        exactly ``OFFSET + 4`` bytes is long enough. The previous guard
        (``len(buf) > OFFSET + 4``) rejected such a buffer by one byte.
        """
        return (len(buf) >= Dcm.OFFSET + 4 and
                buf[Dcm.OFFSET + 0] == 0x44 and
                buf[Dcm.OFFSET + 1] == 0x49 and
                buf[Dcm.OFFSET + 2] == 0x43 and
                buf[Dcm.OFFSET + 3] == 0x4D)
|
||||
|
||||
|
||||
class Dwg(Type):
    """
    Matcher for the Dwg image type.

    A buffer matches when its first four bytes are ``41 43 31 30``
    (ASCII ``AC10``).
    """

    MIME = 'image/vnd.dwg'
    EXTENSION = 'dwg'

    def __init__(self):
        # Hand the class-level MIME type and extension to the base type.
        super(Dwg, self).__init__(mime=Dwg.MIME, extension=Dwg.EXTENSION)

    def match(self, buf):
        """Return True if ``buf`` starts with ``AC10``."""
        magic = bytearray(b'AC10')
        return buf[:4] == magic
|
||||
|
||||
|
||||
class Xcf(Type):
    """
    Matcher for the Xcf image type.

    A buffer matches when its first ten bytes spell ``gimp xcf v``.
    """

    MIME = 'image/x-xcf'
    EXTENSION = 'xcf'

    def __init__(self):
        # Hand the class-level MIME type and extension to the base type.
        super(Xcf, self).__init__(mime=Xcf.MIME, extension=Xcf.EXTENSION)

    def match(self, buf):
        """Return True if ``buf`` starts with ``gimp xcf v``."""
        magic = bytearray(b'gimp xcf v')
        return buf[:10] == magic
|
||||
|
||||
|
||||
class Avif(IsoBmff):
    """
    Matcher for the AVIF image type.

    The buffer must pass the ISO-BMFF check. It then matches when the
    major brand is ``avif``, or when the major brand is ``mif1``/``msf1``
    and ``avif`` is listed among the compatible brands.
    """
    MIME = 'image/avif'
    EXTENSION = 'avif'

    def __init__(self):
        # Hand the class-level MIME type and extension to the base type.
        super(Avif, self).__init__(mime=Avif.MIME, extension=Avif.EXTENSION)

    def match(self, buf):
        """Return True if the ftyp brands identify ``buf`` as AVIF."""
        if not self._is_isobmff(buf):
            return False

        # The minor version is part of the ftyp tuple but plays no role here.
        major_brand, _minor_version, compatible_brands = self._get_ftyp(buf)
        generic_container = major_brand in ['mif1', 'msf1']
        return (major_brand == 'avif' or
                (generic_container and 'avif' in compatible_brands))
|
33
libs/filetype/types/isobmff.py
Normal file
33
libs/filetype/types/isobmff.py
Normal file
|
@ -0,0 +1,33 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
from __future__ import absolute_import
|
||||
import codecs
|
||||
|
||||
from .base import Type
|
||||
|
||||
|
||||
class IsoBmff(Type):
    """
    Base type for matchers that inspect an ISO-BMFF ``ftyp`` header.

    Subclasses supply their own MIME type and extension and use the two
    helpers below to validate and parse the leading ``ftyp`` box.
    """
    def __init__(self, mime, extension):
        # Forward the subclass-provided identifiers to the base type.
        super(IsoBmff, self).__init__(mime=mime, extension=extension)

    def _is_isobmff(self, buf):
        """
        Return True if ``buf`` starts with a complete ``ftyp`` box.

        The buffer must hold at least 16 bytes, carry ``ftyp`` at offsets
        4..7, and be at least as long as the size encoded big-endian in
        its first four bytes.
        """
        has_ftyp_tag = len(buf) >= 16 and buf[4:8] == b'ftyp'
        if not has_ftyp_tag:
            return False
        # Hex-encode then parse: reads bytes 0..3 as a big-endian integer.
        declared_size = int(codecs.encode(buf[0:4], 'hex'), 16)
        return len(buf) >= declared_size

    def _get_ftyp(self, buf):
        """
        Parse the leading ``ftyp`` box of ``buf``.

        Returns:
            A ``(major_brand, minor_version, compatible_brands)`` tuple:
            the text decoded from bytes 8..11, the big-endian integer in
            bytes 12..15, and the list of four-byte entries decoded from
            offset 16 up to the declared box size.
        """
        box_size = int(codecs.encode(buf[0:4], 'hex'), 16)
        major_brand = buf[8:12].decode(errors='ignore')
        minor_version = int(codecs.encode(buf[12:16], 'hex'), 16)
        compatible_brands = [
            buf[pos:pos + 4].decode(errors='ignore')
            for pos in range(16, box_size, 4)
        ]

        return major_brand, minor_version, compatible_brands
|
223
libs/filetype/types/video.py
Normal file
223
libs/filetype/types/video.py
Normal file
|
@ -0,0 +1,223 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import absolute_import
|
||||
|
||||
from .base import Type
|
||||
from .isobmff import IsoBmff
|
||||
|
||||
|
||||
class Mp4(IsoBmff):
    """
    Matcher for the MP4 video type.

    The buffer must pass the ISO-BMFF check. It then matches when the
    major brand or any compatible brand is ``mp41``, ``mp42`` or
    ``isom``.
    """
    MIME = 'video/mp4'
    EXTENSION = 'mp4'

    def __init__(self):
        # Hand the class-level MIME type and extension to the base type.
        super(Mp4, self).__init__(mime=Mp4.MIME, extension=Mp4.EXTENSION)

    def match(self, buf):
        """Return True if the ftyp brands identify ``buf`` as MP4."""
        if not self._is_isobmff(buf):
            return False

        # The minor version is part of the ftyp tuple but plays no role here.
        major_brand, _minor_version, compatible_brands = self._get_ftyp(buf)
        mp4_brands = ['mp41', 'mp42', 'isom']
        if any(brand in mp4_brands for brand in compatible_brands):
            return True
        return major_brand in mp4_brands
|
||||
|
||||
|
||||
class M4v(Type):
    """
    Matcher for the M4V video type.

    A buffer matches when it is at least 11 bytes long and begins with
    ``00 00 00 1C`` followed by ``ftypM4V``.
    """
    MIME = 'video/x-m4v'
    EXTENSION = 'm4v'

    def __init__(self):
        # Hand the class-level MIME type and extension to the base type.
        super(M4v, self).__init__(mime=M4v.MIME, extension=M4v.EXTENSION)

    def match(self, buf):
        """Return True if ``buf`` starts with the M4V header bytes."""
        if len(buf) < 11:
            return False
        expected = (0x0, 0x0, 0x0, 0x1C,      # leading four bytes
                    0x66, 0x74, 0x79, 0x70,   # "ftyp"
                    0x4D, 0x34, 0x56)         # "M4V"
        return tuple(buf[pos] for pos in range(11)) == expected
|
||||
|
||||
|
||||
class Mkv(Type):
    """
    Matcher for the MKV video type.

    A buffer matches when it begins with ``1A 45 DF A3`` and contains
    the byte sequence ``42 82 88`` followed by ``matroska`` anywhere.
    """
    MIME = 'video/x-matroska'
    EXTENSION = 'mkv'

    def __init__(self):
        # Hand the class-level MIME type and extension to the base type.
        super(Mkv, self).__init__(mime=Mkv.MIME, extension=Mkv.EXTENSION)

    def match(self, buf):
        """Return True if ``buf`` has the EBML start and matroska marker."""
        if not buf.startswith(b'\x1A\x45\xDF\xA3'):
            return False
        return buf.find(b'\x42\x82\x88matroska') != -1
|
||||
|
||||
|
||||
class Webm(Type):
    """
    Matcher for the WebM video type.

    A buffer matches when it begins with ``1A 45 DF A3`` and contains
    the byte sequence ``42 82 84`` followed by ``webm`` anywhere.
    """
    MIME = 'video/webm'
    EXTENSION = 'webm'

    def __init__(self):
        # Hand the class-level MIME type and extension to the base type.
        super(Webm, self).__init__(mime=Webm.MIME, extension=Webm.EXTENSION)

    def match(self, buf):
        """Return True if ``buf`` has the EBML start and webm marker."""
        if not buf.startswith(b'\x1A\x45\xDF\xA3'):
            return False
        return buf.find(b'\x42\x82\x84webm') != -1
|
||||
|
||||
|
||||
class Mov(IsoBmff):
    """
    Implements the MOV video type matcher.

    The buffer must pass the ISO-BMFF check; it then matches when the
    major brand of its ``ftyp`` box is the QuickTime brand.
    """
    MIME = 'video/quicktime'
    EXTENSION = 'mov'

    def __init__(self):
        # Hand the class-level MIME type and extension to the base type.
        super(Mov, self).__init__(
            mime=Mov.MIME,
            extension=Mov.EXTENSION
        )

    def match(self, buf):
        """Return True if the ftyp major brand is the QuickTime brand."""
        if not self._is_isobmff(buf):
            return False

        major_brand, minor_version, compatible_brands = self._get_ftyp(buf)
        # The major brand is decoded from exactly four bytes, so the
        # QuickTime brand is "qt" padded with TWO spaces. Comparing against
        # the three-character 'qt ' could never match a well-formed header.
        return major_brand == 'qt  '
|
||||
|
||||
|
||||
class Avi(Type):
    """
    Matcher for the AVI video type.

    A buffer matches when it is at least 12 bytes long, begins with
    ``RIFF`` and carries ``41 56 49 20`` (``AVI`` plus a space) at
    offsets 8..11. Bytes 4..7 are not inspected.
    """
    MIME = 'video/x-msvideo'
    EXTENSION = 'avi'

    def __init__(self):
        # Hand the class-level MIME type and extension to the base type.
        super(Avi, self).__init__(mime=Avi.MIME, extension=Avi.EXTENSION)

    def match(self, buf):
        """Return True if ``buf`` carries the RIFF/AVI markers."""
        if len(buf) < 12:
            return False
        container = (buf[0], buf[1], buf[2], buf[3])
        form = (buf[8], buf[9], buf[10], buf[11])
        return (container == (0x52, 0x49, 0x46, 0x46) and
                form == (0x41, 0x56, 0x49, 0x20))
|
||||
|
||||
|
||||
class Wmv(Type):
    """
    Matcher for the WMV video type.

    A buffer matches when it is at least ten bytes long and begins with
    ``30 26 B2 75 8E 66 CF 11 A6 D9``.
    """
    MIME = 'video/x-ms-wmv'
    EXTENSION = 'wmv'

    def __init__(self):
        # Hand the class-level MIME type and extension to the base type.
        super(Wmv, self).__init__(mime=Wmv.MIME, extension=Wmv.EXTENSION)

    def match(self, buf):
        """Return True if ``buf`` starts with the ten-byte WMV signature."""
        if len(buf) < 10:
            return False
        expected = (0x30, 0x26, 0xB2, 0x75, 0x8E,
                    0x66, 0xCF, 0x11, 0xA6, 0xD9)
        return tuple(buf[pos] for pos in range(10)) == expected
|
||||
|
||||
|
||||
class Flv(Type):
    """
    Matcher for the FLV video type.

    A buffer matches when it is at least four bytes long and begins
    with ``46 4C 56 01`` (ASCII ``FLV`` followed by ``01``).
    """
    MIME = 'video/x-flv'
    EXTENSION = 'flv'

    def __init__(self):
        # Hand the class-level MIME type and extension to the base type.
        super(Flv, self).__init__(mime=Flv.MIME, extension=Flv.EXTENSION)

    def match(self, buf):
        """Return True if ``buf`` starts with the FLV signature."""
        if len(buf) < 4:
            return False
        return (buf[0], buf[1], buf[2], buf[3]) == (0x46, 0x4C, 0x56, 0x01)
|
||||
|
||||
|
||||
class Mpeg(Type):
    """
    Matcher for the MPEG video type.

    A buffer matches when it is at least four bytes long, begins with
    ``00 00 01`` and its fourth byte lies in the range ``B0``..``BF``
    (inclusive).
    """
    MIME = 'video/mpeg'
    EXTENSION = 'mpg'

    def __init__(self):
        # Hand the class-level MIME type and extension to the base type.
        super(Mpeg, self).__init__(mime=Mpeg.MIME, extension=Mpeg.EXTENSION)

    def match(self, buf):
        """Return True if ``buf`` starts with an MPEG start code."""
        if len(buf) < 4:
            return False
        prefix_ok = (buf[0], buf[1], buf[2]) == (0x0, 0x0, 0x1)
        return prefix_ok and 0xb0 <= buf[3] <= 0xbf
|
||||
|
||||
|
||||
class M3gp(Type):
    """
    Implements the 3gp video type matcher.

    A buffer matches when the bytes ``ftyp3gp`` appear right after the
    four-byte box size, i.e. at offsets 4..10.
    """

    MIME = 'video/3gpp'
    EXTENSION = '3gp'

    def __init__(self):
        # Hand the class-level MIME type and extension to the base type.
        super(M3gp, self).__init__(
            mime=M3gp.MIME,
            extension=M3gp.EXTENSION
        )

    def match(self, buf):
        """
        Return True if ``buf`` carries the ``ftyp3gp`` marker.

        The ``ftyp`` tag of a file's first box follows a four-byte size
        field, so the marker is expected at offset 4. The previous
        implementation only looked at offset 0 and therefore could not
        recognise a regular 3GP file; that offset-0 comparison is kept
        as a fallback so inputs accepted before are still accepted.
        """
        marker = bytearray([0x66, 0x74, 0x79, 0x70, 0x33, 0x67, 0x70])
        return buf[4:11] == marker or buf[:7] == marker
|
84
libs/filetype/utils.py
Normal file
84
libs/filetype/utils.py
Normal file
|
@ -0,0 +1,84 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Python 2.7 workaround
|
||||
try:
|
||||
import pathlib
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
|
||||
# Maximum number of leading bytes taken from an input when extracting
# its header signature.
_NUM_SIGNATURE_BYTES = 8192
|
||||
|
||||
|
||||
def get_signature_bytes(path):
    """
    Read the header signature of a file on disk.

    Opens ``path`` in binary mode and reads at most
    ``_NUM_SIGNATURE_BYTES`` bytes from its start.

    Args:
        path: path to the file, as accepted by ``open()``.

    Returns:
        The bytes read, wrapped in a ``bytearray``.
    """
    with open(path, 'rb') as handle:
        header = handle.read(_NUM_SIGNATURE_BYTES)
    return bytearray(header)
|
||||
|
||||
|
||||
def signature(array):
    """
    Return the header-signature portion of an in-memory buffer.

    Args:
        array: sliceable buffer (e.g. a bytearray) holding file content.

    Returns:
        A slice of ``array`` containing at most its first
        ``_NUM_SIGNATURE_BYTES`` items.
    """
    limit = min(len(array), _NUM_SIGNATURE_BYTES)
    return array[:limit]
|
||||
|
||||
|
||||
def get_bytes(obj):
    """
    Extract the header signature from any supported kind of input.

    Args:
        obj: a path (``str`` or ``pathlib.PurePath``), a ``bytes``,
            ``bytearray`` or ``memoryview`` buffer, or a file-like
            object exposing ``read()``.

    Returns:
        At most the first ``_NUM_SIGNATURE_BYTES`` bytes of the content.

    Raises:
        TypeError: if obj is not a supported type.
    """
    # The order of these checks is significant and mirrors the original.
    if isinstance(obj, bytearray):
        return signature(obj)

    if isinstance(obj, str):
        return get_signature_bytes(obj)

    if isinstance(obj, bytes):
        return signature(obj)

    if isinstance(obj, memoryview):
        return bytearray(signature(obj).tolist())

    if isinstance(obj, pathlib.PurePath):
        return get_signature_bytes(obj)

    if not hasattr(obj, 'read'):
        raise TypeError('Unsupported type as file input: %s' % type(obj))

    if not (hasattr(obj, 'tell') and hasattr(obj, 'seek')):
        # No way to rewind: consume from the current position.
        return get_bytes(obj.read(_NUM_SIGNATURE_BYTES))

    # Read from the start, then put the cursor back where the caller
    # left it.
    saved_pos = obj.tell()
    obj.seek(0)
    head = obj.read(_NUM_SIGNATURE_BYTES)
    obj.seek(saved_pos)
    return get_bytes(head)
|
|
@ -60,6 +60,9 @@ tzlocal==5.2
|
|||
# Required-by: beautifulsoup4
|
||||
soupsieve==2.3.2.post1
|
||||
|
||||
# Required-by: deathbycaptcha
|
||||
filetype==1.2.0
|
||||
|
||||
# Required-by: ffsubsync
|
||||
auditok<=0.1.5 # do not upgrade unless ffsubsync requirements.txt change
|
||||
ffmpeg-python==0.2.0
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue