diff --git a/.gitignore b/.gitignore index 042257d..88d4214 100644 --- a/.gitignore +++ b/.gitignore @@ -1,10 +1,364 @@ -.coverage -.idea/ -bin/ + +# Created by https://www.toptal.com/developers/gitignore/api/visualstudiocode,python,venv,windows,linux,macos,vim,emacs,sublimetext +# Edit at https://www.toptal.com/developers/gitignore?templates=visualstudiocode,python,venv,windows,linux,macos,vim,emacs,sublimetext + +### Emacs ### +# -*- mode: gitignore; -*- +*~ +\#*\# +/.emacs.desktop +/.emacs.desktop.lock +*.elc +auto-save-list +tramp +.\#* + +# Org-mode +.org-id-locations +*_archive + +# flymake-mode +*_flymake.* + +# eshell files +/eshell/history +/eshell/lastdir + +# elpa packages +/elpa/ + +# reftex files +*.rel + +# AUCTeX auto folder +/auto/ + +# cask packages +.cask/ dist/ -*.egg-info/ + +# Flycheck +flycheck_*.el + +# server auth directory +/server/ + +# projectiles files +.projectile + +# directory configuration +.dir-locals.el + +# network security +/network-security.data + + +### Linux ### + +# temporary files which can be created if a process still has a handle open of a deleted file +.fuse_hidden* + +# KDE directory preferences +.directory + +# Linux trash folder which might appear on any partition or disk +.Trash-* + +# .nfs files are created when an open file is removed but is still being accessed +.nfs* + +### macOS ### +# General +.DS_Store +.AppleDouble +.LSOverride + +# Icon must end with two \r +Icon + + +# Thumbnails +._* + +# Files that might appear in the root of a volume +.DocumentRevisions-V100 +.fseventsd +.Spotlight-V100 +.TemporaryItems +.Trashes +.VolumeIcon.icns +.com.apple.timemachine.donotpresent + +# Directories potentially created on remote AFP share +.AppleDB +.AppleDesktop +Network Trash Folder +Temporary Items +.apdisk + +### Python ### +# Byte-compiled / optimized / DLL files __pycache__/ -venv/ -practice/ -.vscode +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python build/ +develop-eggs/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +### SublimeText ### +# Cache files for Sublime Text +*.tmlanguage.cache +*.tmPreferences.cache +*.stTheme.cache + +# Workspace files are user-specific +*.sublime-workspace + +# Project files should be checked into the repository, unless a significant +# proportion of contributors will probably not be using Sublime Text +# *.sublime-project + +# SFTP configuration file +sftp-config.json +sftp-config-alt*.json + +# Package control specific files +Package Control.last-run +Package Control.ca-list +Package Control.ca-bundle +Package Control.system-ca-bundle +Package Control.cache/ +Package Control.ca-certs/ +Package Control.merged-ca-bundle +Package Control.user-ca-bundle +oscrypto-ca-bundle.crt +bh_unicode_properties.cache + +# Sublime-github package stores a github token in this file +# https://packagecontrol.io/packages/sublime-github +GitHub.sublime-settings + +### venv ### +# Virtualenv +# http://iamzed.com/2009/05/07/a-primer-on-virtualenv/ +[Bb]in +[Ii]nclude +[Ll]ib +[Ll]ib64 +[Ll]ocal +[Ss]cripts +pyvenv.cfg +pip-selfcheck.json + +### Vim ### +# Swap +[._]*.s[a-v][a-z] +!*.svg # comment out if you don't need vector files +[._]*.sw[a-p] +[._]s[a-rt-v][a-z] +[._]ss[a-gi-z] +[._]sw[a-p] + +# Session +Session.vim +Sessionx.vim + +# Temporary +.netrwhist +# Auto-generated tag files +tags +# Persistent undo +[._]*.un~ + +### VisualStudioCode ### +.vscode/* +!.vscode/settings.json +!.vscode/tasks.json +!.vscode/launch.json +!.vscode/extensions.json +!.vscode/*.code-snippets + +# Local History for Visual Studio Code +.history/ + +# Built Visual Studio Code Extensions +*.vsix + +### VisualStudioCode Patch ### +# Ignore all local history of files +.history +.ionide + +# Support for Project snippet scope + +### Windows ### +# Windows thumbnail cache files +Thumbs.db +Thumbs.db:encryptable +ehthumbs.db +ehthumbs_vista.db + +# Dump file +*.stackdump + +# Folder config file +[Dd]esktop.ini + +# Recycle Bin used on file shares +$RECYCLE.BIN/ + +# Windows Installer files +*.cab +*.msi +*.msix +*.msm +*.msp + +# Windows shortcuts +*.lnk + +# End of https://www.toptal.com/developers/gitignore/api/visualstudiocode,python,venv,windows,linux,macos,vim,emacs,sublimetext \ No newline at end of file diff --git a/linkpreview/__init__.py b/linkpreview/__init__.py index d8653d9..e914e62 100644 --- a/linkpreview/__init__.py +++ b/linkpreview/__init__.py @@ -4,6 +4,6 @@ from .compose import link_preview from .exceptions import LinkPreviewException -__version__ = "0.2.0" +__version__ = "0.3.0" __all__ = (Link, LinkGrabber, LinkPreview, link_preview, LinkPreviewException) diff --git a/linkpreview/preview.py b/linkpreview/preview.py index 7e9c21f..0dbe2de 100644 --- a/linkpreview/preview.py +++ b/linkpreview/preview.py @@ -25,6 +25,10 @@ def description(self): def image(self): raise NotImplementedError + @property + def video(self): + raise NotImplementedError + class Generic(PreviewBase): """ @@ -68,20 +72,28 @@ def description(self): @property def image(self): + return self._get_featured_media("img") + + @property + def video(self): + return self._get_featured_media("video") + + def _get_featured_media(self, tag_name): """ - Extract preview image from the given web page. + Extract preview media from the given web page. """ soup = self._soup - # extract the first image which is sibling to the first h1 + # extract the first media which is sibling to the first h1 first_h1 = soup.find("h1") if not first_h1: return - first_image = first_h1.find_next_sibling("img") - if first_image and first_image["src"]: + first_image = first_h1.find_next_sibling(tag_name) + if first_image and first_image.get("src"): return first_image["src"] + class SocialPreviewBase(PreviewBase): """ Abstract class for OpenGraph, TwitterCard and Google+. @@ -91,8 +103,14 @@ class SocialPreviewBase(PreviewBase): def _get_property(self, name): meta = self._soup.find("meta", attrs={self.__target_attr__: name}) - if meta and meta["content"]: - return meta["content"] + if meta: + return meta.get("content", meta.get("value")) + + def _get_first_property_match(self, names): + for name in names: + value = self._get_property(name) + if value: + return value class OpenGraph(SocialPreviewBase): @@ -113,7 +131,11 @@ def description(self): @property def image(self): - return self._get_property("og:image") + return self._get_first_property_match(["og:image", "og:image:secure_url", "og:image:url"]) + + @property + def video(self): + return self._get_first_property_match(["og:video", "og:video:secure_url", "og:video:url"]) class TwitterCard(SocialPreviewBase): @@ -136,6 +158,10 @@ def description(self): def image(self): return self._get_property("twitter:image") + @property + def video(self): + return self._get_property("twitter:player") + class Schema(SocialPreviewBase): """ @@ -157,6 +183,10 @@ def description(self): def image(self): return self._get_property("image") + @property + def video(self): + return self._get_property("video") + class LinkPreview: def __init__(self, link: Link, parser: str = "html.parser"): @@ -184,6 +214,10 @@ def description(self): def image(self): return self._find_attribute("image") + @LazyAttribute + def video(self): + return self._find_attribute("video") + @LazyAttribute def absolute_image(self): if not self.image: