From f05551be7ef7072024f5dba9980ef8b0eb6aff0c Mon Sep 17 00:00:00 2001
From: Andreas Stocker
Date: Sun, 25 Feb 2018 10:19:05 +0100
Subject: [PATCH] Implemented NFKD normalisation

---
Review note: in the python3 branch of nfkd() the result of .encode() is a
bytes object, and print() on bytes emits the literal b'...' wrapper. The
value is therefore decoded back to str before printing. The python2 branch
already prints a plain str and is left as it was.

 README.md      |  1 +
 slugify        | 19 ++++++++++++++++++-
 slugify.1      |  6 +++++-
 slugify.1.ronn |  5 ++++-
 4 files changed, 28 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 1b3da96..b2154f6 100644
--- a/README.md
+++ b/README.md
@@ -13,6 +13,7 @@ Simply enter the slugify command without any arguments or with the -h option to
  -d: replace spaces with dashes (instead of default underscores)
  -h: help
  -i: ignore case
+ -k: NFKD normalisation
  -n: dry run
  -t: treat existing dashes as spaces
  -u: treat existing underscores as spaces (useful with -a, -c, or -d)
diff --git a/slugify b/slugify
index d7b9d68..d685003 100755
--- a/slugify
+++ b/slugify
@@ -15,10 +15,11 @@ ignore_case=0
 dry_run=0
 dashes_to_spaces=0
 underscores_to_spaces=0
+nfkd_normalisation=0
 verbose=0
 
 ## Initialize valid options
-opt_string=acdhintuv
+opt_string=acdhikntuv
 
 ## Usage function
 function print_usage(){
@@ -28,12 +29,22 @@ function print_usage(){
   echo " -d: replace spaces with dashes (instead of default underscores)"
   echo " -h: help"
   echo " -i: ignore case"
+  echo " -k: NFKD normalisation"
   echo " -n: dry run"
   echo " -t: treat existing dashes as spaces"
   echo " -u: treat existing underscores as spaces (useful with -a, -c, or -d)"
   echo " -v: verbose"
 }
 
+## NFKD-normalise $1 and print it with every non-ASCII character dropped (python3 if executable, else python2)
+function nfkd(){
+  if [ -x "$(command -v python3)" ]; then
+    python3 -c 'import sys,unicodedata;print(unicodedata.normalize("NFKD", sys.argv[1]).encode("ascii", "ignore").decode("ascii"))' "$1"
+  else
+    python2 -c 'import sys,unicodedata;print unicodedata.normalize("NFKD", unicode(sys.argv[1], "utf-8")).encode("ascii", "ignore")' "$1"
+  fi
+}
+
 ## For each provided option arg
 while getopts $opt_string opt
 do
@@ -46,6 +57,7 @@ do
     n) dry_run=1 ;;
     t) dashes_to_spaces=1 ;;
     u) underscores_to_spaces=1 ;;
+    k) nfkd_normalisation=1 ;;
     v) verbose=1 ;;
     *) exit 1 ;;
   esac
@@ -87,6 +99,11 @@ for source in "$@"; do
   ## Initialize target
   target="$source"
 
+  ## Optionally do an NFKD normalisation
+  if [ $nfkd_normalisation -eq 1 ]; then
+    target=$(nfkd "$target")
+  fi
+
   ## Optionally convert to lowercase
   if [ $ignore_case -eq 0 ]; then
     target=$(echo "$target" | tr A-Z a-z )
diff --git a/slugify.1 b/slugify.1
index 3476e5b..90b61c3 100644
--- a/slugify.1
+++ b/slugify.1
@@ -7,7 +7,7 @@
 \fBslugify\fR \- convert filenames and directories to a web friendly format
 .
 .SH "SYNOPSIS"
-\fBslugify\fR [\-acdhintuv] \fIsource_file\fR \.\.\.
+\fBslugify\fR [\-acdhikntuv] \fIsource_file\fR \.\.\.
 .
 .SH "DESCRIPTION"
 \fBSlugify\fR converts filenames and directories to a web friendly format\. Before running any command, consider a dry run \fB\-n\fR before hand\.
@@ -36,6 +36,10 @@ Display help\.
 Ignore case\.
 .
 .TP
+\fB\-k\fR
+NFKD normalisation\.
+.
+.TP
 \fB\-n\fR
 Dry run\.
 .
diff --git a/slugify.1.ronn b/slugify.1.ronn
index 7fa53f7..35fffc7 100644
--- a/slugify.1.ronn
+++ b/slugify.1.ronn
@@ -3,7 +3,7 @@ slugify(1) -- convert filenames and directories to a web friendly format
 
 ## SYNOPSIS
 
-`slugify` [-acdhintuv] ...
+`slugify` [-acdhikntuv] ...
 
 ## DESCRIPTION
 
@@ -30,6 +30,9 @@ Options include:
  * `-i`:
  Ignore case.
 
+ * `-k`:
+ NFKD normalisation.
+
  * `-n`:
  Dry run.
 