From 89a78fef32423c420ad109b531ec407b4e7faf39 Mon Sep 17 00:00:00 2001
From: Storm Dragon
Date: Mon, 10 Aug 2020 09:35:15 -0400
Subject: [PATCH] Use pandoc to remove html from titles.

---
Reviewer note (not part of the commit message): this copy of the patch had
been collapsed onto a single line. Line breaks are restored here, and one
empty context line is re-inserted at the top of the hunk so the body matches
the "-2,16 +2,9" counts. Any leading indentation of the hunk lines is not
recoverable from that copy and may differ from the target file. The sed
script is reproduced as received; as written, its second substitution
`s!.*\(.*\).*!\1!p` always yields an empty capture, which suggests literal
text was stripped from the patterns in transit -- TODO confirm against the
upstream commit before applying.

 triggers/link/link.sh | 13 +++----------
 1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/triggers/link/link.sh b/triggers/link/link.sh
index 0fd5792..c7bbea9 100755
--- a/triggers/link/link.sh
+++ b/triggers/link/link.sh
@@ -2,16 +2,9 @@
 
 for l in $3 ; do
 if [[ "${l}" =~ http://|https://|www\..* ]]; then
-pageTitle="$(curl -s --connect-timeout 5 "$l" | sed -n -e 'H;${x;s!.*]*>\(.*\).*!\1!;T;s!.*\(.*\).*!\1!p}')"
-
-# Fix up pageTitle a bit.
-pageTitle="${pageTitle//'/\'}"
-pageTitle="${pageTitle//–/-}"
-
-shortLink="${l#*://}"
-shortLink="${shortLink%%/*}"
-if [ ${#pageTitle} -gt 1 ]; then
-msg "$2" "$pageTitle at $shortLink"
+pageTitle="$(curl -L -s --connect-timeout 5 "$l" | sed -n -e 'H;${x;s!.*]*>\(.*\).*!\1!;T;s!.*\(.*\).*!\1!p}' | pandoc -t plain | tr '[:space:]' ' ')"
+if [[ ${#pageTitle} -gt 1 ]]; then
+msg "$2" "$pageTitle"
 fi
 fi
 done