Use pandoc to remove HTML from titles.
This commit is contained in:
		| @@ -2,16 +2,9 @@ | ||||
|  | ||||
| for l in $3 ; do | ||||
| if [[ "${l}" =~ http://|https://|www\..* ]]; then | ||||
| pageTitle="$(curl -s --connect-timeout 5 "$l" | sed -n -e 'H;${x;s!.*<head[^>]*>\(.*\)</head>.*!\1!;T;s!.*<title>\(.*\)</title>.*!\1!p}')" | ||||
|  | ||||
| # Fix up pageTitle a bit. | ||||
| pageTitle="${pageTitle//'/\'}" | ||||
| pageTitle="${pageTitle//–/-}" | ||||
|  | ||||
| shortLink="${l#*://}" | ||||
| shortLink="${shortLink%%/*}" | ||||
| if [ ${#pageTitle} -gt 1 ]; then | ||||
| msg "$2" "$pageTitle at $shortLink" | ||||
| pageTitle="$(curl -L -s --connect-timeout 5 "$l" | sed -n -e 'H;${x;s!.*<head[^>]*>\(.*\)</head>.*!\1!;T;s!.*<title>\(.*\)</title>.*!\1!p}' | pandoc -t plain | tr '[:space:]' ' ')" | ||||
| if [[ ${#pageTitle} -gt 1 ]]; then | ||||
| msg "$2" "$pageTitle" | ||||
| fi | ||||
| fi | ||||
| done | ||||
|   | ||||
		Reference in New Issue
	
	Block a user