/url updated to actually work again.

2025-07-28 17:48:04 -04:00
parent 2a409645de
commit 5197af9eba
1 changed files with 166 additions and 63 deletions
@@ -176,7 +176,9 @@ BEGIN {
 # lists=                                 # Comma-separated list of lists to follow

 # === URL HANDLING ===
-# urlopen=echo %U                        # Command to open URLs (%U = URL)
+# urlopen=echo %U                        # Command to open URLs (%U = URL) - deprecated, use cli_browser/gui_browser
+# cli_browser=                           # CLI browser (w3m, elinks, lynx) - auto-detected if empty
+# gui_browser=                           # GUI browser (xdg-open, firefox, etc) - auto-detected if empty
 # shoreblogurl=http://is.gd/api.php?longurl=  # URL shortening service

 # === SYSTEM SETTINGS ===
@@ -265,7 +267,7 @@ EOF
 		rlurl noprompt shoreblogurl newline wrap verify autosplit
 		notimeline queryurl fediverseserver colourprompt colourme
 		colourdm colourreply colourwarn coloursearch colourlist idurl
-		urlopen delurl notrack dmdelurl favsurl
+		urlopen cli_browser gui_browser delurl notrack dmdelurl favsurl
 		favurl favdelurl slowpost notifies filter colourdefault
 		followurl leaveurl dmupdate mentions backload
 		lat long location searchhits blockurl blockdelurl woeid
@@ -1080,6 +1082,8 @@ $pause = (($anonymous) ? 120 : "auto") if (!defined $pause);
 $superverbose ||= 0;
 $avatar ||= "";
 $urlopen ||= 'echo %U';
+$cli_browser ||= '';
+$gui_browser ||= '';
 $hold ||= 0;
 $daemon ||= 0;
 $maxhist ||= 19;
@@ -3362,70 +3366,113 @@ EOF
 			$genurl = $idurl;
 		} 

-		# to be TOS-compliant, we must try entities first to use
-		# t.co wrapped links. this is a tiny version of /entities.
-		unless ($notco) {
-			my $id = $post->{'reblog'}->{'id_str'}
-				|| $post->{'id_str'};
-			my $hash;
-
-			# only fetch if we have to. if we already fetched
-			# because we were given a direct id_str instead of a
-			# menu code, then we already have the entities.
-			if ($code !~ /^[0-9]+$/) {
-				$hash = &grabjson("${genurl}?id=${id}",
-					0, 0, 0, undef, 1);
-			} else {
-				# MAKE MONEY FAST WITH OUR QUICK CACHE PLAN
-				$hash = $post;
+		# Extract URLs from Mastodon post content
+		my $didprint = 0;
+		
+		# First, try to get the FULL post data from the API to access structured URL fields
+		my $full_post = undef;
+		if (defined($genurl) && defined($post->{'id_str'})) {
+			my $post_id = $post->{'id_str'};
+			# Replace %I placeholder with actual post ID
+			my $api_url = $genurl;
+			$api_url =~ s/%I/$post_id/g;
+			print STDERR "-- DEBUG: Fetching full post data from API for URL extraction: $api_url\n" if ($superverbose);
+			$full_post = &grabjson($api_url, 0, 0, 0, undef, 1);
+		}
+		
+		# Use full post data if available, otherwise fall back to cached data
+		my $working_post = (defined($full_post) && ref($full_post) eq 'HASH') ? $full_post : $post;
+		
+		# DEBUG: Show what we have in the post structure
+		print STDERR "-- DEBUG: URL parsing - using " . (defined($full_post) ? "FULL" : "CACHED") . " post data\n" if ($superverbose);
+		print STDERR "-- DEBUG: URL parsing - post keys: " . join(", ", keys %$working_post) . "\n" if ($superverbose);
+		print STDERR "-- DEBUG: URL parsing - content field: '" . ($working_post->{'content'} || 'UNDEFINED') . "'\n" if ($superverbose);
+		print STDERR "-- DEBUG: URL parsing - text field: '" . ($working_post->{'text'} || 'UNDEFINED') . "'\n" if ($superverbose);
+		
+		# PRIORITY 1: Extract URLs from Mastodon's structured URL data (card/preview_url)
+		if (defined($working_post->{'card'}) && ref($working_post->{'card'}) eq 'HASH') {
+			my $card = $working_post->{'card'};
+			if (defined($card->{'url'}) && length($card->{'url'})) {
+				print STDERR "-- DEBUG: Found card URL: " . $card->{'url'} . "\n" if ($superverbose);
+				&openurl($card->{'url'});
+				$didprint++;
 			}
-			if (defined($hash) && ref($hash) eq 'HASH') {
-				my $w;
-				my $v;
-				my $didprint = 0;
-
-				# fediverse puts entities in multiple fields.
-				foreach $w (qw(media urls)) {
-					my $p = $hash->{'entities'}->{$w};
-					next if (!defined($p) ||
-						ref($p) ne 'ARRAY');
-					foreach $v (@{ $p }) {
-						next if (!defined($v) ||
-							ref($v) ne 'HASH');
-						next if (!length($v->{'url'}) ||
-							(!length($v->{'expanded_url'}) &&
-					 		!length($v->{'media_url'})));
-						my $u1 = &descape($v->{'url'});
-						&openurl($u1);
-						$didprint++;
-					}
+		}
+		
+		# PRIORITY 2: Extract URLs from preview_url field (some servers use this)
+		if (!$didprint && defined($working_post->{'preview_url'}) && length($working_post->{'preview_url'})) {
+			print STDERR "-- DEBUG: Found preview_url: " . $working_post->{'preview_url'} . "\n" if ($superverbose);
+			&openurl($working_post->{'preview_url'});
+			$didprint++;
+		}
+		
+		# PRIORITY 3: Media attachments
+		if (!$didprint && defined($working_post->{'media_attachments'}) && 
+		    ref($working_post->{'media_attachments'}) eq 'ARRAY') {
+			foreach my $media (@{ $working_post->{'media_attachments'} }) {
+				if (defined($media->{'url'}) && length($media->{'url'})) {
+					print STDERR "-- DEBUG: Found media URL: " . $media->{'url'} . "\n" if ($superverbose);
+					&openurl($media->{'url'});
+					$didprint++;
 				}
-				print $stdout
-				"-- sorry, couldn't find any URL.\n"
-					if (!$didprint);
-				return 0;
 			}
-			print $stdout
-				"-- unable to use t.co URLs, using fallback\n";
 		}
-		# that failed, so fall back on the old method.
-		my $text = &descape($post->{'text'});
-		# findallurls
-		while ($text
-	=~ s#(h?ttp|h?ttps|ftp|gopher)://([a-zA-Z0-9_~/:%\-\+\.\=\&\?\#,]+)##){
-# sigh. I HATE YOU TINYARRO.WS
-#TODO
-# eventually we will have to put a punycode implementation into openurl
-# to handle things like Mac OS X's open which don't understand UTF-8 URLs.
-# when we do, uncomment this again
-#	=~ s#(http|https|ftp|gopher)://([^'\\]+?)('|\\|\s|$)##) {
-			my $url = $1 . "://$2";
-			$url = "h$url" if ($url =~ /^ttps?:/);
-			$url =~ s/[\.\?]$//;
-			&openurl($url);
+		
+		# PRIORITY 4: Parse URLs from HTML content (href attributes)
+		if (!$didprint) {
+			my $content = $working_post->{'content'} || $working_post->{'text'} || '';
+			if (length($content)) {
+				# Extract URLs from href attributes in HTML
+				while ($content =~ s/<a[^>]+href=["']([^"']+)["'][^>]*>[^<]*<\/a>//i) {
+					my $url = $1;
+					next if ($url =~ /^#/); # Skip hashtag links
+					next if ($url =~ /^\@/); # Skip mention links
+					print STDERR "-- DEBUG: Found HTML href URL: " . $url . "\n" if ($superverbose);
+					&openurl($url);
+					$didprint++;
+				}
+			}
 		}
+		
+		if ($didprint) {
+			return 0;
+		}
+		# PRIORITY 5: Final fallback - parse plain text URLs from display text
+		# This handles truncated URLs in display text as a last resort
+		if (!$didprint) {
+			# Re-get content since we may have modified it above with regex substitutions
+			my $original_content = $working_post->{'content'} || $working_post->{'text'} || '';
+			my $plain_content = &html_to_text($original_content);
+			$plain_content = &descape($plain_content);
+			
+			print STDERR "-- DEBUG: Parsing plain text content: '$plain_content'\n" if ($superverbose);
+			
+			# findallurls - extract any remaining URLs from plain text
+			# First try URLs with protocols
+			while ($plain_content
+		=~ s#(h?ttp|h?ttps|ftp|gopher)://([a-zA-Z0-9_~/:%\-\+\.\=\&\?\#,]+)##){
+				my $url = $1 . "://$2";
+				$url = "h$url" if ($url =~ /^ttps?:/);
+				$url =~ s/[\.\?]$//;
+				print STDERR "-- DEBUG: Found plain text URL with protocol: " . $url . "\n" if ($superverbose);
+				&openurl($url);
+				$didprint++;
+			}
+			
+			# Then try URLs without protocols (assume https) - WARNING: may be truncated
+			while ($plain_content
+		=~ s#\b([a-zA-Z0-9\-]+\.[a-zA-Z]{2,}(?:/[a-zA-Z0-9_~/%:\-\+\.\=\&\?\#,]*)?)\b##){
+				my $url = "https://$1";
+				$url =~ s/[\.\?]$//;
+				print STDERR "-- DEBUG: Found plain text URL without protocol (may be truncated): " . $url . "\n" if ($superverbose);
+				print $stdout "-- WARNING: URL may be truncated from display text: $url\n";
+				&openurl($url);
+				$didprint++;
+			}
+		}
+		
 		print $stdout "-- sorry, couldn't find any URL.\n"
-			if (!defined($urlshort));
+			if (!$didprint);
 		return 0;
 	}

@@ -7932,15 +7979,71 @@ sub generate_shortdomain {
 		if (!length($shoreblogurldomain));
 }

+sub detect_browser {
+	my $is_gui = defined($ENV{'DISPLAY'}) && length($ENV{'DISPLAY'});
+	my $browser;
+	
+	if ($is_gui) {
+		# GUI environment - check gui_browser setting first
+		if (length($gui_browser)) {
+			return ($gui_browser, 1);  # Return browser and background flag
+		}
+		
+		# Try xdg-open first (most reliable on Linux)
+		if (system("which xdg-open >/dev/null 2>&1") == 0) {
+			return ('xdg-open %U', 1);
+		}
+		
+		# Fallback GUI browsers
+		my @gui_browsers = qw(brave chromium firefox google-chrome);
+		foreach my $br (@gui_browsers) {
+			if (system("which $br >/dev/null 2>&1") == 0) {
+				return ("$br %U", 1);
+			}
+		}
+		
+		# If no GUI browser found, fall back to CLI
+		print STDERR "-- warning: no GUI browser found, falling back to CLI browser\n";
+	}
+	
+	# CLI environment or fallback - check cli_browser setting first
+	if (length($cli_browser)) {
+		return ($cli_browser, 0);  # CLI browsers are blocking
+	}
+	
+	# Try CLI browsers in order of preference
+	my @cli_browsers = qw(w3m elinks lynx);
+	foreach my $br (@cli_browsers) {
+		if (system("which $br >/dev/null 2>&1") == 0) {
+			return ("$br %U", 0);
+		}
+	}
+	
+	# Ultimate fallback - use the old urlopen setting
+	return ($urlopen, 0);
+}
+
 sub openurl {
-	my $comm = $urlopen;
 	my $url = shift;
+	my ($comm, $should_background) = &detect_browser();
+	
+	# Handle gopher URLs through gateway if not using lynx
 	$url = "http://gopher.floodgap.com/gopher/gw?".&url_oauth_sub($url)
 		if ($url =~ m#^gopher://# && $comm !~ /^[^\s]*lynx/);
+	
 	$urlshort = $url;
 	$comm =~ s/\%U/'$url'/g;
-	print $stdout "($comm)\n";
-	system("$comm");
+	
+	if ($should_background) {
+		# Background GUI browsers
+		$comm .= " &";
+		print $stdout "($comm)\n";
+		system("$comm");
+	} else {
+		# CLI browsers run in foreground
+		print $stdout "($comm)\n";
+		system("$comm");
+	}
 }

 sub urlshorten {