diff --git a/ttyverse.pl b/ttyverse.pl index 151a39f..f989220 100755 --- a/ttyverse.pl +++ b/ttyverse.pl @@ -176,7 +176,9 @@ BEGIN { # lists= # Comma-separated list of lists to follow # === URL HANDLING === -# urlopen=echo %U # Command to open URLs (%U = URL) +# urlopen=echo %U # Command to open URLs (%U = URL) - deprecated, use cli_browser/gui_browser +# cli_browser= # CLI browser (w3m, elinks, lynx) - auto-detected if empty +# gui_browser= # GUI browser (xdg-open, firefox, etc) - auto-detected if empty # shoreblogurl=http://is.gd/api.php?longurl= # URL shortening service # === SYSTEM SETTINGS === @@ -265,7 +267,7 @@ EOF rlurl noprompt shoreblogurl newline wrap verify autosplit notimeline queryurl fediverseserver colourprompt colourme colourdm colourreply colourwarn coloursearch colourlist idurl - urlopen delurl notrack dmdelurl favsurl + urlopen cli_browser gui_browser delurl notrack dmdelurl favsurl favurl favdelurl slowpost notifies filter colourdefault followurl leaveurl dmupdate mentions backload lat long location searchhits blockurl blockdelurl woeid @@ -1080,6 +1082,8 @@ $pause = (($anonymous) ? 120 : "auto") if (!defined $pause); $superverbose ||= 0; $avatar ||= ""; $urlopen ||= 'echo %U'; +$cli_browser ||= ''; +$gui_browser ||= ''; $hold ||= 0; $daemon ||= 0; $maxhist ||= 19; @@ -3362,70 +3366,113 @@ EOF $genurl = $idurl; } - # to be TOS-compliant, we must try entities first to use - # t.co wrapped links. this is a tiny version of /entities. - unless ($notco) { - my $id = $post->{'reblog'}->{'id_str'} - || $post->{'id_str'}; - my $hash; - - # only fetch if we have to. if we already fetched - # because we were given a direct id_str instead of a - # menu code, then we already have the entities. - if ($code !~ /^[0-9]+$/) { - $hash = &grabjson("${genurl}?id=${id}", - 0, 0, 0, undef, 1); - } else { - # MAKE MONEY FAST WITH OUR QUICK CACHE PLAN - $hash = $post; + # Extract URLs from Mastodon post content + my $didprint = 0; + + # First, try to get the FULL post data from the API to access structured URL fields + my $full_post = undef; + if (defined($genurl) && defined($post->{'id_str'})) { + my $post_id = $post->{'id_str'}; + # Replace %I placeholder with actual post ID + my $api_url = $genurl; + $api_url =~ s/%I/$post_id/g; + print STDERR "-- DEBUG: Fetching full post data from API for URL extraction: $api_url\n" if ($superverbose); + $full_post = &grabjson($api_url, 0, 0, 0, undef, 1); + } + + # Use full post data if available, otherwise fall back to cached data + my $working_post = (defined($full_post) && ref($full_post) eq 'HASH') ? $full_post : $post; + + # DEBUG: Show what we have in the post structure + print STDERR "-- DEBUG: URL parsing - using " . (defined($full_post) ? "FULL" : "CACHED") . " post data\n" if ($superverbose); + print STDERR "-- DEBUG: URL parsing - post keys: " . join(", ", keys %$working_post) . "\n" if ($superverbose); + print STDERR "-- DEBUG: URL parsing - content field: '" . ($working_post->{'content'} || 'UNDEFINED') . "'\n" if ($superverbose); + print STDERR "-- DEBUG: URL parsing - text field: '" . ($working_post->{'text'} || 'UNDEFINED') . "'\n" if ($superverbose); + + # PRIORITY 1: Extract URLs from Mastodon's structured URL data (card/preview_url) + if (defined($working_post->{'card'}) && ref($working_post->{'card'}) eq 'HASH') { + my $card = $working_post->{'card'}; + if (defined($card->{'url'}) && length($card->{'url'})) { + print STDERR "-- DEBUG: Found card URL: " . $card->{'url'} . "\n" if ($superverbose); + &openurl($card->{'url'}); + $didprint++; } - if (defined($hash) && ref($hash) eq 'HASH') { - my $w; - my $v; - my $didprint = 0; - - # fediverse puts entities in multiple fields. - foreach $w (qw(media urls)) { - my $p = $hash->{'entities'}->{$w}; - next if (!defined($p) || - ref($p) ne 'ARRAY'); - foreach $v (@{ $p }) { - next if (!defined($v) || - ref($v) ne 'HASH'); - next if (!length($v->{'url'}) || - (!length($v->{'expanded_url'}) && - !length($v->{'media_url'}))); - my $u1 = &descape($v->{'url'}); - &openurl($u1); - $didprint++; - } + } + + # PRIORITY 2: Extract URLs from preview_url field (some servers use this) + if (!$didprint && defined($working_post->{'preview_url'}) && length($working_post->{'preview_url'})) { + print STDERR "-- DEBUG: Found preview_url: " . $working_post->{'preview_url'} . "\n" if ($superverbose); + &openurl($working_post->{'preview_url'}); + $didprint++; + } + + # PRIORITY 3: Media attachments + if (!$didprint && defined($working_post->{'media_attachments'}) && + ref($working_post->{'media_attachments'}) eq 'ARRAY') { + foreach my $media (@{ $working_post->{'media_attachments'} }) { + if (defined($media->{'url'}) && length($media->{'url'})) { + print STDERR "-- DEBUG: Found media URL: " . $media->{'url'} . "\n" if ($superverbose); + &openurl($media->{'url'}); + $didprint++; } - print $stdout - "-- sorry, couldn't find any URL.\n" - if (!$didprint); - return 0; } - print $stdout - "-- unable to use t.co URLs, using fallback\n"; } - # that failed, so fall back on the old method. - my $text = &descape($post->{'text'}); - # findallurls - while ($text - =~ s#(h?ttp|h?ttps|ftp|gopher)://([a-zA-Z0-9_~/:%\-\+\.\=\&\?\#,]+)##){ -# sigh. I HATE YOU TINYARRO.WS -#TODO -# eventually we will have to put a punycode implementation into openurl -# to handle things like Mac OS X's open which don't understand UTF-8 URLs. -# when we do, uncomment this again -# =~ s#(http|https|ftp|gopher)://([^'\\]+?)('|\\|\s|$)##) { - my $url = $1 . "://$2"; - $url = "h$url" if ($url =~ /^ttps?:/); - $url =~ s/[\.\?]$//; - &openurl($url); + + # PRIORITY 4: Parse URLs from HTML content (href attributes) + if (!$didprint) { + my $content = $working_post->{'content'} || $working_post->{'text'} || ''; + if (length($content)) { + # Extract URLs from href attributes in HTML + while ($content =~ s/]+href=["']([^"']+)["'][^>]*>[^<]*<\/a>//i) { + my $url = $1; + next if ($url =~ /^#/); # Skip hashtag links + next if ($url =~ /^\@/); # Skip mention links + print STDERR "-- DEBUG: Found HTML href URL: " . $url . "\n" if ($superverbose); + &openurl($url); + $didprint++; + } + } } + + if ($didprint) { + return 0; + } + # PRIORITY 5: Final fallback - parse plain text URLs from display text + # This handles truncated URLs in display text as a last resort + if (!$didprint) { + # Re-get content since we may have modified it above with regex substitutions + my $original_content = $working_post->{'content'} || $working_post->{'text'} || ''; + my $plain_content = &html_to_text($original_content); + $plain_content = &descape($plain_content); + + print STDERR "-- DEBUG: Parsing plain text content: '$plain_content'\n" if ($superverbose); + + # findallurls - extract any remaining URLs from plain text + # First try URLs with protocols + while ($plain_content + =~ s#(h?ttp|h?ttps|ftp|gopher)://([a-zA-Z0-9_~/:%\-\+\.\=\&\?\#,]+)##){ + my $url = $1 . "://$2"; + $url = "h$url" if ($url =~ /^ttps?:/); + $url =~ s/[\.\?]$//; + print STDERR "-- DEBUG: Found plain text URL with protocol: " . $url . "\n" if ($superverbose); + &openurl($url); + $didprint++; + } + + # Then try URLs without protocols (assume https) - WARNING: may be truncated + while ($plain_content + =~ s#\b([a-zA-Z0-9\-]+\.[a-zA-Z]{2,}(?:/[a-zA-Z0-9_~/%:\-\+\.\=\&\?\#,]*)?)\b##){ + my $url = "https://$1"; + $url =~ s/[\.\?]$//; + print STDERR "-- DEBUG: Found plain text URL without protocol (may be truncated): " . $url . "\n" if ($superverbose); + print $stdout "-- WARNING: URL may be truncated from display text: $url\n"; + &openurl($url); + $didprint++; + } + } + print $stdout "-- sorry, couldn't find any URL.\n" - if (!defined($urlshort)); + if (!$didprint); return 0; } @@ -7932,15 +7979,71 @@ sub generate_shortdomain { if (!length($shoreblogurldomain)); } +sub detect_browser { + my $is_gui = defined($ENV{'DISPLAY'}) && length($ENV{'DISPLAY'}); + my $browser; + + if ($is_gui) { + # GUI environment - check gui_browser setting first + if (length($gui_browser)) { + return ($gui_browser, 1); # Return browser and background flag + } + + # Try xdg-open first (most reliable on Linux) + if (system("which xdg-open >/dev/null 2>&1") == 0) { + return ('xdg-open %U', 1); + } + + # Fallback GUI browsers + my @gui_browsers = qw(brave chromium firefox google-chrome); + foreach my $br (@gui_browsers) { + if (system("which $br >/dev/null 2>&1") == 0) { + return ("$br %U", 1); + } + } + + # If no GUI browser found, fall back to CLI + print STDERR "-- warning: no GUI browser found, falling back to CLI browser\n"; + } + + # CLI environment or fallback - check cli_browser setting first + if (length($cli_browser)) { + return ($cli_browser, 0); # CLI browsers are blocking + } + + # Try CLI browsers in order of preference + my @cli_browsers = qw(w3m elinks lynx); + foreach my $br (@cli_browsers) { + if (system("which $br >/dev/null 2>&1") == 0) { + return ("$br %U", 0); + } + } + + # Ultimate fallback - use the old urlopen setting + return ($urlopen, 0); +} + sub openurl { - my $comm = $urlopen; my $url = shift; + my ($comm, $should_background) = &detect_browser(); + + # Handle gopher URLs through gateway if not using lynx $url = "http://gopher.floodgap.com/gopher/gw?".&url_oauth_sub($url) if ($url =~ m#^gopher://# && $comm !~ /^[^\s]*lynx/); + $urlshort = $url; $comm =~ s/\%U/'$url'/g; - print $stdout "($comm)\n"; - system("$comm"); + + if ($should_background) { + # Background GUI browsers + $comm .= " &"; + print $stdout "($comm)\n"; + system("$comm"); + } else { + # CLI browsers run in foreground + print $stdout "($comm)\n"; + system("$comm"); + } } sub urlshorten {