/url updated to actually work again.

This commit is contained in:
Storm Dragon
2025-07-28 17:48:04 -04:00
parent 2a409645de
commit 5197af9eba
+166 -63
View File
@@ -176,7 +176,9 @@ BEGIN {
# lists= # Comma-separated list of lists to follow # lists= # Comma-separated list of lists to follow
# === URL HANDLING === # === URL HANDLING ===
# urlopen=echo %U # Command to open URLs (%U = URL) # urlopen=echo %U # Command to open URLs (%U = URL) - deprecated, use cli_browser/gui_browser
# cli_browser= # CLI browser (w3m, elinks, lynx) - auto-detected if empty
# gui_browser= # GUI browser (xdg-open, firefox, etc) - auto-detected if empty
# shoreblogurl=http://is.gd/api.php?longurl= # URL shortening service # shoreblogurl=http://is.gd/api.php?longurl= # URL shortening service
# === SYSTEM SETTINGS === # === SYSTEM SETTINGS ===
@@ -265,7 +267,7 @@ EOF
rlurl noprompt shoreblogurl newline wrap verify autosplit rlurl noprompt shoreblogurl newline wrap verify autosplit
notimeline queryurl fediverseserver colourprompt colourme notimeline queryurl fediverseserver colourprompt colourme
colourdm colourreply colourwarn coloursearch colourlist idurl colourdm colourreply colourwarn coloursearch colourlist idurl
urlopen delurl notrack dmdelurl favsurl urlopen cli_browser gui_browser delurl notrack dmdelurl favsurl
favurl favdelurl slowpost notifies filter colourdefault favurl favdelurl slowpost notifies filter colourdefault
followurl leaveurl dmupdate mentions backload followurl leaveurl dmupdate mentions backload
lat long location searchhits blockurl blockdelurl woeid lat long location searchhits blockurl blockdelurl woeid
@@ -1080,6 +1082,8 @@ $pause = (($anonymous) ? 120 : "auto") if (!defined $pause);
$superverbose ||= 0; $superverbose ||= 0;
$avatar ||= ""; $avatar ||= "";
$urlopen ||= 'echo %U'; $urlopen ||= 'echo %U';
$cli_browser ||= '';
$gui_browser ||= '';
$hold ||= 0; $hold ||= 0;
$daemon ||= 0; $daemon ||= 0;
$maxhist ||= 19; $maxhist ||= 19;
@@ -3362,70 +3366,113 @@ EOF
$genurl = $idurl; $genurl = $idurl;
} }
# to be TOS-compliant, we must try entities first to use # Extract URLs from Mastodon post content
# t.co wrapped links. this is a tiny version of /entities. my $didprint = 0;
unless ($notco) {
my $id = $post->{'reblog'}->{'id_str'} # First, try to get the FULL post data from the API to access structured URL fields
|| $post->{'id_str'}; my $full_post = undef;
my $hash; if (defined($genurl) && defined($post->{'id_str'})) {
my $post_id = $post->{'id_str'};
# only fetch if we have to. if we already fetched # Replace %I placeholder with actual post ID
# because we were given a direct id_str instead of a my $api_url = $genurl;
# menu code, then we already have the entities. $api_url =~ s/%I/$post_id/g;
if ($code !~ /^[0-9]+$/) { print STDERR "-- DEBUG: Fetching full post data from API for URL extraction: $api_url\n" if ($superverbose);
$hash = &grabjson("${genurl}?id=${id}", $full_post = &grabjson($api_url, 0, 0, 0, undef, 1);
0, 0, 0, undef, 1); }
} else {
# MAKE MONEY FAST WITH OUR QUICK CACHE PLAN # Use full post data if available, otherwise fall back to cached data
$hash = $post; my $working_post = (defined($full_post) && ref($full_post) eq 'HASH') ? $full_post : $post;
# DEBUG: Show what we have in the post structure
print STDERR "-- DEBUG: URL parsing - using " . (defined($full_post) ? "FULL" : "CACHED") . " post data\n" if ($superverbose);
print STDERR "-- DEBUG: URL parsing - post keys: " . join(", ", keys %$working_post) . "\n" if ($superverbose);
print STDERR "-- DEBUG: URL parsing - content field: '" . ($working_post->{'content'} || 'UNDEFINED') . "'\n" if ($superverbose);
print STDERR "-- DEBUG: URL parsing - text field: '" . ($working_post->{'text'} || 'UNDEFINED') . "'\n" if ($superverbose);
# PRIORITY 1: Extract URLs from Mastodon's structured URL data (card/preview_url)
if (defined($working_post->{'card'}) && ref($working_post->{'card'}) eq 'HASH') {
my $card = $working_post->{'card'};
if (defined($card->{'url'}) && length($card->{'url'})) {
print STDERR "-- DEBUG: Found card URL: " . $card->{'url'} . "\n" if ($superverbose);
&openurl($card->{'url'});
$didprint++;
} }
if (defined($hash) && ref($hash) eq 'HASH') { }
my $w;
my $v; # PRIORITY 2: Extract URLs from preview_url field (some servers use this)
my $didprint = 0; if (!$didprint && defined($working_post->{'preview_url'}) && length($working_post->{'preview_url'})) {
print STDERR "-- DEBUG: Found preview_url: " . $working_post->{'preview_url'} . "\n" if ($superverbose);
# fediverse puts entities in multiple fields. &openurl($working_post->{'preview_url'});
foreach $w (qw(media urls)) { $didprint++;
my $p = $hash->{'entities'}->{$w}; }
next if (!defined($p) ||
ref($p) ne 'ARRAY'); # PRIORITY 3: Media attachments
foreach $v (@{ $p }) { if (!$didprint && defined($working_post->{'media_attachments'}) &&
next if (!defined($v) || ref($working_post->{'media_attachments'}) eq 'ARRAY') {
ref($v) ne 'HASH'); foreach my $media (@{ $working_post->{'media_attachments'} }) {
next if (!length($v->{'url'}) || if (defined($media->{'url'}) && length($media->{'url'})) {
(!length($v->{'expanded_url'}) && print STDERR "-- DEBUG: Found media URL: " . $media->{'url'} . "\n" if ($superverbose);
!length($v->{'media_url'}))); &openurl($media->{'url'});
my $u1 = &descape($v->{'url'}); $didprint++;
&openurl($u1);
$didprint++;
}
} }
print $stdout
"-- sorry, couldn't find any URL.\n"
if (!$didprint);
return 0;
} }
print $stdout
"-- unable to use t.co URLs, using fallback\n";
} }
# that failed, so fall back on the old method.
my $text = &descape($post->{'text'}); # PRIORITY 4: Parse URLs from HTML content (href attributes)
# findallurls if (!$didprint) {
while ($text my $content = $working_post->{'content'} || $working_post->{'text'} || '';
=~ s#(h?ttp|h?ttps|ftp|gopher)://([a-zA-Z0-9_~/:%\-\+\.\=\&\?\#,]+)##){ if (length($content)) {
# sigh. I HATE YOU TINYARRO.WS # Extract URLs from href attributes in HTML
#TODO while ($content =~ s/<a[^>]+href=["']([^"']+)["'][^>]*>[^<]*<\/a>//i) {
# eventually we will have to put a punycode implementation into openurl my $url = $1;
# to handle things like Mac OS X's open which don't understand UTF-8 URLs. next if ($url =~ /^#/); # Skip hashtag links
# when we do, uncomment this again next if ($url =~ /^\@/); # Skip mention links
# =~ s#(http|https|ftp|gopher)://([^'\\]+?)('|\\|\s|$)##) { print STDERR "-- DEBUG: Found HTML href URL: " . $url . "\n" if ($superverbose);
my $url = $1 . "://$2"; &openurl($url);
$url = "h$url" if ($url =~ /^ttps?:/); $didprint++;
$url =~ s/[\.\?]$//; }
&openurl($url); }
} }
if ($didprint) {
return 0;
}
# PRIORITY 5: Final fallback - parse plain text URLs from display text
# This handles truncated URLs in display text as a last resort
if (!$didprint) {
# Re-get content since we may have modified it above with regex substitutions
my $original_content = $working_post->{'content'} || $working_post->{'text'} || '';
my $plain_content = &html_to_text($original_content);
$plain_content = &descape($plain_content);
print STDERR "-- DEBUG: Parsing plain text content: '$plain_content'\n" if ($superverbose);
# findallurls - extract any remaining URLs from plain text
# First try URLs with protocols
while ($plain_content
=~ s#(h?ttp|h?ttps|ftp|gopher)://([a-zA-Z0-9_~/:%\-\+\.\=\&\?\#,]+)##){
my $url = $1 . "://$2";
$url = "h$url" if ($url =~ /^ttps?:/);
$url =~ s/[\.\?]$//;
print STDERR "-- DEBUG: Found plain text URL with protocol: " . $url . "\n" if ($superverbose);
&openurl($url);
$didprint++;
}
# Then try URLs without protocols (assume https) - WARNING: may be truncated
while ($plain_content
=~ s#\b([a-zA-Z0-9\-]+\.[a-zA-Z]{2,}(?:/[a-zA-Z0-9_~/%:\-\+\.\=\&\?\#,]*)?)\b##){
my $url = "https://$1";
$url =~ s/[\.\?]$//;
print STDERR "-- DEBUG: Found plain text URL without protocol (may be truncated): " . $url . "\n" if ($superverbose);
print $stdout "-- WARNING: URL may be truncated from display text: $url\n";
&openurl($url);
$didprint++;
}
}
print $stdout "-- sorry, couldn't find any URL.\n" print $stdout "-- sorry, couldn't find any URL.\n"
if (!defined($urlshort)); if (!$didprint);
return 0; return 0;
} }
@@ -7932,15 +7979,71 @@ sub generate_shortdomain {
if (!length($shoreblogurldomain)); if (!length($shoreblogurldomain));
} }
# detect_browser: choose the command template used to open a URL.
#
# Takes no arguments. Reads the $gui_browser, $cli_browser and $urlopen
# settings plus the DISPLAY, WAYLAND_DISPLAY and PATH environment variables.
#
# Returns a two-element list ($template, $background):
#   $template   - command line in which %U marks where the URL goes
#   $background - 1 if the command should be run in the background (GUI
#                 browsers), 0 if it should block (CLI browsers/fallback)
#
# Order of preference: configured GUI browser, xdg-open, a known GUI
# browser (all only when a graphical session is detected), then the
# configured CLI browser, a known CLI browser, and finally $urlopen.
sub detect_browser {
    # A graphical session is assumed when either an X11 or a Wayland
    # display is advertised in the environment.
    my $is_gui = 0;
    foreach my $var (qw(DISPLAY WAYLAND_DISPLAY)) {
        $is_gui = 1 if (defined($ENV{$var}) && length($ENV{$var}));
    }

    # Look the program up in PATH ourselves rather than shelling out to
    # "which", which is not installed everywhere and costs a fork per probe.
    my $in_path = sub {
        my $prog = shift;
        my $path = defined($ENV{'PATH'}) ? $ENV{'PATH'} : '';
        foreach my $dir (split(/:/, $path)) {
            # An empty PATH element means the current directory.
            my $candidate = (length($dir) ? $dir : '.') . "/$prog";
            return 1 if (-f $candidate && -x _);
        }
        return 0;
    };

    # The documented settings are bare program names (w3m, firefox, ...),
    # so add the URL placeholder when the user did not supply one;
    # otherwise the browser would be started without any URL.
    my $with_url = sub {
        my $cmd = shift;
        return ($cmd =~ /\%U/) ? $cmd : "$cmd %U";
    };

    if ($is_gui) {
        # GUI environment - an explicit gui_browser setting wins.
        if (defined($gui_browser) && length($gui_browser)) {
            return ($with_url->($gui_browser), 1);
        }

        # xdg-open first, then the fallback GUI browsers in order.
        foreach my $br (qw(xdg-open brave chromium firefox google-chrome)) {
            return ("$br %U", 1) if ($in_path->($br));
        }

        # Nothing graphical available: continue with the CLI choices.
        print STDERR "-- warning: no GUI browser found, falling back to CLI browser\n";
    }

    # CLI environment or fallback - an explicit cli_browser setting wins.
    if (defined($cli_browser) && length($cli_browser)) {
        return ($with_url->($cli_browser), 0);
    }

    # CLI browsers in order of preference; these block until they exit.
    foreach my $br (qw(w3m elinks lynx)) {
        return ("$br %U", 0) if ($in_path->($br));
    }

    # Ultimate fallback - the old urlopen setting, used verbatim.
    return ($urlopen, 0);
}
# openurl: open a single URL with the browser chosen by detect_browser.
#
# Argument: the URL to open.
# Side effects: stores the (possibly gateway-rewritten) URL in $urlshort,
# echoes the command line to $stdout and runs it through the shell.
# Returns whatever system() returns for the command.
sub openurl {
    my $url = shift;
    my ($comm, $should_background) = &detect_browser();

    # Handle gopher URLs through gateway if not using lynx
    $url = "http://gopher.floodgap.com/gopher/gw?".&url_oauth_sub($url)
        if ($url =~ m#^gopher://# && $comm !~ /^[^\s]*lynx/);
    $urlshort = $url;

    # The URL comes from remote post data and is placed inside single
    # quotes on a shell command line. Close the quote, emit an escaped
    # quote, and reopen it for every embedded single quote so the URL can
    # never terminate the quoting and run arbitrary shell commands.
    (my $shell_url = $url) =~ s/'/'\\''/g;
    $comm =~ s/\%U/'$shell_url'/g;

    # GUI browsers are detached so the client stays usable; CLI browsers
    # run in the foreground until the user quits them.
    $comm .= " &" if ($should_background);

    print $stdout "($comm)\n";
    system("$comm");
}
sub urlshorten { sub urlshorten {