/url updated to actually work again.
This commit is contained in:
+166
-63
@@ -176,7 +176,9 @@ BEGIN {
|
||||
# lists= # Comma-separated list of lists to follow
|
||||
|
||||
# === URL HANDLING ===
|
||||
# urlopen=echo %U # Command to open URLs (%U = URL)
|
||||
# urlopen=echo %U # Command to open URLs (%U = URL) - deprecated, use cli_browser/gui_browser
|
||||
# cli_browser= # CLI browser (w3m, elinks, lynx) - auto-detected if empty
|
||||
# gui_browser= # GUI browser (xdg-open, firefox, etc) - auto-detected if empty
|
||||
# shoreblogurl=http://is.gd/api.php?longurl= # URL shortening service
|
||||
|
||||
# === SYSTEM SETTINGS ===
|
||||
@@ -265,7 +267,7 @@ EOF
|
||||
rlurl noprompt shoreblogurl newline wrap verify autosplit
|
||||
notimeline queryurl fediverseserver colourprompt colourme
|
||||
colourdm colourreply colourwarn coloursearch colourlist idurl
|
||||
urlopen delurl notrack dmdelurl favsurl
|
||||
urlopen cli_browser gui_browser delurl notrack dmdelurl favsurl
|
||||
favurl favdelurl slowpost notifies filter colourdefault
|
||||
followurl leaveurl dmupdate mentions backload
|
||||
lat long location searchhits blockurl blockdelurl woeid
|
||||
@@ -1080,6 +1082,8 @@ $pause = (($anonymous) ? 120 : "auto") if (!defined $pause);
|
||||
$superverbose ||= 0;
|
||||
$avatar ||= "";
|
||||
$urlopen ||= 'echo %U';
|
||||
$cli_browser ||= '';
|
||||
$gui_browser ||= '';
|
||||
$hold ||= 0;
|
||||
$daemon ||= 0;
|
||||
$maxhist ||= 19;
|
||||
@@ -3362,70 +3366,113 @@ EOF
|
||||
$genurl = $idurl;
|
||||
}
|
||||
|
||||
# to be TOS-compliant, we must try entities first to use
|
||||
# t.co wrapped links. this is a tiny version of /entities.
|
||||
unless ($notco) {
|
||||
my $id = $post->{'reblog'}->{'id_str'}
|
||||
|| $post->{'id_str'};
|
||||
my $hash;
|
||||
|
||||
# only fetch if we have to. if we already fetched
|
||||
# because we were given a direct id_str instead of a
|
||||
# menu code, then we already have the entities.
|
||||
if ($code !~ /^[0-9]+$/) {
|
||||
$hash = &grabjson("${genurl}?id=${id}",
|
||||
0, 0, 0, undef, 1);
|
||||
} else {
|
||||
# MAKE MONEY FAST WITH OUR QUICK CACHE PLAN
|
||||
$hash = $post;
|
||||
# Extract URLs from Mastodon post content
|
||||
my $didprint = 0;
|
||||
|
||||
# First, try to get the FULL post data from the API to access structured URL fields
|
||||
my $full_post = undef;
|
||||
if (defined($genurl) && defined($post->{'id_str'})) {
|
||||
my $post_id = $post->{'id_str'};
|
||||
# Replace %I placeholder with actual post ID
|
||||
my $api_url = $genurl;
|
||||
$api_url =~ s/%I/$post_id/g;
|
||||
print STDERR "-- DEBUG: Fetching full post data from API for URL extraction: $api_url\n" if ($superverbose);
|
||||
$full_post = &grabjson($api_url, 0, 0, 0, undef, 1);
|
||||
}
|
||||
|
||||
# Use full post data if available, otherwise fall back to cached data
|
||||
my $working_post = (defined($full_post) && ref($full_post) eq 'HASH') ? $full_post : $post;
|
||||
|
||||
# DEBUG: Show what we have in the post structure
|
||||
print STDERR "-- DEBUG: URL parsing - using " . (defined($full_post) ? "FULL" : "CACHED") . " post data\n" if ($superverbose);
|
||||
print STDERR "-- DEBUG: URL parsing - post keys: " . join(", ", keys %$working_post) . "\n" if ($superverbose);
|
||||
print STDERR "-- DEBUG: URL parsing - content field: '" . ($working_post->{'content'} || 'UNDEFINED') . "'\n" if ($superverbose);
|
||||
print STDERR "-- DEBUG: URL parsing - text field: '" . ($working_post->{'text'} || 'UNDEFINED') . "'\n" if ($superverbose);
|
||||
|
||||
# PRIORITY 1: Extract URLs from Mastodon's structured URL data (card/preview_url)
|
||||
if (defined($working_post->{'card'}) && ref($working_post->{'card'}) eq 'HASH') {
|
||||
my $card = $working_post->{'card'};
|
||||
if (defined($card->{'url'}) && length($card->{'url'})) {
|
||||
print STDERR "-- DEBUG: Found card URL: " . $card->{'url'} . "\n" if ($superverbose);
|
||||
&openurl($card->{'url'});
|
||||
$didprint++;
|
||||
}
|
||||
if (defined($hash) && ref($hash) eq 'HASH') {
|
||||
my $w;
|
||||
my $v;
|
||||
my $didprint = 0;
|
||||
|
||||
# fediverse puts entities in multiple fields.
|
||||
foreach $w (qw(media urls)) {
|
||||
my $p = $hash->{'entities'}->{$w};
|
||||
next if (!defined($p) ||
|
||||
ref($p) ne 'ARRAY');
|
||||
foreach $v (@{ $p }) {
|
||||
next if (!defined($v) ||
|
||||
ref($v) ne 'HASH');
|
||||
next if (!length($v->{'url'}) ||
|
||||
(!length($v->{'expanded_url'}) &&
|
||||
!length($v->{'media_url'})));
|
||||
my $u1 = &descape($v->{'url'});
|
||||
&openurl($u1);
|
||||
$didprint++;
|
||||
}
|
||||
}
|
||||
|
||||
# PRIORITY 2: Extract URLs from preview_url field (some servers use this)
|
||||
if (!$didprint && defined($working_post->{'preview_url'}) && length($working_post->{'preview_url'})) {
|
||||
print STDERR "-- DEBUG: Found preview_url: " . $working_post->{'preview_url'} . "\n" if ($superverbose);
|
||||
&openurl($working_post->{'preview_url'});
|
||||
$didprint++;
|
||||
}
|
||||
|
||||
# PRIORITY 3: Media attachments
|
||||
if (!$didprint && defined($working_post->{'media_attachments'}) &&
|
||||
ref($working_post->{'media_attachments'}) eq 'ARRAY') {
|
||||
foreach my $media (@{ $working_post->{'media_attachments'} }) {
|
||||
if (defined($media->{'url'}) && length($media->{'url'})) {
|
||||
print STDERR "-- DEBUG: Found media URL: " . $media->{'url'} . "\n" if ($superverbose);
|
||||
&openurl($media->{'url'});
|
||||
$didprint++;
|
||||
}
|
||||
print $stdout
|
||||
"-- sorry, couldn't find any URL.\n"
|
||||
if (!$didprint);
|
||||
return 0;
|
||||
}
|
||||
print $stdout
|
||||
"-- unable to use t.co URLs, using fallback\n";
|
||||
}
|
||||
# that failed, so fall back on the old method.
|
||||
my $text = &descape($post->{'text'});
|
||||
# findallurls
|
||||
while ($text
|
||||
=~ s#(h?ttp|h?ttps|ftp|gopher)://([a-zA-Z0-9_~/:%\-\+\.\=\&\?\#,]+)##){
|
||||
# sigh. I HATE YOU TINYARRO.WS
|
||||
#TODO
|
||||
# eventually we will have to put a punycode implementation into openurl
|
||||
# to handle things like Mac OS X's open which don't understand UTF-8 URLs.
|
||||
# when we do, uncomment this again
|
||||
# =~ s#(http|https|ftp|gopher)://([^'\\]+?)('|\\|\s|$)##) {
|
||||
my $url = $1 . "://$2";
|
||||
$url = "h$url" if ($url =~ /^ttps?:/);
|
||||
$url =~ s/[\.\?]$//;
|
||||
&openurl($url);
|
||||
|
||||
# PRIORITY 4: Parse URLs from HTML content (href attributes)
|
||||
if (!$didprint) {
|
||||
my $content = $working_post->{'content'} || $working_post->{'text'} || '';
|
||||
if (length($content)) {
|
||||
# Extract URLs from href attributes in HTML
|
||||
while ($content =~ s/<a[^>]+href=["']([^"']+)["'][^>]*>[^<]*<\/a>//i) {
|
||||
my $url = $1;
|
||||
next if ($url =~ /^#/); # Skip hashtag links
|
||||
next if ($url =~ /^\@/); # Skip mention links
|
||||
print STDERR "-- DEBUG: Found HTML href URL: " . $url . "\n" if ($superverbose);
|
||||
&openurl($url);
|
||||
$didprint++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if ($didprint) {
|
||||
return 0;
|
||||
}
|
||||
# PRIORITY 5: Final fallback - parse plain text URLs from display text
|
||||
# This handles truncated URLs in display text as a last resort
|
||||
if (!$didprint) {
|
||||
# Re-get content since we may have modified it above with regex substitutions
|
||||
my $original_content = $working_post->{'content'} || $working_post->{'text'} || '';
|
||||
my $plain_content = &html_to_text($original_content);
|
||||
$plain_content = &descape($plain_content);
|
||||
|
||||
print STDERR "-- DEBUG: Parsing plain text content: '$plain_content'\n" if ($superverbose);
|
||||
|
||||
# findallurls - extract any remaining URLs from plain text
|
||||
# First try URLs with protocols
|
||||
while ($plain_content
|
||||
=~ s#(h?ttp|h?ttps|ftp|gopher)://([a-zA-Z0-9_~/:%\-\+\.\=\&\?\#,]+)##){
|
||||
my $url = $1 . "://$2";
|
||||
$url = "h$url" if ($url =~ /^ttps?:/);
|
||||
$url =~ s/[\.\?]$//;
|
||||
print STDERR "-- DEBUG: Found plain text URL with protocol: " . $url . "\n" if ($superverbose);
|
||||
&openurl($url);
|
||||
$didprint++;
|
||||
}
|
||||
|
||||
# Then try URLs without protocols (assume https) - WARNING: may be truncated
|
||||
while ($plain_content
|
||||
=~ s#\b([a-zA-Z0-9\-]+\.[a-zA-Z]{2,}(?:/[a-zA-Z0-9_~/%:\-\+\.\=\&\?\#,]*)?)\b##){
|
||||
my $url = "https://$1";
|
||||
$url =~ s/[\.\?]$//;
|
||||
print STDERR "-- DEBUG: Found plain text URL without protocol (may be truncated): " . $url . "\n" if ($superverbose);
|
||||
print $stdout "-- WARNING: URL may be truncated from display text: $url\n";
|
||||
&openurl($url);
|
||||
$didprint++;
|
||||
}
|
||||
}
|
||||
|
||||
print $stdout "-- sorry, couldn't find any URL.\n"
|
||||
if (!defined($urlshort));
|
||||
if (!$didprint);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -7932,15 +7979,71 @@ sub generate_shortdomain {
|
||||
if (!length($shoreblogurldomain));
|
||||
}
|
||||
|
||||
sub detect_browser {
|
||||
my $is_gui = defined($ENV{'DISPLAY'}) && length($ENV{'DISPLAY'});
|
||||
my $browser;
|
||||
|
||||
if ($is_gui) {
|
||||
# GUI environment - check gui_browser setting first
|
||||
if (length($gui_browser)) {
|
||||
return ($gui_browser, 1); # Return browser and background flag
|
||||
}
|
||||
|
||||
# Try xdg-open first (most reliable on Linux)
|
||||
if (system("which xdg-open >/dev/null 2>&1") == 0) {
|
||||
return ('xdg-open %U', 1);
|
||||
}
|
||||
|
||||
# Fallback GUI browsers
|
||||
my @gui_browsers = qw(brave chromium firefox google-chrome);
|
||||
foreach my $br (@gui_browsers) {
|
||||
if (system("which $br >/dev/null 2>&1") == 0) {
|
||||
return ("$br %U", 1);
|
||||
}
|
||||
}
|
||||
|
||||
# If no GUI browser found, fall back to CLI
|
||||
print STDERR "-- warning: no GUI browser found, falling back to CLI browser\n";
|
||||
}
|
||||
|
||||
# CLI environment or fallback - check cli_browser setting first
|
||||
if (length($cli_browser)) {
|
||||
return ($cli_browser, 0); # CLI browsers are blocking
|
||||
}
|
||||
|
||||
# Try CLI browsers in order of preference
|
||||
my @cli_browsers = qw(w3m elinks lynx);
|
||||
foreach my $br (@cli_browsers) {
|
||||
if (system("which $br >/dev/null 2>&1") == 0) {
|
||||
return ("$br %U", 0);
|
||||
}
|
||||
}
|
||||
|
||||
# Ultimate fallback - use the old urlopen setting
|
||||
return ($urlopen, 0);
|
||||
}
|
||||
|
||||
sub openurl {
|
||||
my $comm = $urlopen;
|
||||
my $url = shift;
|
||||
my ($comm, $should_background) = &detect_browser();
|
||||
|
||||
# Handle gopher URLs through gateway if not using lynx
|
||||
$url = "http://gopher.floodgap.com/gopher/gw?".&url_oauth_sub($url)
|
||||
if ($url =~ m#^gopher://# && $comm !~ /^[^\s]*lynx/);
|
||||
|
||||
$urlshort = $url;
|
||||
$comm =~ s/\%U/'$url'/g;
|
||||
print $stdout "($comm)\n";
|
||||
system("$comm");
|
||||
|
||||
if ($should_background) {
|
||||
# Background GUI browsers
|
||||
$comm .= " &";
|
||||
print $stdout "($comm)\n";
|
||||
system("$comm");
|
||||
} else {
|
||||
# CLI browsers run in foreground
|
||||
print $stdout "($comm)\n";
|
||||
system("$comm");
|
||||
}
|
||||
}
|
||||
|
||||
sub urlshorten {
|
||||
|
||||
Reference in New Issue
Block a user