Allow setting User Agent in Siteconf
Since Google gives usable search results to Lynx but not to w3m, and many other sites block Lynx but /not/ w3m, we want to be able to set the User Agent string on a per-site basis.
This commit is contained in:
@@ -13,6 +13,7 @@ substitute_url "<destination-url>"
|
|||||||
url_charset <charset>
|
url_charset <charset>
|
||||||
no_referer_from on|off
|
no_referer_from on|off
|
||||||
no_referer_to on|off
|
no_referer_to on|off
|
||||||
|
user_agent "<string>"
|
||||||
|
|
||||||
The last match wins.
|
The last match wins.
|
||||||
|
|
||||||
@@ -40,6 +41,12 @@ url_charset utf-8
|
|||||||
When combined with the "decode_url" option turned on, links to
|
When combined with the "decode_url" option turned on, links to
|
||||||
Wikipedia will be human-readable.
|
Wikipedia will be human-readable.
|
||||||
|
|
||||||
|
url m@^https?://(.*\.)google\.com/@
|
||||||
|
user_agent "Lynx/2.8.8dev.3 libwww-FM/2.14 SSL-MM/1.4.1"
|
||||||
|
|
||||||
|
Tell Google we're actually Lynx. (So they send us a text-browser friendly
|
||||||
|
results page.)
|
||||||
|
|
||||||
===== Regular expressions notes =====
|
===== Regular expressions notes =====
|
||||||
|
|
||||||
The following expressions are all equivalent:
|
The following expressions are all equivalent:
|
||||||
|
|||||||
@@ -271,8 +271,10 @@ extern int REV_LB[];
|
|||||||
#define SCONF_URL_CHARSET 2
|
#define SCONF_URL_CHARSET 2
|
||||||
#define SCONF_NO_REFERER_FROM 3
|
#define SCONF_NO_REFERER_FROM 3
|
||||||
#define SCONF_NO_REFERER_TO 4
|
#define SCONF_NO_REFERER_TO 4
|
||||||
#define SCONF_N_FIELD 5
|
#define SCONF_USER_AGENT 5
|
||||||
|
#define SCONF_N_FIELD 6
|
||||||
#define query_SCONF_SUBSTITUTE_URL(pu) ((const char *)querySiteconf(pu, SCONF_SUBSTITUTE_URL))
|
#define query_SCONF_SUBSTITUTE_URL(pu) ((const char *)querySiteconf(pu, SCONF_SUBSTITUTE_URL))
|
||||||
|
#define query_SCONF_USER_AGENT(pu) ((const char *)querySiteconf(pu, SCONF_USER_AGENT))
|
||||||
#define query_SCONF_URL_CHARSET(pu) ((const wc_ces *)querySiteconf(pu, SCONF_URL_CHARSET))
|
#define query_SCONF_URL_CHARSET(pu) ((const wc_ces *)querySiteconf(pu, SCONF_URL_CHARSET))
|
||||||
#define query_SCONF_NO_REFERER_FROM(pu) ((const int *)querySiteconf(pu, SCONF_NO_REFERER_FROM))
|
#define query_SCONF_NO_REFERER_FROM(pu) ((const int *)querySiteconf(pu, SCONF_NO_REFERER_FROM))
|
||||||
#define query_SCONF_NO_REFERER_TO(pu) ((const int *)querySiteconf(pu, SCONF_NO_REFERER_TO))
|
#define query_SCONF_NO_REFERER_TO(pu) ((const int *)querySiteconf(pu, SCONF_NO_REFERER_TO))
|
||||||
|
|||||||
@@ -1602,6 +1602,7 @@ helpFile(char *base)
|
|||||||
* url_charset <charset>
|
* url_charset <charset>
|
||||||
* no_referer_from on|off
|
* no_referer_from on|off
|
||||||
* no_referer_to on|off
|
* no_referer_to on|off
|
||||||
|
* user_agent "<string>"
|
||||||
*
|
*
|
||||||
* The last match wins.
|
* The last match wins.
|
||||||
*/
|
*/
|
||||||
@@ -1614,6 +1615,7 @@ struct siteconf_rec {
|
|||||||
unsigned char mask[(SCONF_N_FIELD + 7) >> 3];
|
unsigned char mask[(SCONF_N_FIELD + 7) >> 3];
|
||||||
|
|
||||||
char *substitute_url;
|
char *substitute_url;
|
||||||
|
char *user_agent;
|
||||||
#ifdef USE_M17N
|
#ifdef USE_M17N
|
||||||
wc_ces url_charset;
|
wc_ces url_charset;
|
||||||
#endif
|
#endif
|
||||||
@@ -1640,6 +1642,7 @@ newSiteconfRec(void)
|
|||||||
memset(ent->mask, 0, sizeof(ent->mask));
|
memset(ent->mask, 0, sizeof(ent->mask));
|
||||||
|
|
||||||
ent->substitute_url = NULL;
|
ent->substitute_url = NULL;
|
||||||
|
ent->user_agent = NULL;
|
||||||
#ifdef USE_M17N
|
#ifdef USE_M17N
|
||||||
ent->url_charset = 0;
|
ent->url_charset = 0;
|
||||||
#endif
|
#endif
|
||||||
@@ -1718,6 +1721,10 @@ loadSiteconf(void)
|
|||||||
ent->substitute_url = getQWord(&p);
|
ent->substitute_url = getQWord(&p);
|
||||||
SCONF_SET(ent, SCONF_SUBSTITUTE_URL);
|
SCONF_SET(ent, SCONF_SUBSTITUTE_URL);
|
||||||
}
|
}
|
||||||
|
if (strcmp(s, "user_agent") == 0) {
|
||||||
|
ent->user_agent = getQWord(&p);
|
||||||
|
SCONF_SET(ent, SCONF_USER_AGENT);
|
||||||
|
}
|
||||||
#ifdef USE_M17N
|
#ifdef USE_M17N
|
||||||
else if (strcmp(s, "url_charset") == 0) {
|
else if (strcmp(s, "url_charset") == 0) {
|
||||||
char *charset = getWord(&p);
|
char *charset = getWord(&p);
|
||||||
@@ -1797,6 +1804,11 @@ url_found:
|
|||||||
return tmp->ptr;
|
return tmp->ptr;
|
||||||
}
|
}
|
||||||
return NULL;
|
return NULL;
|
||||||
|
case SCONF_USER_AGENT:
|
||||||
|
if (ent->user_agent && *ent->user_agent) {
|
||||||
|
return ent->user_agent;
|
||||||
|
}
|
||||||
|
return NULL;
|
||||||
#ifdef USE_M17N
|
#ifdef USE_M17N
|
||||||
case SCONF_URL_CHARSET:
|
case SCONF_URL_CHARSET:
|
||||||
return &ent->url_charset;
|
return &ent->url_charset;
|
||||||
|
|||||||
@@ -1323,10 +1323,13 @@ otherinfo(ParsedURL *target, ParsedURL *current, char *referer)
|
|||||||
Str s = Strnew();
|
Str s = Strnew();
|
||||||
const int *no_referer_ptr;
|
const int *no_referer_ptr;
|
||||||
int no_referer;
|
int no_referer;
|
||||||
|
const char* url_user_agent = query_SCONF_USER_AGENT(target);
|
||||||
|
|
||||||
if (!override_user_agent) {
|
if (!override_user_agent) {
|
||||||
Strcat_charp(s, "User-Agent: ");
|
Strcat_charp(s, "User-Agent: ");
|
||||||
if (UserAgent == NULL || *UserAgent == '\0')
|
if (url_user_agent)
|
||||||
|
Strcat_charp(s, url_user_agent);
|
||||||
|
else if (UserAgent == NULL || *UserAgent == '\0')
|
||||||
Strcat_charp(s, w3m_version);
|
Strcat_charp(s, w3m_version);
|
||||||
else
|
else
|
||||||
Strcat_charp(s, UserAgent);
|
Strcat_charp(s, UserAgent);
|
||||||
|
|||||||
Reference in New Issue
Block a user