[w3m-dev 02815]

From: aito@fw.ipsj.or.jp
This commit is contained in:
Fumitoshi UKAI
2002-01-10 15:39:21 +00:00
parent 31d84e0083
commit e7c6a1c7f9
7 changed files with 63 additions and 46 deletions
+15 -1
View File
@@ -1,3 +1,17 @@
2002-01-11 aito@fw.ipsj.or.jp
* [w3m-dev 02815]
* cookie.c (domain_match): use new regex
* main.c (chkURLBuffer): use new regex
* main.c (chkNMIDBuffer): use new regex
* menu.c (menuForwardSearch): new regexMatch()
* regex.c (RegexMatch): negative length means whole string
* regex.c (RegexMatch): new regmatch()
* regex.c (regmatch_sub_anytime): add 5th arg `char *'
* regex.c (regmatch_iter): add 4th arg `char *'
* regex.c (regmatch): add 3rd arg `char *'
* url.c (openSocket): use new regex
2002-01-10 aito@fw.ipsj.or.jp 2002-01-10 aito@fw.ipsj.or.jp
* [w3m-dev 02811] new regexp implementation * [w3m-dev 02811] new regexp implementation
@@ -1818,4 +1832,4 @@
* release-0-2-1 * release-0-2-1
* import w3m-0.2.1 * import w3m-0.2.1
$Id: ChangeLog,v 1.203 2002/01/10 04:55:06 ukai Exp $ $Id: ChangeLog,v 1.204 2002/01/10 15:39:21 ukai Exp $
+4 -4
View File
@@ -1,4 +1,4 @@
/* $Id: cookie.c,v 1.6 2001/12/03 18:29:37 ukai Exp $ */ /* $Id: cookie.c,v 1.7 2002/01/10 15:39:21 ukai Exp $ */
/* /*
* References for version 0 cookie: * References for version 0 cookie:
@@ -45,9 +45,9 @@ domain_match(char *host, char *domain)
/* [RFC 2109] s. 2, "domain-match", case 1 /* [RFC 2109] s. 2, "domain-match", case 1
* (both are IP and identical) * (both are IP and identical)
*/ */
regexCompile("[0-9][0-9]*\\.[0-9][0-9]*\\.[0-9][0-9]*\\.[0-9][0-9]*", 0); regexCompile("[0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+", 0);
m0 = regexMatch(host, 0, 1); m0 = regexMatch(host, -1, 1);
m1 = regexMatch(domain, 0, 1); m1 = regexMatch(domain, -1, 1);
if (m0 && m1) { if (m0 && m1) {
if (strcasecmp(host, domain) == 0) if (strcasecmp(host, domain) == 0)
return host; return host;
+4 -10
View File
@@ -1,4 +1,4 @@
/* $Id: main.c,v 1.55 2002/01/04 19:14:36 ukai Exp $ */ /* $Id: main.c,v 1.56 2002/01/10 15:39:21 ukai Exp $ */
#define MAINPROGRAM #define MAINPROGRAM
#include "fm.h" #include "fm.h"
#include <signal.h> #include <signal.h>
@@ -4147,10 +4147,7 @@ void
chkURLBuffer(Buffer *buf) chkURLBuffer(Buffer *buf)
{ {
static char *url_like_pat[] = { static char *url_like_pat[] = {
"http://[a-zA-Z0-9][a-zA-Z0-9:%\\-\\./?=~_\\&+@#,\\$;]*[a-zA-Z0-9_/=]", "https?://[a-zA-Z0-9][a-zA-Z0-9:%\\-\\./?=~_\\&+@#,\\$;]*[a-zA-Z0-9_/=]",
#ifdef USE_SSL
"https://[a-zA-Z0-9][a-zA-Z0-9:%\\-\\./?=~_\\&+@#,\\$;]*[a-zA-Z0-9_/=]",
#endif /* USE_SSL */
#ifdef USE_GOPHER #ifdef USE_GOPHER
"gopher://[a-zA-Z0-9][a-zA-Z0-9:%\\-\\./_]*", "gopher://[a-zA-Z0-9][a-zA-Z0-9:%\\-\\./_]*",
#endif /* USE_GOPHER */ #endif /* USE_GOPHER */
@@ -4161,10 +4158,7 @@ chkURLBuffer(Buffer *buf)
#endif /* USE_NNTP */ #endif /* USE_NNTP */
"mailto:[^<> ][^<> ]*@[a-zA-Z0-9][a-zA-Z0-9\\-\\._]*[a-zA-Z0-9]", "mailto:[^<> ][^<> ]*@[a-zA-Z0-9][a-zA-Z0-9\\-\\._]*[a-zA-Z0-9]",
#ifdef INET6 #ifdef INET6
"http://[a-zA-Z0-9:%\\-\\./_@]*\\[[a-fA-F0-9:][a-fA-F0-9:\\.]*\\][a-zA-Z0-9:%\\-\\./?=~_\\&+@#,\\$;]*", "https?://[a-zA-Z0-9:%\\-\\./_@]*\\[[a-fA-F0-9:][a-fA-F0-9:\\.]*\\][a-zA-Z0-9:%\\-\\./?=~_\\&+@#,\\$;]*",
#ifdef USE_SSL
"https://[a-zA-Z0-9:%\\-\\./_@]*\\[[a-fA-F0-9:][a-fA-F0-9:\\.]*\\][a-zA-Z0-9:%\\-\\./?=~_\\&+@#,\\$;]*",
#endif /* USE_SSL */
"ftp://[a-zA-Z0-9:%\\-\\./_@]*\\[[a-fA-F0-9:][a-fA-F0-9:\\.]*\\][a-zA-Z0-9:%\\-\\./=_+@#,\\$]*", "ftp://[a-zA-Z0-9:%\\-\\./_@]*\\[[a-fA-F0-9:][a-fA-F0-9:\\.]*\\][a-zA-Z0-9:%\\-\\./=_+@#,\\$]*",
#endif /* INET6 */ #endif /* INET6 */
NULL NULL
@@ -4189,7 +4183,7 @@ void
chkNMIDBuffer(Buffer *buf) chkNMIDBuffer(Buffer *buf)
{ {
static char *url_like_pat[] = { static char *url_like_pat[] = {
"<[^<> ][^<> ]*@[A-z0-9\\.\\-_][A-z0-9\\.\\-_]*>", "<[^<> ][^<> ]*@[A-z0-9\\.\\-_]+>",
NULL, NULL,
}; };
int i; int i;
+2 -2
View File
@@ -1,4 +1,4 @@
/* $Id: menu.c,v 1.9 2001/12/10 17:02:44 ukai Exp $ */ /* $Id: menu.c,v 1.10 2002/01/10 15:39:21 ukai Exp $ */
/* /*
* w3m menu.c * w3m menu.c
*/ */
@@ -930,7 +930,7 @@ menuForwardSearch(Menu *menu, char *str, int from)
from = 0; from = 0;
for (i = from; i < menu->nitem; i++) for (i = from; i < menu->nitem; i++)
if (menu->item[i].type != MENU_NOP && if (menu->item[i].type != MENU_NOP &&
regexMatch(menu->item[i].label, 0, 1) == 1) regexMatch(menu->item[i].label, -1, 1) == 1)
return i; return i;
return -1; return -1;
} }
+34 -25
View File
@@ -1,4 +1,4 @@
/* $Id: regex.c,v 1.7 2002/01/10 04:55:07 ukai Exp $ */ /* $Id: regex.c,v 1.8 2002/01/10 15:39:21 ukai Exp $ */
/* /*
* regex: Regular expression pattern match library * regex: Regular expression pattern match library
* *
@@ -66,7 +66,7 @@ static Regex DefaultRegex;
#define CompiledRegex DefaultRegex.re #define CompiledRegex DefaultRegex.re
#define Cstorage DefaultRegex.storage #define Cstorage DefaultRegex.storage
static int regmatch(regexchar *, char *, int, char **); static int regmatch(regexchar *, char *, char *, int, char **);
static int regmatch1(regexchar *, longchar); static int regmatch1(regexchar *, longchar);
static int matchWhich(longchar *, longchar); static int matchWhich(longchar *, longchar);
@@ -261,7 +261,7 @@ RegexMatch(Regex *re, char *str, int len, int firstp)
if (str == NULL) if (str == NULL)
return 0; return 0;
if (len == 0) if (len < 0)
len = strlen(str); len = strlen(str);
re->position = NULL; re->position = NULL;
ep = str + len; ep = str + len;
@@ -269,7 +269,7 @@ RegexMatch(Regex *re, char *str, int len, int firstp)
lpos = NULL; lpos = NULL;
re->lposition = NULL; re->lposition = NULL;
for (r = re; r != NULL; r = r->alt_regex) { for (r = re; r != NULL; r = r->alt_regex) {
switch (regmatch(r->re, p, firstp && (p == str), &lpos)) { switch (regmatch(r->re, p, ep, firstp && (p == str), &lpos)) {
case 1: /* matched */ case 1: /* matched */
re->position = p; re->position = p;
if (re->lposition == NULL || re->lposition < lpos) if (re->lposition == NULL || re->lposition < lpos)
@@ -341,11 +341,13 @@ struct MatchingContext2 {
#define YIELD(retval,context,lnum) (context)->label = lnum; return (retval); label##lnum: #define YIELD(retval,context,lnum) (context)->label = lnum; return (retval); label##lnum:
static int regmatch_iter(struct MatchingContext1 *, regexchar *, char *, int); static int regmatch_iter(struct MatchingContext1 *,
regexchar *, char *, char *, int);
static int static int
regmatch_sub_anytime(struct MatchingContext2 *c, Regex *regex, regmatch_sub_anytime(struct MatchingContext2 *c, Regex *regex,
regexchar * pat2, char *str, int iter_limit, int firstp) regexchar * pat2,
char *str, char *end_p, int iter_limit, int firstp)
{ {
switch (c->label) { switch (c->label) {
case 1: case 1:
@@ -364,7 +366,7 @@ regmatch_sub_anytime(struct MatchingContext2 *c, Regex *regex,
c->firstp = firstp; c->firstp = firstp;
for (;;) { for (;;) {
c->ctx->label = 0; c->ctx->label = 0;
while (regmatch_iter(c->ctx, c->regex->re, c->str, c->firstp)) { while (regmatch_iter(c->ctx, c->regex->re, c->str, end_p, c->firstp)) {
c->n_any = c->ctx->lastpos - c->str; c->n_any = c->ctx->lastpos - c->str;
if (c->n_any <= 0) if (c->n_any <= 0)
continue; continue;
@@ -373,7 +375,7 @@ regmatch_sub_anytime(struct MatchingContext2 *c, Regex *regex,
c->lastpos = c->str + c->n_any; c->lastpos = c->str + c->n_any;
YIELD(1, c, 1); YIELD(1, c, 1);
} }
else if (regmatch(pat2, c->str + c->n_any, else if (regmatch(pat2, c->str + c->n_any, end_p,
c->firstp, &c->lastpos) == 1) { c->firstp, &c->lastpos) == 1) {
YIELD(1, c, 2); YIELD(1, c, 2);
} }
@@ -381,8 +383,8 @@ regmatch_sub_anytime(struct MatchingContext2 *c, Regex *regex,
continue; continue;
c->ctx2->label = 0; c->ctx2->label = 0;
while (regmatch_sub_anytime(c->ctx2, regex, pat2, while (regmatch_sub_anytime(c->ctx2, regex, pat2,
c->str + c->n_any, iter_limit - 1, c->str + c->n_any, end_p,
c->firstp)) { iter_limit - 1, c->firstp)) {
c->lastpos = c->ctx2->lastpos; c->lastpos = c->ctx2->lastpos;
YIELD(1, c, 3); YIELD(1, c, 3);
@@ -397,7 +399,7 @@ regmatch_sub_anytime(struct MatchingContext2 *c, Regex *regex,
static int static int
regmatch_iter(struct MatchingContext1 *c, regmatch_iter(struct MatchingContext1 *c,
regexchar * re, char *str, int firstp) regexchar * re, char *str, char *end_p, int firstp)
{ {
switch (c->label) { switch (c->label) {
case 1: case 1:
@@ -418,10 +420,11 @@ regmatch_iter(struct MatchingContext1 *c,
if (RE_MODE(re) == RE_ENDMARK) if (RE_MODE(re) == RE_ENDMARK)
return 0; return 0;
c->re = re; c->re = re;
c->end_p = str + strlen(str);
c->firstp = firstp; c->firstp = firstp;
c->str = str; c->str = str;
c->end_p = end_p;
c->sub_ctx = NULL; c->sub_ctx = NULL;
c->lastpos = NULL;
while (RE_MODE(c->re) != RE_ENDMARK) { while (RE_MODE(c->re) != RE_ENDMARK) {
if (c->re->mode & (RE_ANYTIME | RE_OPT)) { if (c->re->mode & (RE_ANYTIME | RE_OPT)) {
if (c->re->mode & RE_ANYTIME) if (c->re->mode & RE_ANYTIME)
@@ -441,6 +444,7 @@ regmatch_iter(struct MatchingContext1 *c,
c->re->p.sub, c->re->p.sub,
c->re + 1, c->re + 1,
c->str + c->n_any, c->str + c->n_any,
c->end_p,
c->iter_limit, c->iter_limit,
c->firstp)) { c->firstp)) {
c->n_any = c->ctx2->lastpos - c->str; c->n_any = c->ctx2->lastpos - c->str;
@@ -480,7 +484,7 @@ regmatch_iter(struct MatchingContext1 *c,
c->lastpos = c->str + c->n_any; c->lastpos = c->str + c->n_any;
YIELD(1, c, 2); YIELD(1, c, 2);
} }
else if (regmatch(c->re + 1, c->str + c->n_any, else if (regmatch(c->re + 1, c->str + c->n_any, c->end_p,
c->firstp, &c->lastpos) == 1) { c->firstp, &c->lastpos) == 1) {
YIELD(1, c, 3); YIELD(1, c, 3);
} }
@@ -488,8 +492,6 @@ regmatch_iter(struct MatchingContext1 *c,
return 0; return 0;
} }
/* regexp other than pat*, pat+ and pat? */ /* regexp other than pat*, pat+ and pat? */
if (c->str >= c->end_p)
return 0;
switch (RE_MODE(c->re)) { switch (RE_MODE(c->re)) {
case RE_BEGIN: case RE_BEGIN:
if (!c->firstp) if (!c->firstp)
@@ -497,9 +499,15 @@ regmatch_iter(struct MatchingContext1 *c,
c->re++; c->re++;
break; break;
case RE_END: case RE_END:
c->lastpos = c->str; if (c->str >= c->end_p) {
c->re++; c->lastpos = c->str;
YIELD((c->str >= c->end_p), c, 4); c->re++;
YIELD(1, c, 4);
}
else {
c->lastpos = NULL;
return 0;
}
break; break;
case RE_SUBREGEX: case RE_SUBREGEX:
if (c->sub_ctx == NULL) { if (c->sub_ctx == NULL) {
@@ -509,14 +517,14 @@ regmatch_iter(struct MatchingContext1 *c,
for (;;) { for (;;) {
c->sub_ctx->label = 0; c->sub_ctx->label = 0;
while (regmatch_iter(c->sub_ctx, c->sub_regex->re, while (regmatch_iter(c->sub_ctx, c->sub_regex->re,
c->str, c->firstp)) { c->str, c->end_p, c->firstp)) {
if (c->sub_ctx->lastpos != c->str) if (c->sub_ctx->lastpos != c->str)
c->firstp = 0; c->firstp = 0;
if (RE_MODE(c->re + 1) == RE_ENDMARK) { if (RE_MODE(c->re + 1) == RE_ENDMARK) {
c->lastpos = c->sub_ctx->lastpos; c->lastpos = c->sub_ctx->lastpos;
YIELD(1, c, 5); YIELD(1, c, 5);
} }
else if (regmatch(c->re + 1, c->sub_ctx->lastpos, else if (regmatch(c->re + 1, c->sub_ctx->lastpos, c->end_p,
c->firstp, &c->lastpos) == 1) { c->firstp, &c->lastpos) == 1) {
YIELD(1, c, 6); YIELD(1, c, 6);
} }
@@ -557,14 +565,14 @@ regmatch_iter(struct MatchingContext1 *c,
} }
static int static int
regmatch(regexchar * re, char *str, int firstp, char **lastpos) regmatch(regexchar * re, char *str, char *end_p, int firstp, char **lastpos)
{ {
struct MatchingContext1 contx; struct MatchingContext1 contx;
*lastpos = NULL; *lastpos = NULL;
contx.label = 0; contx.label = 0;
while (regmatch_iter(&contx, re, str, firstp)) { while (regmatch_iter(&contx, re, str, end_p, firstp)) {
#ifdef REGEX_DEBUG #ifdef REGEX_DEBUG
char *p; char *p;
if (verbose) { if (verbose) {
@@ -756,14 +764,15 @@ main(int argc, char **argv)
printf("Error on regexp /%s/: %s\n", buf, msg); printf("Error on regexp /%s/: %s\n", buf, msg);
exit(1); exit(1);
} }
if (RegexMatch(re, buf2, 0, 1)) { if (RegexMatch(re, buf2, -1, 1)) {
printf("/%s/\t%s\t", buf, buf2); printf("/%s/\t\"%s\"\t\"", buf, buf2);
MatchedPosition(re, &fpos, &epos); MatchedPosition(re, &fpos, &epos);
while (fpos < epos) while (fpos < epos)
putchar(*(fpos++)); putchar(*(fpos++));
putchar('"');
} }
else else
printf("/%s/\t%s\tno_match", buf, buf2); printf("/%s/\t\"%s\"\tno_match", buf, buf2);
putchar('\n'); putchar('\n');
} }
/* notreached */ /* notreached */
+1 -1
View File
@@ -1,4 +1,4 @@
/* $Id: regex.h,v 1.4 2002/01/10 04:55:07 ukai Exp $ */ /* $Id: regex.h,v 1.5 2002/01/10 15:39:21 ukai Exp $ */
#define REGEX_MAX 64 #define REGEX_MAX 64
#define STORAGE_MAX 256 #define STORAGE_MAX 256
+3 -3
View File
@@ -1,4 +1,4 @@
/* $Id: url.c,v 1.29 2002/01/07 16:28:17 ukai Exp $ */ /* $Id: url.c,v 1.30 2002/01/10 15:39:21 ukai Exp $ */
#include "fm.h" #include "fm.h"
#include <sys/types.h> #include <sys/types.h>
#include <sys/socket.h> #include <sys/socket.h>
@@ -626,8 +626,8 @@ openSocket(char *const hostname,
#endif #endif
goto error; goto error;
} }
regexCompile("^[0-9][0-9]*\\.[0-9][0-9]*\\.[0-9][0-9]*\\.[0-9][0-9]*$", 0); regexCompile("^[0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+$", 0);
if (regexMatch(hostname, 0, 1)) { if (regexMatch(hostname, -1, 1)) {
sscanf(hostname, "%d.%d.%d.%d", &a1, &a2, &a3, &a4); sscanf(hostname, "%d.%d.%d.%d", &a1, &a2, &a3, &a4);
adr = htonl((a1 << 24) | (a2 << 16) | (a3 << 8) | a4); adr = htonl((a1 << 24) | (a2 << 16) | (a3 << 8) | a4);
bcopy((void *)&adr, (void *)&hostaddr.sin_addr, sizeof(long)); bcopy((void *)&adr, (void *)&hostaddr.sin_addr, sizeof(long));