Files
w3m/entity.c
Fumitoshi UKAI 8ca5c59be7 add rcsids
2001-11-20 17:49:23 +00:00

136 lines
4.1 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/* $Id: entity.c,v 1.2 2001/11/20 17:49:23 ukai Exp $ */
#include "fm.h"
#include <stdio.h>
#include "indep.h"
#include "Str.h"
#include "gc.h"
/*
 * One entry of a character-code lookup table.  map_search() looks for the
 * entry whose ucs equals the requested code and returns that entry's ptr.
 *
 * NOTE(review): ucs is a short, so codes above SHRT_MAX cannot be stored
 * as positive values -- confirm that the tables built from this type stay
 * within that range.
 */
typedef struct {
short ucs;	/* character code; the key compared by map_cmp() */
char *ptr;	/* string returned by map_search() for that code */
} entity_map;
#ifdef JP_CHARSET
#include "ucs_eucjp.h"
/*
 * Replacement strings for the upper half of ISO-8859-1 (0xa0-0xff), used
 * when JP_CHARSET is defined.  conv_entity() indexes this table with
 * (c - 0xa0); the number ranges in the trailing comments are the
 * character code minus 0x80.
 *
 * NOTE(review): many entries below are empty strings and a few hold text
 * that looks mis-decoded (for example the "<EFBFBD>" entry in the 56-63
 * row).  Presumably these were EUC-JP byte sequences that were lost in an
 * encoding conversion -- verify against a pristine copy of this file.
 */
static char *latin1_eucjp_map[ 96 ] =
{
NBSP, "!", "", "", "CUR","", "|", "", /* 32- 39 */
"", "(C)","-a", "", "", "-", "(R)","", /* 40- 47 */
"", "", "^2", "^3", "'", "μ", "", "ˇ", /* 48- 55 */
",", "^1", "-o", "<EFBFBD>", "1/4","1/2","3/4","?", /* 56- 63 */
"A`", "A'", "A^", "A~", "A:", "", "AE","C,", /* 64- 71 */
"E`", "E'", "E^", "E", "I`", "I'", "I^", "I:", /* 72- 79 */
"D-", "N~", "O`", "O'", "O^", "O~", "Oe", "", /* 80- 87 */
"φ", "U`", "U'", "U^", "U:", "Y'", "th", "ss", /* 88- 95 */
"a`", "a'", "a^", "a~", "a:", "a", "ae", "c", /* 96-103 */
"e`", "e'", "e^", "e:", "i`", "i'", "i^", "i:", /* 104-111 */
"d-", "n~", "o`", "o'", "o^", "o~", "oe", "", /* 112-119 */
"φ", "u`", "u'", "u^", "u:", "y'", "th", "y:" /* 120-127 */
};
#else
#ifdef __EMX__
/*
 * Character conversion table
 * ( from ISO-8859-1 to code page 850 )
 *
 * The character constants below are written as octal escapes and are in
 * code page 850.  The table has one entry per ISO-8859-1 code from 0xa0
 * to 0xff; conv_entity() indexes it with (c - 0xa0) when CodePage == 850.
 */
static char *latin1_cp850_map[ 96 ] = {
NBSP, "\255", "\275", "\234", "\317", "\276", "\335", "\365",
"\371", "\270", "\246", "\256", "\252", "\360", "\251", "\356",
"\370", "\361", "\375", "\374", "\357", "\346", "\364", "\372",
"\367", "\373", "\247", "\257", "\254", "\253", "\363", "\250",
"\267", "\265", "\266", "\307", "\216", "\217", "\222", "\200",
"\324", "\220", "\322", "\323", "\336", "\326", "\327", "\330",
"\321", "\245", "\343", "\340", "\342", "\345", "\231", "\236",
"\235", "\353", "\351", "\352", "\232", "\355", "\350", "\341",
"\205", "\240", "\203", "\306", "\204", "\206", "\221", "\207",
"\212", "\202", "\210", "\211", "\215", "\241", "\214", "\213",
"\320", "\244", "\225", "\242", "\223", "\344", "\224", "\366",
"\233", "\227", "\243", "\226", "\201", "\354", "\347", "\230"
};
#endif
#endif
#include "ucs_latin1.h"
/*
 * Plain-text approximations for the upper half of ISO-8859-1 (0xa0-0xff).
 * conv_entity() indexes this table with (c - 0xa0) when UseAltEntity is
 * set; the number ranges in the trailing comments are the character code
 * minus 0x80.
 *
 * NOTE(review): a few entries look inconsistent with their neighbours --
 * the last entry of the 40-47 row is an empty string, the 72-79 row has
 * "E" where the 104-111 row has "e:", and the 96-103 row has "a" and "c"
 * where the 64-71 row has "AA" and "C,".  Confirm whether these are
 * intentional before changing them.
 */
static char *latin1_ascii_map[ 96 ] =
{
NBSP, "!", "-c-","-L-","CUR","=Y=","|", "S:", /* 32- 39 */
"\"", "(C)","-a", "<<", "NOT","-", "(R)","", /* 40- 47 */
"DEG","+-", "^2", "^3", "'", "u", "P:", ".", /* 48- 55 */
",", "^1", "-o", ">>", "1/4","1/2","3/4","?", /* 56- 63 */
"A`", "A'", "A^", "A~", "A:", "AA", "AE", "C,", /* 64- 71 */
"E`", "E'", "E^", "E", "I`", "I'", "I^", "I:", /* 72- 79 */
"D-", "N~", "O`", "O'", "O^", "O~", "Oe", "x", /* 80- 87 */
"O/", "U`", "U'", "U^", "U:", "Y'", "th", "ss", /* 88- 95 */
"a`", "a'", "a^", "a~", "a:", "a", "ae", "c", /* 96-103 */
"e`", "e'", "e^", "e:", "i`", "i'", "i^", "i:", /* 104-111 */
"d-", "n~", "o`", "o'", "o^", "o~", "oe", "-:", /* 112-119 */
"o/", "u`", "u'", "u^", "u:", "y'", "th", "y:" /* 120-127 */
};
/*
 * When non-zero, conv_entity() answers Latin-1 codes from latin1_ascii_map
 * and, in JP_CHARSET builds, skips the ucs_eucjp_map lookup in favour of
 * ucs_latin1_map.
 */
char UseAltEntity = FALSE;
static int
map_cmp(const void *a, const void *b)
{
return *(int *)a - ((entity_map *)b)->ucs;
}
/*
 * Binary-search the n-element table map for character code c, using
 * map_cmp() as the ordering.  Returns the matching entry's ptr, or NULL
 * when no entry has that code.
 */
static char *
map_search(int c, entity_map *map, size_t n)
{
    entity_map *hit = bsearch(&c, map, n, sizeof(*map), map_cmp);

    if (hit == NULL)
        return NULL;
    return hit->ptr;
}
/*
 * conv_entity - return the display string for character code c.
 *
 *   c < 0          "?" (error)
 *   0x00 - 0x7f    the character itself as a one-character string
 *                  (an empty string for c == 0)
 *   0x80 - 0x9f    "?" (C1 range)
 *   0xa0           NBSP
 *   0xa1 - 0xff    latin1_ascii_map entry when UseAltEntity is set;
 *                  otherwise the latin1_eucjp_map entry (JP_CHARSET), the
 *                  latin1_cp850_map entry (__EMX__ with CodePage == 850),
 *                  or the byte itself as a one-character string
 *   0x100 and up   looked up in ucs_eucjp_map (JP_CHARSET without
 *                  UseAltEntity) or in ucs_latin1_map; "?" if not found
 *
 * The result points at a string literal, a table entry or static storage
 * owned by this function, so callers must not modify or free it.
 */
char *
conv_entity(int c)
{
    /*
     * One NUL-terminated slot per single-byte code, so the string returned
     * for one code is not overwritten by a later call for a different
     * code.  Static storage starts zeroed, which supplies the terminators.
     */
    static char single[0x100][2];
    char *p;

    if (c < 0)                  /* error */
        return "?";
    if (c < 0x80) {             /* US-ASCII */
        single[c][0] = (char)c;
        return single[c];
    }
    if (c < 0xa0)               /* C1 */
        return "?";
    if (c == 0xa0)              /* NBSP */
        return NBSP;
    if (c < 0x100) {            /* Latin 1 (ISO-8859-1) */
        if (UseAltEntity)
            return latin1_ascii_map[c - 0xa0];
#ifdef JP_CHARSET
        return latin1_eucjp_map[c - 0xa0];
#else
#ifdef __EMX__
        if (CodePage == 850)
            return latin1_cp850_map[c - 0xa0];
#endif
        single[c][0] = (char)c;
        return single[c];
#endif
    }
    /* Unicode */
#ifdef JP_CHARSET
    if (!UseAltEntity) {
        p = map_search(c, ucs_eucjp_map,
                       sizeof(ucs_eucjp_map) / sizeof(entity_map));
        return p ? p : "?";
    }
#endif
    p = map_search(c, ucs_latin1_map,
                   sizeof(ucs_latin1_map) / sizeof(entity_map));
    /*
     * A hit whose first byte has the high bit set is treated as an
     * ISO-8859-1 character: feed that byte back through the single-byte
     * handling above.
     */
    if (p && (*p & 0x80))       /* ISO-8859-1 */
        return conv_entity((int)(*p & 0xff));
    return p ? p : "?";
}