%% @author Bob Ippolito <bob@mochimedia.com>
%% @copyright 2007 Mochi Media, Inc.

%% @doc Converts HTML 4 charrefs and entities to codepoints.
|
|
-module(mochiweb_charref).
|
|
-export([charref/1, test/0]).
|
|
|
|
%% External API.
|
|
|
|
%% @spec charref(S) -> integer() | undefined
%% @doc Convert a decimal charref, hex charref, or html entity to a unicode
%%      codepoint, or return undefined on failure.
%%      The input should not include an ampersand or semicolon, and may be
%%      given either as a string or as a binary.
%%      charref("#38") = 38, charref("#x26") = 38, charref("amp") = 38.
%%      Numeric references must consist of digits only: a leading sign
%%      (as in "#-5" or "#x+26") and values above 16#10FFFF are rejected
%%      with undefined, because they do not denote a codepoint.
charref(B) when is_binary(B) ->
    charref(binary_to_list(B));
charref([$#, C | L]) when C =:= $x orelse C =:= $X ->
    numeric_charref(L, 16);
charref([$# | L]) ->
    numeric_charref(L, 10);
charref(L) ->
    entity(L).

%% Parse the digits of a numeric character reference in the given base
%% (10 or 16). Returns the codepoint, or undefined when Digits is empty,
%% is not a proper string of digits for that base, starts with a sign, or
%% denotes a value above 16#10FFFF.
%%
%% list_to_integer accepts a leading "+" or "-", so signs are filtered out
%% in the clause head; with no sign possible the parsed value is never
%% negative and only the upper bound needs checking.
numeric_charref([D | _] = Digits, Base) when D =/= $+, D =/= $- ->
    try erlang:list_to_integer(Digits, Base) of
        N when N =< 16#10FFFF -> N;
        _ -> undefined
    catch
        error:badarg -> undefined
    end;
numeric_charref(_, _) ->
    undefined.
|
|
|
|
%% @spec test() -> ok
%% @doc Run tests for mochiweb_charref. Each input is converted with
%%      charref/1 and matched against its expected result; a mismatch
%%      fails with a badmatch error.
test() ->
    Cases = [{"#1234", 1234},
             {"#xfF", 255},
             {"#XFf", 255},
             {"amp", 38},
             {"not_an_entity", undefined}],
    %% Expected is already bound by the fun head, so "=" asserts equality.
    lists:foreach(fun({Input, Expected}) ->
                          Expected = charref(Input)
                  end,
                  Cases),
    ok.
|
|
|
|
%% Internal API.
|
|
|
|
%% Map a named character entity (a string, without the leading ampersand
%% or trailing semicolon) to its codepoint. Names are case sensitive.
%% Returns undefined for any name that is not in the table.
%%
%% The table holds the HTML 4 entity sets (Latin-1, symbols and Greek,
%% markup-significant and internationalization characters) plus "apos",
%% which is not part of HTML 4 but is predefined by XML/XHTML and is
%% common in real-world markup.

%% ISO 8859-1 (Latin-1) characters.
entity("nbsp") -> 160;
entity("iexcl") -> 161;
entity("cent") -> 162;
entity("pound") -> 163;
entity("curren") -> 164;
entity("yen") -> 165;
entity("brvbar") -> 166;
entity("sect") -> 167;
entity("uml") -> 168;
entity("copy") -> 169;
entity("ordf") -> 170;
entity("laquo") -> 171;
entity("not") -> 172;
entity("shy") -> 173;
entity("reg") -> 174;
entity("macr") -> 175;
entity("deg") -> 176;
entity("plusmn") -> 177;
entity("sup2") -> 178;
entity("sup3") -> 179;
entity("acute") -> 180;
entity("micro") -> 181;
entity("para") -> 182;
entity("middot") -> 183;
entity("cedil") -> 184;
entity("sup1") -> 185;
entity("ordm") -> 186;
entity("raquo") -> 187;
entity("frac14") -> 188;
entity("frac12") -> 189;
entity("frac34") -> 190;
entity("iquest") -> 191;
entity("Agrave") -> 192;
entity("Aacute") -> 193;
entity("Acirc") -> 194;
entity("Atilde") -> 195;
entity("Auml") -> 196;
entity("Aring") -> 197;
entity("AElig") -> 198;
entity("Ccedil") -> 199;
entity("Egrave") -> 200;
entity("Eacute") -> 201;
entity("Ecirc") -> 202;
entity("Euml") -> 203;
entity("Igrave") -> 204;
entity("Iacute") -> 205;
entity("Icirc") -> 206;
entity("Iuml") -> 207;
entity("ETH") -> 208;
entity("Ntilde") -> 209;
entity("Ograve") -> 210;
entity("Oacute") -> 211;
entity("Ocirc") -> 212;
entity("Otilde") -> 213;
entity("Ouml") -> 214;
entity("times") -> 215;
entity("Oslash") -> 216;
entity("Ugrave") -> 217;
entity("Uacute") -> 218;
entity("Ucirc") -> 219;
entity("Uuml") -> 220;
entity("Yacute") -> 221;
entity("THORN") -> 222;
entity("szlig") -> 223;
entity("agrave") -> 224;
entity("aacute") -> 225;
entity("acirc") -> 226;
entity("atilde") -> 227;
entity("auml") -> 228;
entity("aring") -> 229;
entity("aelig") -> 230;
entity("ccedil") -> 231;
entity("egrave") -> 232;
entity("eacute") -> 233;
entity("ecirc") -> 234;
entity("euml") -> 235;
entity("igrave") -> 236;
entity("iacute") -> 237;
entity("icirc") -> 238;
entity("iuml") -> 239;
entity("eth") -> 240;
entity("ntilde") -> 241;
entity("ograve") -> 242;
entity("oacute") -> 243;
entity("ocirc") -> 244;
entity("otilde") -> 245;
entity("ouml") -> 246;
entity("divide") -> 247;
entity("oslash") -> 248;
entity("ugrave") -> 249;
entity("uacute") -> 250;
entity("ucirc") -> 251;
entity("uuml") -> 252;
entity("yacute") -> 253;
entity("thorn") -> 254;
entity("yuml") -> 255;

%% Symbols, mathematical symbols, and Greek letters.
entity("fnof") -> 402;
entity("Alpha") -> 913;
entity("Beta") -> 914;
entity("Gamma") -> 915;
entity("Delta") -> 916;
entity("Epsilon") -> 917;
entity("Zeta") -> 918;
entity("Eta") -> 919;
entity("Theta") -> 920;
entity("Iota") -> 921;
entity("Kappa") -> 922;
entity("Lambda") -> 923;
entity("Mu") -> 924;
entity("Nu") -> 925;
entity("Xi") -> 926;
entity("Omicron") -> 927;
entity("Pi") -> 928;
entity("Rho") -> 929;
entity("Sigma") -> 931;
entity("Tau") -> 932;
entity("Upsilon") -> 933;
entity("Phi") -> 934;
entity("Chi") -> 935;
entity("Psi") -> 936;
entity("Omega") -> 937;
entity("alpha") -> 945;
entity("beta") -> 946;
entity("gamma") -> 947;
entity("delta") -> 948;
entity("epsilon") -> 949;
entity("zeta") -> 950;
entity("eta") -> 951;
entity("theta") -> 952;
entity("iota") -> 953;
entity("kappa") -> 954;
entity("lambda") -> 955;
entity("mu") -> 956;
entity("nu") -> 957;
entity("xi") -> 958;
entity("omicron") -> 959;
entity("pi") -> 960;
entity("rho") -> 961;
entity("sigmaf") -> 962;
entity("sigma") -> 963;
entity("tau") -> 964;
entity("upsilon") -> 965;
entity("phi") -> 966;
entity("chi") -> 967;
entity("psi") -> 968;
entity("omega") -> 969;
entity("thetasym") -> 977;
entity("upsih") -> 978;
entity("piv") -> 982;
entity("bull") -> 8226;
entity("hellip") -> 8230;
entity("prime") -> 8242;
entity("Prime") -> 8243;
entity("oline") -> 8254;
entity("frasl") -> 8260;
entity("weierp") -> 8472;
entity("image") -> 8465;
entity("real") -> 8476;
entity("trade") -> 8482;
entity("alefsym") -> 8501;
entity("larr") -> 8592;
entity("uarr") -> 8593;
entity("rarr") -> 8594;
entity("darr") -> 8595;
entity("harr") -> 8596;
entity("crarr") -> 8629;
entity("lArr") -> 8656;
entity("uArr") -> 8657;
entity("rArr") -> 8658;
entity("dArr") -> 8659;
entity("hArr") -> 8660;
entity("forall") -> 8704;
entity("part") -> 8706;
entity("exist") -> 8707;
entity("empty") -> 8709;
entity("nabla") -> 8711;
entity("isin") -> 8712;
entity("notin") -> 8713;
entity("ni") -> 8715;
entity("prod") -> 8719;
entity("sum") -> 8721;
entity("minus") -> 8722;
entity("lowast") -> 8727;
entity("radic") -> 8730;
entity("prop") -> 8733;
entity("infin") -> 8734;
entity("ang") -> 8736;
entity("and") -> 8743;
entity("or") -> 8744;
entity("cap") -> 8745;
entity("cup") -> 8746;
entity("int") -> 8747;
entity("there4") -> 8756;
entity("sim") -> 8764;
entity("cong") -> 8773;
entity("asymp") -> 8776;
entity("ne") -> 8800;
entity("equiv") -> 8801;
entity("le") -> 8804;
entity("ge") -> 8805;
entity("sub") -> 8834;
entity("sup") -> 8835;
entity("nsub") -> 8836;
entity("sube") -> 8838;
entity("supe") -> 8839;
entity("oplus") -> 8853;
entity("otimes") -> 8855;
entity("perp") -> 8869;
entity("sdot") -> 8901;
entity("lceil") -> 8968;
entity("rceil") -> 8969;
entity("lfloor") -> 8970;
entity("rfloor") -> 8971;
entity("lang") -> 9001;
entity("rang") -> 9002;
entity("loz") -> 9674;
entity("spades") -> 9824;
entity("clubs") -> 9827;
entity("hearts") -> 9829;
entity("diams") -> 9830;

%% Markup-significant and internationalization characters.
entity("quot") -> 34;
entity("amp") -> 38;
%% Not an HTML 4 entity; predefined by XML/XHTML and widely used.
entity("apos") -> 39;
entity("lt") -> 60;
entity("gt") -> 62;
entity("OElig") -> 338;
entity("oelig") -> 339;
entity("Scaron") -> 352;
entity("scaron") -> 353;
entity("Yuml") -> 376;
entity("circ") -> 710;
entity("tilde") -> 732;
entity("ensp") -> 8194;
entity("emsp") -> 8195;
entity("thinsp") -> 8201;
entity("zwnj") -> 8204;
entity("zwj") -> 8205;
entity("lrm") -> 8206;
entity("rlm") -> 8207;
entity("ndash") -> 8211;
entity("mdash") -> 8212;
entity("lsquo") -> 8216;
entity("rsquo") -> 8217;
entity("sbquo") -> 8218;
entity("ldquo") -> 8220;
entity("rdquo") -> 8221;
entity("bdquo") -> 8222;
entity("dagger") -> 8224;
entity("Dagger") -> 8225;
entity("permil") -> 8240;
entity("lsaquo") -> 8249;
entity("rsaquo") -> 8250;
entity("euro") -> 8364;

%% Anything else is not a known entity name.
entity(_) -> undefined.
|
|
|