forked from cory/tildefriends
3349 lines
616 KiB
JavaScript
3349 lines
616 KiB
JavaScript
|
(function(global) {
|
|||
|
'use strict';
|
|||
|
global["encoding-indexes"] =
|
|||
|
{
|
|||
|
"big5":[null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,nu
ll,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,nu
ll,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,
|
|||
|
"euc-kr":[44034,44035,44037,44038,44043,44044,44045,44046,44047,44056,44062,44063,44065,44066,44067,44069,44070,44071,44072,44073,44074,44075,44078,44082,44083,44084,null,null,null,null,null,null,44085,44086,44087,44090,44091,44093,44094,44095,44097,44098,44099,44100,44101,44102,44103,44104,44105,44106,44108,44110,44111,44112,44113,44114,44115,44117,null,null,null,null,null,null,44118,44119,44121,44122,44123,44125,44126,44127,44128,44129,44130,44131,44132,44133,44134,44135,44136,44137,44138,44139,44140,44141,44142,44143,44146,44147,44149,44150,44153,44155,44156,44157,44158,44159,44162,44167,44168,44173,44174,44175,44177,44178,44179,44181,44182,44183,44184,44185,44186,44187,44190,44194,44195,44196,44197,44198,44199,44203,44205,44206,44209,44210,44211,44212,44213,44214,44215,44218,44222,44223,44224,44226,44227,44229,44230,44231,44233,44234,44235,44237,44238,44239,44240,44241,44242,44243,44244,44246,44248,44249,44250,44251,44252,44253,44254,44255,44258,44259,44261,44262,44265,44267,44269,44270,44274,44276,44279,44280,44281,44282,44283,44286,44287,44289,44290,44291,44293,44295,44296,44297,44298,44299,44302,44304,44306,44307,44308,44309,44310,44311,44313,44314,44315,44317,44318,44319,44321,44322,44323,44324,44325,44326,44327,44328,44330,44331,44334,44335,44336,44337,44338,44339,null,null,null,null,null,null,44342,44343,44345,44346,44347,44349,44350,44351,44352,44353,44354,44355,44358,44360,44362,44363,44364,44365,44366,44367,44369,44370,44371,44373,44374,44375,null,null,null,null,null,null,44377,44378,44379,44380,44381,44382,44383,44384,44386,44388,44389,44390,44391,44392,44393,44394,44395,44398,44399,44401,44402,44407,44408,44409,44410,44414,44416,44419,44420,44421,44422,44423,44426,44427,44429,44430,44431,44433,44434,44435,44436,44437,44438,44439,44440,44441,44442,44443,44446,44447,44448,44449,44450,44451,44453,44454,44455,44456,44457,44458,44459,44460,44461,44462,44463,44464,44465,44466,44467,44468,44469,44470,44472,44473,44474,44475,44476,44477,44478,44479,44482,4448
3,44485,44486,44487,44489,44490,44491,44492,44493,44494,44495,44498,44500,44501,44502,44503,44504,44505,44506,44507,44509,44510,44511,44513,44514,44515,44517,44518,44519,44520,44521,44522,44523,44524,44525,44526,44527,44528,44529,44530,44531,44532,44533,44534,44535,44538,44539,44541,44542,44546,44547,44548,44549,44550,44551,44554,44556,44558,44559,44560,44561,44562,44563,44565,44566,44567,44568,44569,44570,44571,44572,null,null,null,null,null,null,44573,44574,44575,44576,44577,44578,44579,44580,44581,44582,44583,44584,44585,44586,44587,44588,44589,44590,44591,44594,44595,44597,44598,44601,44603,44604,null,null,null,null,null,null,44605,44606,44607,44610,44612,44615,44616,44617,44619,44623,44625,44626,44627,44629,44631,44632,44633,44634,44635,44638,44642,44643,44644,44646,44647,44650,44651,44653,44654,44655,44657,44658,44659,44660,44661,44662,44663,44666,44670,44671,44672,44673,44674,44675,44678,44679,44680,44681,44682,44683,44685,44686,44687,44688,44689,44690,44691,44692,44693,44694,44695,44696,44697,44698,44699,44700,44701,44702,44703,44704,44705,44706,44707,44708,44709,44710,44711,44712,44713,44714,44715,44716,44717,44718,44719,44720,44721,44722,44723,44724,44725,44726,44727,44728,44729,44730,44731,44735,44737,44738,44739,44741,44742,44743,44744,44745,44746,44747,44750,44754,44755,44756,44757,44758,44759,44762,44763,44765,44766,44767,44768,44769,44770,44771,44772,44773,44774,44775,44777,44778,44780,44782,44783,44784,44785,44786,44787,44789,44790,44791,44793,44794,44795,44797,44798,44799,44800,44801,44802,44803,44804,44805,null,null,null,null,null,null,44806,44809,44810,44811,44812,44814,44815,44817,44818,44819,44820,44821,44822,44823,44824,44825,44826,44827,44828,44829,44830,44831,44832,44833,44834,44835,null,null,null,null,null,null,44836,44837,44838,44839,44840,44841,44842,44843,44846,44847,44849,44851,44853,44854,44855,44856,44857,44858,44859,44862,44864,44868,44869,44870,44871,44874,44875,44876,44877,44878,44879,44881,44882,44883,44884,44885,44886,44887,44888,
44889,44890,44891,44894,44895,44896,44897,44898,44899,44902,44903,44904,44905,44906,44907,449
|
|||
|
"gb18030":[19970,19972,19973,19974,19983,19986,19991,19999,20000,20001,20003,20006,20009,20014,20015,20017,20019,20021,20023,20028,20032,20033,20034,20036,20038,20042,20049,20053,20055,20058,20059,20066,20067,20068,20069,20071,20072,20074,20075,20076,20077,20078,20079,20082,20084,20085,20086,20087,20088,20089,20090,20091,20092,20093,20095,20096,20097,20098,20099,20100,20101,20103,20106,20112,20118,20119,20121,20124,20125,20126,20131,20138,20143,20144,20145,20148,20150,20151,20152,20153,20156,20157,20158,20168,20172,20175,20176,20178,20186,20187,20188,20192,20194,20198,20199,20201,20205,20206,20207,20209,20212,20216,20217,20218,20220,20222,20224,20226,20227,20228,20229,20230,20231,20232,20235,20236,20242,20243,20244,20245,20246,20252,20253,20257,20259,20264,20265,20268,20269,20270,20273,20275,20277,20279,20281,20283,20286,20287,20288,20289,20290,20292,20293,20295,20296,20297,20298,20299,20300,20306,20308,20310,20321,20322,20326,20328,20330,20331,20333,20334,20337,20338,20341,20343,20344,20345,20346,20349,20352,20353,20354,20357,20358,20359,20362,20364,20366,20368,20370,20371,20373,20374,20376,20377,20378,20380,20382,20383,20385,20386,20388,20395,20397,20400,20401,20402,20403,20404,20406,20407,20408,20409,20410,20411,20412,20413,20414,20416,20417,20418,20422,20423,20424,20425,20427,20428,20429,20434,20435,20436,20437,20438,20441,20443,20448,20450,20452,20453,20455,20459,20460,20464,20466,20468,20469,20470,20471,20473,20475,20476,20477,20479,20480,20481,20482,20483,20484,20485,20486,20487,20488,20489,20490,20491,20494,20496,20497,20499,20501,20502,20503,20507,20509,20510,20512,20514,20515,20516,20519,20523,20527,20528,20529,20530,20531,20532,20533,20534,20535,20536,20537,20539,20541,20543,20544,20545,20546,20548,20549,20550,20553,20554,20555,20557,20560,20561,20562,20563,20564,20566,20567,20568,20569,20571,20573,20574,20575,20576,20577,20578,20579,20580,20582,20583,20584,20585,20586,20587,20589,20590,20591,20592,20593,20594,20595,20596,20597,20600,20601,20602,20604,206
05,20609,20610,20611,20612,20614,20615,20617,20618,20619,20620,20622,20623,20624,20625,20626,20627,20628,20629,20630,20631,20632,20633,20634,20635,20636,20637,20638,20639,20640,20641,20642,20644,20646,20650,20651,20653,20654,20655,20656,20657,20659,20660,20661,20662,20663,20664,20665,20668,20669,20670,20671,20672,20673,20674,20675,20676,20677,20678,20679,20680,20681,20682,20683,20684,20685,20686,20688,20689,20690,20691,20692,20693,20695,20696,20697,20699,20700,20701,20702,20703,20704,20705,20706,20707,20708,20709,20712,20713,20714,20715,20719,20720,20721,20722,20724,20726,20727,20728,20729,20730,20732,20733,20734,20735,20736,20737,20738,20739,20740,20741,20744,20745,20746,20748,20749,20750,20751,20752,20753,20755,20756,20757,20758,20759,20760,20761,20762,20763,20764,20765,20766,20767,20768,20770,20771,20772,20773,20774,20775,20776,20777,20778,20779,20780,20781,20782,20783,20784,20785,20786,20787,20788,20789,20790,20791,20792,20793,20794,20795,20796,20797,20798,20802,20807,20810,20812,20814,20815,20816,20818,20819,20823,20824,20825,20827,20829,20830,20831,20832,20833,20835,20836,20838,20839,20841,20842,20847,20850,20858,20862,20863,20867,20868,20870,20871,20874,20875,20878,20879,20880,20881,20883,20884,20888,20890,20893,20894,20895,20897,20899,20902,20903,20904,20905,20906,20909,20910,20916,20920,20921,20922,20926,20927,20929,20930,20931,20933,20936,20938,20941,20942,20944,20946,20947,20948,20949,20950,20951,20952,20953,20954,20956,20958,20959,20962,20963,20965,20966,20967,20968,20969,20970,20972,20974,20977,20978,20980,20983,20990,20996,20997,21001,21003,21004,21007,21008,21011,21012,21013,21020,21022,21023,21025,21026,21027,21029,21030,21031,21034,21036,21039,21041,21042,21044,21045,21052,21054,21060,21061,21062,21063,21064,21065,21067,21070,21071,21074,21075,21077,21079,21080,21081,21082,21083,21085,21087,21088,21090,21091,21092,21094,21096,21099,21100,21101,21102,21104,21105,21107,21108,21109,21110,21111,21112,21113,21114,21115,21116,21118,21120,21123,21124,21125
,21126,21127,21129,21130,21131,21132,21133,21134,21135,21137,21138,21140,21141,21142,21143,21
|
|||
|
"gb18030-ranges":[[0,128],[36,165],[38,169],[45,178],[50,184],[81,216],[89,226],[95,235],[96,238],[100,244],[103,248],[104,251],[105,253],[109,258],[126,276],[133,284],[148,300],[172,325],[175,329],[179,334],[208,364],[306,463],[307,465],[308,467],[309,469],[310,471],[311,473],[312,475],[313,477],[341,506],[428,594],[443,610],[544,712],[545,716],[558,730],[741,930],[742,938],[749,962],[750,970],[805,1026],[819,1104],[820,1106],[7922,8209],[7924,8215],[7925,8218],[7927,8222],[7934,8231],[7943,8241],[7944,8244],[7945,8246],[7950,8252],[8062,8365],[8148,8452],[8149,8454],[8152,8458],[8164,8471],[8174,8482],[8236,8556],[8240,8570],[8262,8596],[8264,8602],[8374,8713],[8380,8720],[8381,8722],[8384,8726],[8388,8731],[8390,8737],[8392,8740],[8393,8742],[8394,8748],[8396,8751],[8401,8760],[8406,8766],[8416,8777],[8419,8781],[8424,8787],[8437,8802],[8439,8808],[8445,8816],[8482,8854],[8485,8858],[8496,8870],[8521,8896],[8603,8979],[8936,9322],[8946,9372],[9046,9548],[9050,9588],[9063,9616],[9066,9622],[9076,9634],[9092,9652],[9100,9662],[9108,9672],[9111,9676],[9113,9680],[9131,9702],[9162,9735],[9164,9738],[9218,9793],[9219,9795],[11329,11906],[11331,11909],[11334,11913],[11336,11917],[11346,11928],[11361,11944],[11363,11947],[11366,11951],[11370,11956],[11372,11960],[11375,11964],[11389,11979],[11682,12284],[11686,12292],[11687,12312],[11692,12319],[11694,12330],[11714,12351],[11716,12436],[11723,12447],[11725,12535],[11730,12543],[11736,12586],[11982,12842],[11989,12850],[12102,12964],[12336,13200],[12348,13215],[12350,13218],[12384,13253],[12393,13263],[12395,13267],[12397,13270],[12510,13384],[12553,13428],[12851,13727],[12962,13839],[12973,13851],[13738,14617],[13823,14703],[13919,14801],[13933,14816],[14080,14964],[14298,15183],[14585,15471],[14698,15585],[15583,16471],[15847,16736],[16318,17208],[16434,17325],[16438,17330],[16481,17374],[16729,17623],[17102,17997],[17122,18018],[17315,18212],[17320,18218],[17402,18301],[17418,18318],[17859,18760],[17909,18811],[17911,
18814],[17915,18820],[17916,18823],[17936,18844],[17939,18848],[17961,18872],[18664,19576],[18703,19620],[18814,19738],[18962,19887],[19043,40870],[33469,59244],[33470,59336],[33471,59367],[33484,59413],[33485,59417],[33490,59423],[33497,59431],[33501,59437],[33505,59443],[33513,59452],[33520,59460],[33536,59478],[33550,59493],[37845,63789],[37921,63866],[37948,63894],[38029,63976],[38038,63986],[38064,64016],[38065,64018],[38066,64021],[38069,64025],[38075,64034],[38076,64037],[38078,64042],[39108,65074],[39109,65093],[39113,65107],[39114,65112],[39115,65127],[39116,65132],[39265,65375],[39394,65510],[189000,65536]],
|
|||
|
"jis0208":[12288,12289,12290,65292,65294,12539,65306,65307,65311,65281,12443,12444,180,65344,168,65342,65507,65343,12541,12542,12445,12446,12291,20189,12293,12294,12295,12540,8213,8208,65295,65340,65374,8741,65372,8230,8229,8216,8217,8220,8221,65288,65289,12308,12309,65339,65341,65371,65373,12296,12297,12298,12299,12300,12301,12302,12303,12304,12305,65291,65293,177,215,247,65309,8800,65308,65310,8806,8807,8734,8756,9794,9792,176,8242,8243,8451,65509,65284,65504,65505,65285,65283,65286,65290,65312,167,9734,9733,9675,9679,9678,9671,9670,9633,9632,9651,9650,9661,9660,8251,12306,8594,8592,8593,8595,12307,null,null,null,null,null,null,null,null,null,null,null,8712,8715,8838,8839,8834,8835,8746,8745,null,null,null,null,null,null,null,null,8743,8744,65506,8658,8660,8704,8707,null,null,null,null,null,null,null,null,null,null,null,8736,8869,8978,8706,8711,8801,8786,8810,8811,8730,8765,8733,8757,8747,8748,null,null,null,null,null,null,null,8491,8240,9839,9837,9834,8224,8225,182,null,null,null,null,9711,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,65296,65297,65298,65299,65300,65301,65302,65303,65304,65305,null,null,null,null,null,null,null,65313,65314,65315,65316,65317,65318,65319,65320,65321,65322,65323,65324,65325,65326,65327,65328,65329,65330,65331,65332,65333,65334,65335,65336,65337,65338,null,null,null,null,null,null,65345,65346,65347,65348,65349,65350,65351,65352,65353,65354,65355,65356,65357,65358,65359,65360,65361,65362,65363,65364,65365,65366,65367,65368,65369,65370,null,null,null,null,12353,12354,12355,12356,12357,12358,12359,12360,12361,12362,12363,12364,12365,12366,12367,12368,12369,12370,12371,12372,12373,12374,12375,12376,12377,12378,12379,12380,12381,12382,12383,12384,12385,12386,12387,12388,12389,12390,12391,12392,12393,12394,12395,12396,12397,12398,12399,12400,12401,12402,12403,12404,12405,12406,12407,12408,12409,12410,12411,12412,12413,12414,12415,12416,12417,12418,12419,12420,12421,12422,12423,12424,12425,12426,12427,12428,1242
9,12430,12431,12432,12433,12434,12435,null,null,null,null,null,null,null,null,null,null,null,12449,12450,12451,12452,12453,12454,12455,12456,12457,12458,12459,12460,12461,12462,12463,12464,12465,12466,12467,12468,12469,12470,12471,12472,12473,12474,12475,12476,12477,12478,12479,12480,12481,12482,12483,12484,12485,12486,12487,12488,12489,12490,12491,12492,12493,12494,12495,12496,12497,12498,12499,12500,12501,12502,12503,12504,12505,12506,12507,12508,12509,12510,12511,12512,12513,12514,12515,12516,12517,12518,12519,12520,12521,12522,12523,12524,12525,12526,12527,12528,12529,12530,12531,12532,12533,12534,null,null,null,null,null,null,null,null,913,914,915,916,917,918,919,920,921,922,923,924,925,926,927,928,929,931,932,933,934,935,936,937,null,null,null,null,null,null,null,null,945,946,947,948,949,950,951,952,953,954,955,956,957,958,959,960,961,963,964,965,966,967,968,969,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,1040,1041,1042,1043,1044,1045,1025,1046,1047,1048,1049,1050,1051,1052,1053,1054,1055,1056,1057,1058,1059,1060,1061,1062,1063,1064,1065,1066,1067,1068,1069,1070,1071,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,1072,1073,1074,1075,1076,1077,1105,1078,1079,1080,1081,1082,1083,1084,1085,1086,1087,1088,1089,1090,1091,1092,1093,1094,1095,1096,1097,1098,1099,1100,1101,1102,1103,null,null,null,null,null,null,null,null,null,null,null,null,null,9472,9474,9484,9488,9496,9492,9500,9516,9508,9524,9532,9473,9475,9487,9491,9499,9495,9507,9523,9515,9531,9547,9504,9519,9512,9527,9535,9501,9520,9509,9528,9538,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null
,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,nu
|
|||
|
"jis0212":[null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,728,711,184,729,733,175,731,730,65374,900,901,null,null,null,null,null,null,null,null,161,166,191,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,186,170,169,174,8482,164,8470,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,n
ull,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,902,904,905,906,938,null,908,null,910,939,null,911,null,null,null,null,940,941,942,943,970,912,972,962,973,971,944,974,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,1026,1027,1028,1029,1030,1031,1032,1033,1034,1035,1036,1038,1039,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,1106,1107,1108,1109,1110,1111,1112,1113,1114,1115,1116,1118,1119,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,198,272,null,294,null,306,null,321,319,null,330,216,338,null,358,222,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,230,273,240,295,305,307,312,322,320,329,331,248,339,223,359,254,null,null,null,null,null,null,null,null,null,null,null
,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,nu
|
|||
|
"ibm866":[1040,1041,1042,1043,1044,1045,1046,1047,1048,1049,1050,1051,1052,1053,1054,1055,1056,1057,1058,1059,1060,1061,1062,1063,1064,1065,1066,1067,1068,1069,1070,1071,1072,1073,1074,1075,1076,1077,1078,1079,1080,1081,1082,1083,1084,1085,1086,1087,9617,9618,9619,9474,9508,9569,9570,9558,9557,9571,9553,9559,9565,9564,9563,9488,9492,9524,9516,9500,9472,9532,9566,9567,9562,9556,9577,9574,9568,9552,9580,9575,9576,9572,9573,9561,9560,9554,9555,9579,9578,9496,9484,9608,9604,9612,9616,9600,1088,1089,1090,1091,1092,1093,1094,1095,1096,1097,1098,1099,1100,1101,1102,1103,1025,1105,1028,1108,1031,1111,1038,1118,176,8729,183,8730,8470,164,9632,160],
|
|||
|
"iso-8859-2":[128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,260,728,321,164,317,346,167,168,352,350,356,377,173,381,379,176,261,731,322,180,318,347,711,184,353,351,357,378,733,382,380,340,193,194,258,196,313,262,199,268,201,280,203,282,205,206,270,272,323,327,211,212,336,214,215,344,366,218,368,220,221,354,223,341,225,226,259,228,314,263,231,269,233,281,235,283,237,238,271,273,324,328,243,244,337,246,247,345,367,250,369,252,253,355,729],
|
|||
|
"iso-8859-3":[128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,294,728,163,164,null,292,167,168,304,350,286,308,173,null,379,176,295,178,179,180,181,293,183,184,305,351,287,309,189,null,380,192,193,194,null,196,266,264,199,200,201,202,203,204,205,206,207,null,209,210,211,212,288,214,215,284,217,218,219,220,364,348,223,224,225,226,null,228,267,265,231,232,233,234,235,236,237,238,239,null,241,242,243,244,289,246,247,285,249,250,251,252,365,349,729],
|
|||
|
"iso-8859-4":[128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,260,312,342,164,296,315,167,168,352,274,290,358,173,381,175,176,261,731,343,180,297,316,711,184,353,275,291,359,330,382,331,256,193,194,195,196,197,198,302,268,201,280,203,278,205,206,298,272,325,332,310,212,213,214,215,216,370,218,219,220,360,362,223,257,225,226,227,228,229,230,303,269,233,281,235,279,237,238,299,273,326,333,311,244,245,246,247,248,371,250,251,252,361,363,729],
|
|||
|
"iso-8859-5":[128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,1025,1026,1027,1028,1029,1030,1031,1032,1033,1034,1035,1036,173,1038,1039,1040,1041,1042,1043,1044,1045,1046,1047,1048,1049,1050,1051,1052,1053,1054,1055,1056,1057,1058,1059,1060,1061,1062,1063,1064,1065,1066,1067,1068,1069,1070,1071,1072,1073,1074,1075,1076,1077,1078,1079,1080,1081,1082,1083,1084,1085,1086,1087,1088,1089,1090,1091,1092,1093,1094,1095,1096,1097,1098,1099,1100,1101,1102,1103,8470,1105,1106,1107,1108,1109,1110,1111,1112,1113,1114,1115,1116,167,1118,1119],
|
|||
|
"iso-8859-6":[128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,null,null,null,164,null,null,null,null,null,null,null,1548,173,null,null,null,null,null,null,null,null,null,null,null,null,null,1563,null,null,null,1567,null,1569,1570,1571,1572,1573,1574,1575,1576,1577,1578,1579,1580,1581,1582,1583,1584,1585,1586,1587,1588,1589,1590,1591,1592,1593,1594,null,null,null,null,null,1600,1601,1602,1603,1604,1605,1606,1607,1608,1609,1610,1611,1612,1613,1614,1615,1616,1617,1618,null,null,null,null,null,null,null,null,null,null,null,null,null],
|
|||
|
"iso-8859-7":[128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,8216,8217,163,8364,8367,166,167,168,169,890,171,172,173,null,8213,176,177,178,179,900,901,902,183,904,905,906,187,908,189,910,911,912,913,914,915,916,917,918,919,920,921,922,923,924,925,926,927,928,929,null,931,932,933,934,935,936,937,938,939,940,941,942,943,944,945,946,947,948,949,950,951,952,953,954,955,956,957,958,959,960,961,962,963,964,965,966,967,968,969,970,971,972,973,974,null],
|
|||
|
"iso-8859-8":[128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,null,162,163,164,165,166,167,168,169,215,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,247,187,188,189,190,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,8215,1488,1489,1490,1491,1492,1493,1494,1495,1496,1497,1498,1499,1500,1501,1502,1503,1504,1505,1506,1507,1508,1509,1510,1511,1512,1513,1514,null,null,8206,8207,null],
|
|||
|
"iso-8859-10":[128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,260,274,290,298,296,310,167,315,272,352,358,381,173,362,330,176,261,275,291,299,297,311,183,316,273,353,359,382,8213,363,331,256,193,194,195,196,197,198,302,268,201,280,203,278,205,206,207,208,325,332,211,212,213,214,360,216,370,218,219,220,221,222,223,257,225,226,227,228,229,230,303,269,233,281,235,279,237,238,239,240,326,333,243,244,245,246,361,248,371,250,251,252,253,254,312],
|
|||
|
"iso-8859-13":[128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,8221,162,163,164,8222,166,167,216,169,342,171,172,173,174,198,176,177,178,179,8220,181,182,183,248,185,343,187,188,189,190,230,260,302,256,262,196,197,280,274,268,201,377,278,290,310,298,315,352,323,325,211,332,213,214,215,370,321,346,362,220,379,381,223,261,303,257,263,228,229,281,275,269,233,378,279,291,311,299,316,353,324,326,243,333,245,246,247,371,322,347,363,252,380,382,8217],
|
|||
|
"iso-8859-14":[128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,7682,7683,163,266,267,7690,167,7808,169,7810,7691,7922,173,174,376,7710,7711,288,289,7744,7745,182,7766,7809,7767,7811,7776,7923,7812,7813,7777,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,372,209,210,211,212,213,214,7786,216,217,218,219,220,221,374,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,373,241,242,243,244,245,246,7787,248,249,250,251,252,253,375,255],
|
|||
|
"iso-8859-15":[128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,8364,165,352,167,353,169,170,171,172,173,174,175,176,177,178,179,381,181,182,183,382,185,186,187,338,339,376,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255],
|
|||
|
"iso-8859-16":[128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,260,261,321,8364,8222,352,167,353,169,536,171,377,173,378,379,176,177,268,322,381,8221,182,183,382,269,537,187,338,339,376,380,192,193,194,258,196,262,198,199,200,201,202,203,204,205,206,207,272,323,210,211,212,336,214,346,368,217,218,219,220,280,538,223,224,225,226,259,228,263,230,231,232,233,234,235,236,237,238,239,273,324,242,243,244,337,246,347,369,249,250,251,252,281,539,255],
|
|||
|
"koi8-r":[9472,9474,9484,9488,9492,9496,9500,9508,9516,9524,9532,9600,9604,9608,9612,9616,9617,9618,9619,8992,9632,8729,8730,8776,8804,8805,160,8993,176,178,183,247,9552,9553,9554,1105,9555,9556,9557,9558,9559,9560,9561,9562,9563,9564,9565,9566,9567,9568,9569,1025,9570,9571,9572,9573,9574,9575,9576,9577,9578,9579,9580,169,1102,1072,1073,1094,1076,1077,1092,1075,1093,1080,1081,1082,1083,1084,1085,1086,1087,1103,1088,1089,1090,1091,1078,1074,1100,1099,1079,1096,1101,1097,1095,1098,1070,1040,1041,1062,1044,1045,1060,1043,1061,1048,1049,1050,1051,1052,1053,1054,1055,1071,1056,1057,1058,1059,1046,1042,1068,1067,1047,1064,1069,1065,1063,1066],
|
|||
|
"koi8-u":[9472,9474,9484,9488,9492,9496,9500,9508,9516,9524,9532,9600,9604,9608,9612,9616,9617,9618,9619,8992,9632,8729,8730,8776,8804,8805,160,8993,176,178,183,247,9552,9553,9554,1105,1108,9556,1110,1111,9559,9560,9561,9562,9563,1169,1118,9566,9567,9568,9569,1025,1028,9571,1030,1031,9574,9575,9576,9577,9578,1168,1038,169,1102,1072,1073,1094,1076,1077,1092,1075,1093,1080,1081,1082,1083,1084,1085,1086,1087,1103,1088,1089,1090,1091,1078,1074,1100,1099,1079,1096,1101,1097,1095,1098,1070,1040,1041,1062,1044,1045,1060,1043,1061,1048,1049,1050,1051,1052,1053,1054,1055,1071,1056,1057,1058,1059,1046,1042,1068,1067,1047,1064,1069,1065,1063,1066],
|
|||
|
"macintosh":[196,197,199,201,209,214,220,225,224,226,228,227,229,231,233,232,234,235,237,236,238,239,241,243,242,244,246,245,250,249,251,252,8224,176,162,163,167,8226,182,223,174,169,8482,180,168,8800,198,216,8734,177,8804,8805,165,181,8706,8721,8719,960,8747,170,186,937,230,248,191,161,172,8730,402,8776,8710,171,187,8230,160,192,195,213,338,339,8211,8212,8220,8221,8216,8217,247,9674,255,376,8260,8364,8249,8250,64257,64258,8225,183,8218,8222,8240,194,202,193,203,200,205,206,207,204,211,212,63743,210,218,219,217,305,710,732,175,728,729,730,184,733,731,711],
|
|||
|
"windows-874":[8364,129,130,131,132,8230,134,135,136,137,138,139,140,141,142,143,144,8216,8217,8220,8221,8226,8211,8212,152,153,154,155,156,157,158,159,160,3585,3586,3587,3588,3589,3590,3591,3592,3593,3594,3595,3596,3597,3598,3599,3600,3601,3602,3603,3604,3605,3606,3607,3608,3609,3610,3611,3612,3613,3614,3615,3616,3617,3618,3619,3620,3621,3622,3623,3624,3625,3626,3627,3628,3629,3630,3631,3632,3633,3634,3635,3636,3637,3638,3639,3640,3641,3642,null,null,null,null,3647,3648,3649,3650,3651,3652,3653,3654,3655,3656,3657,3658,3659,3660,3661,3662,3663,3664,3665,3666,3667,3668,3669,3670,3671,3672,3673,3674,3675,null,null,null,null],
|
|||
|
"windows-1250":[8364,129,8218,131,8222,8230,8224,8225,136,8240,352,8249,346,356,381,377,144,8216,8217,8220,8221,8226,8211,8212,152,8482,353,8250,347,357,382,378,160,711,728,321,164,260,166,167,168,169,350,171,172,173,174,379,176,177,731,322,180,181,182,183,184,261,351,187,317,733,318,380,340,193,194,258,196,313,262,199,268,201,280,203,282,205,206,270,272,323,327,211,212,336,214,215,344,366,218,368,220,221,354,223,341,225,226,259,228,314,263,231,269,233,281,235,283,237,238,271,273,324,328,243,244,337,246,247,345,367,250,369,252,253,355,729],
|
|||
|
"windows-1251":[1026,1027,8218,1107,8222,8230,8224,8225,8364,8240,1033,8249,1034,1036,1035,1039,1106,8216,8217,8220,8221,8226,8211,8212,152,8482,1113,8250,1114,1116,1115,1119,160,1038,1118,1032,164,1168,166,167,1025,169,1028,171,172,173,174,1031,176,177,1030,1110,1169,181,182,183,1105,8470,1108,187,1112,1029,1109,1111,1040,1041,1042,1043,1044,1045,1046,1047,1048,1049,1050,1051,1052,1053,1054,1055,1056,1057,1058,1059,1060,1061,1062,1063,1064,1065,1066,1067,1068,1069,1070,1071,1072,1073,1074,1075,1076,1077,1078,1079,1080,1081,1082,1083,1084,1085,1086,1087,1088,1089,1090,1091,1092,1093,1094,1095,1096,1097,1098,1099,1100,1101,1102,1103],
|
|||
|
"windows-1252":[8364,129,8218,402,8222,8230,8224,8225,710,8240,352,8249,338,141,381,143,144,8216,8217,8220,8221,8226,8211,8212,732,8482,353,8250,339,157,382,376,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255],
|
|||
|
"windows-1253":[8364,129,8218,402,8222,8230,8224,8225,136,8240,138,8249,140,141,142,143,144,8216,8217,8220,8221,8226,8211,8212,152,8482,154,8250,156,157,158,159,160,901,902,163,164,165,166,167,168,169,null,171,172,173,174,8213,176,177,178,179,900,181,182,183,904,905,906,187,908,189,910,911,912,913,914,915,916,917,918,919,920,921,922,923,924,925,926,927,928,929,null,931,932,933,934,935,936,937,938,939,940,941,942,943,944,945,946,947,948,949,950,951,952,953,954,955,956,957,958,959,960,961,962,963,964,965,966,967,968,969,970,971,972,973,974,null],
|
|||
|
"windows-1254":[8364,129,8218,402,8222,8230,8224,8225,710,8240,352,8249,338,141,142,143,144,8216,8217,8220,8221,8226,8211,8212,732,8482,353,8250,339,157,158,376,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,286,209,210,211,212,213,214,215,216,217,218,219,220,304,350,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,287,241,242,243,244,245,246,247,248,249,250,251,252,305,351,255],
|
|||
|
"windows-1255":[8364,129,8218,402,8222,8230,8224,8225,710,8240,138,8249,140,141,142,143,144,8216,8217,8220,8221,8226,8211,8212,732,8482,154,8250,156,157,158,159,160,161,162,163,8362,165,166,167,168,169,215,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,247,187,188,189,190,191,1456,1457,1458,1459,1460,1461,1462,1463,1464,1465,1466,1467,1468,1469,1470,1471,1472,1473,1474,1475,1520,1521,1522,1523,1524,null,null,null,null,null,null,null,1488,1489,1490,1491,1492,1493,1494,1495,1496,1497,1498,1499,1500,1501,1502,1503,1504,1505,1506,1507,1508,1509,1510,1511,1512,1513,1514,null,null,8206,8207,null],
|
|||
|
"windows-1256":[8364,1662,8218,402,8222,8230,8224,8225,710,8240,1657,8249,338,1670,1688,1672,1711,8216,8217,8220,8221,8226,8211,8212,1705,8482,1681,8250,339,8204,8205,1722,160,1548,162,163,164,165,166,167,168,169,1726,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,1563,187,188,189,190,1567,1729,1569,1570,1571,1572,1573,1574,1575,1576,1577,1578,1579,1580,1581,1582,1583,1584,1585,1586,1587,1588,1589,1590,215,1591,1592,1593,1594,1600,1601,1602,1603,224,1604,226,1605,1606,1607,1608,231,232,233,234,235,1609,1610,238,239,1611,1612,1613,1614,244,1615,1616,247,1617,249,1618,251,252,8206,8207,1746],
|
|||
|
"windows-1257":[8364,129,8218,131,8222,8230,8224,8225,136,8240,138,8249,140,168,711,184,144,8216,8217,8220,8221,8226,8211,8212,152,8482,154,8250,156,175,731,159,160,null,162,163,164,null,166,167,216,169,342,171,172,173,174,198,176,177,178,179,180,181,182,183,248,185,343,187,188,189,190,230,260,302,256,262,196,197,280,274,268,201,377,278,290,310,298,315,352,323,325,211,332,213,214,215,370,321,346,362,220,379,381,223,261,303,257,263,228,229,281,275,269,233,378,279,291,311,299,316,353,324,326,243,333,245,246,247,371,322,347,363,252,380,382,729],
|
|||
|
"windows-1258":[8364,129,8218,402,8222,8230,8224,8225,710,8240,138,8249,338,141,142,143,144,8216,8217,8220,8221,8226,8211,8212,732,8482,154,8250,339,157,158,376,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,258,196,197,198,199,200,201,202,203,768,205,206,207,272,209,777,211,212,416,214,215,216,217,218,219,220,431,771,223,224,225,226,259,228,229,230,231,232,233,234,235,769,237,238,239,273,241,803,243,244,417,246,247,248,249,250,251,252,432,8363,255],
|
|||
|
"x-mac-cyrillic":[1040,1041,1042,1043,1044,1045,1046,1047,1048,1049,1050,1051,1052,1053,1054,1055,1056,1057,1058,1059,1060,1061,1062,1063,1064,1065,1066,1067,1068,1069,1070,1071,8224,176,1168,163,167,8226,182,1030,174,169,8482,1026,1106,8800,1027,1107,8734,177,8804,8805,1110,181,1169,1032,1028,1108,1031,1111,1033,1113,1034,1114,1112,1029,172,8730,402,8776,8710,171,187,8230,160,1035,1115,1036,1116,1109,8211,8212,8220,8221,8216,8217,247,8222,1038,1118,1039,1119,8470,1025,1105,1103,1072,1073,1074,1075,1076,1077,1078,1079,1080,1081,1082,1083,1084,1085,1086,1087,1088,1089,1090,1091,1092,1093,1094,1095,1096,1097,1098,1099,1100,1101,1102,8364]
|
|||
|
}
|
|||
|
;}(this));
|
|||
|
|
|||
|
// This is free and unencumbered software released into the public domain.
|
|||
|
// See LICENSE.md for more information.
|
|||
|
|
|||
|
// If we're in node require encoding-indexes and attach it to the global.
/**
 * @fileoverview Global |this| required for resolving indexes in node.
 * @suppress {globalThis}
 */
// Only runs under a CommonJS-style loader (a `module` with `exports`), and
// only when top-level |this| does not already carry the index tables.
if (typeof module !== "undefined" && module.exports &&
    !this["encoding-indexes"]) {
  require("./encoding-indexes.js");
}
|
|||
|
|
|||
|
(function(global) {
|
|||
|
'use strict';
|
|||
|
|
|||
|
//
|
|||
|
// Utilities
|
|||
|
//
|
|||
|
|
|||
|
/**
 * Tests whether a number lies inside a closed interval.
 *
 * @param {number} a The number to test.
 * @param {number} min The minimum value in the range, inclusive.
 * @param {number} max The maximum value in the range, inclusive.
 * @return {boolean} True if a >= min and a <= max.
 */
function inRange(a, min, max) {
  return min <= a && a <= max;
}
|
|||
|
|
|||
|
/**
 * Membership test built on Array.prototype.indexOf, so items are compared
 * with strict equality.
 *
 * @param {!Array.<*>} array The array to check.
 * @param {*} item The item to look for in the array.
 * @return {boolean} True if the item appears in the array.
 */
function includes(array, item) {
  return array.indexOf(item) !== -1;
}
|
|||
|
|
|||
|
// Local shorthand for Math.floor.
var floor = Math.floor;
|
|||
|
|
|||
|
/**
 * Converts an optional argument to a dictionary-like object.
 *
 * @param {*} o The value to convert.
 * @return {Object} A fresh empty object when |o| is undefined, otherwise
 *     |o| itself when it is an object (functions included).
 * @throws {TypeError} If |o| is null or any other primitive.
 */
function ToDictionary(o) {
  if (o === undefined) return {};
  // Object(o) returns o unchanged only when o is already an object.
  if (o === Object(o)) return o;
  throw TypeError('Could not convert argument to dictionary');
}
|
|||
|
|
|||
|
/**
 * Converts a string of UTF-16 code units into an array of code points.
 * Well-formed surrogate pairs are combined into one code point; every
 * unpaired surrogate becomes U+FFFD REPLACEMENT CHARACTER.
 *
 * @param {string} string Input string of UTF-16 code units.
 * @return {!Array.<number>} Code points.
 */
function stringToCodePoints(string) {
  // https://heycam.github.io/webidl/#dfn-obtain-unicode

  // 1. Let S be the DOMString value.
  var s = String(string);

  // 2. Let n be the length of S.
  var n = s.length;

  // 3. Initialize i to 0.
  var i = 0;

  // 4. Initialize U to be an empty sequence of Unicode characters.
  var u = [];

  // 5. While i < n:
  while (i < n) {

    // 1. Let c be the code unit in S at index i.
    var c = s.charCodeAt(i);

    // 2. Depending on the value of c:

    // c < 0xD800 or c > 0xDFFF
    if (c < 0xD800 || c > 0xDFFF) {
      // Append to U the Unicode character with code point c.
      u.push(c);
    }

    // 0xDC00 ≤ c ≤ 0xDFFF
    else if (0xDC00 <= c && c <= 0xDFFF) {
      // Append to U a U+FFFD REPLACEMENT CHARACTER.
      u.push(0xFFFD);
    }

    // 0xD800 ≤ c ≤ 0xDBFF
    else if (0xD800 <= c && c <= 0xDBFF) {
      // 1. If i = n−1, then append to U a U+FFFD REPLACEMENT
      // CHARACTER.
      if (i === n - 1) {
        u.push(0xFFFD);
      }
      // 2. Otherwise, i < n−1:
      else {
        // 1. Let d be the code unit in S at index i+1.
        var d = s.charCodeAt(i + 1);

        // 2. If 0xDC00 ≤ d ≤ 0xDFFF, then:
        if (0xDC00 <= d && d <= 0xDFFF) {
          // 1. Let a be c & 0x3FF.
          var a = c & 0x3FF;

          // 2. Let b be d & 0x3FF.
          var b = d & 0x3FF;

          // 3. Append to U the Unicode character with code point
          // 2^16+2^10*a+b.
          u.push(0x10000 + (a << 10) + b);

          // 4. Set i to i+1.
          i += 1;
        }

        // 3. Otherwise, d < 0xDC00 or d > 0xDFFF. Append to U a
        // U+FFFD REPLACEMENT CHARACTER.
        else {
          u.push(0xFFFD);
        }
      }
    }

    // 3. Set i to i+1.
    i += 1;
  }

  // 6. Return U.
  return u;
}
|
|||
|
|
|||
|
/**
 * Converts an array of code points into a string of UTF-16 code units.
 * Code points above U+FFFF are written as a surrogate pair.
 *
 * @param {!Array.<number>} code_points Array of code points.
 * @return {string} string String of UTF-16 code units.
 */
function codePointsToString(code_points) {
  var s = '';
  for (var i = 0; i < code_points.length; ++i) {
    var cp = code_points[i];
    if (cp <= 0xFFFF) {
      s += String.fromCharCode(cp);
    } else {
      // Split the 20-bit offset into a high (lead) and low (trail)
      // surrogate of 10 bits each.
      cp -= 0x10000;
      s += String.fromCharCode((cp >> 10) + 0xD800,
                               (cp & 0x3FF) + 0xDC00);
    }
  }
  return s;
}
|
|||
|
|
|||
|
|
|||
|
//
|
|||
|
// Implementation of Encoding specification
|
|||
|
// https://encoding.spec.whatwg.org/
|
|||
|
//
|
|||
|
|
|||
|
//
|
|||
|
// 4. Terminology
|
|||
|
//
|
|||
|
|
|||
|
/**
 * An ASCII byte is a byte in the range 0x00 to 0x7F, inclusive.
 * @param {number} a The number to test.
 * @return {boolean} True if a is in the range 0x00 to 0x7F, inclusive.
 */
function isASCIIByte(a) {
  return 0x00 <= a && a <= 0x7F;
}

/**
 * An ASCII code point is a code point in the range U+0000 to
 * U+007F, inclusive. The numeric test is the same as for ASCII bytes,
 * so this is an alias of isASCIIByte.
 */
var isASCIICodePoint = isASCIIByte;
|
|||
|
|
|||
|
|
|||
|
/**
 * End-of-stream is a special token that signifies no more tokens
 * are in the stream.
 * @const
 */
var end_of_stream = -1;
|
|||
|
|
|||
|
/**
 * A stream represents an ordered sequence of tokens.
 *
 * The constructor copies |tokens| into a plain array, so the caller's
 * array or typed array is never modified by the stream.
 *
 * @constructor
 * @param {!(Array.<number>|Uint8Array)} tokens Array of tokens that provide
 * the stream.
 */
function Stream(tokens) {
  /** @type {!Array.<number>} */
  this.tokens = [].slice.call(tokens);
  // Reversed as push/pop is more efficient than shift/unshift.
  // The front of the stream is therefore the END of this.tokens.
  this.tokens.reverse();
}

Stream.prototype = {
  /**
   * @return {boolean} True if end-of-stream has been hit.
   */
  endOfStream: function() {
    return !this.tokens.length;
  },

  /**
   * When a token is read from a stream, the first token in the
   * stream must be returned and subsequently removed, and
   * end-of-stream must be returned otherwise.
   *
   * @return {number} Get the next token from the stream, or
   * end_of_stream.
   */
  read: function() {
    if (!this.tokens.length)
      return end_of_stream;
    return this.tokens.pop();
  },

  /**
   * When one or more tokens are prepended to a stream, those tokens
   * must be inserted, in given order, before the first token in the
   * stream.
   *
   * An array argument is only read, never emptied.
   *
   * @param {(number|!Array.<number>)} token The token(s) to prepend to the
   * stream.
   */
  prepend: function(token) {
    if (Array.isArray(token)) {
      var tokens = /**@type {!Array.<number>}*/(token);
      // Walk backwards so the first given token ends up last in
      // this.tokens, i.e. it is the next one read.
      for (var i = tokens.length - 1; i >= 0; --i)
        this.tokens.push(tokens[i]);
    } else {
      this.tokens.push(token);
    }
  },

  /**
   * When one or more tokens are pushed to a stream, those tokens
   * must be inserted, in given order, after the last token in the
   * stream.
   *
   * An array argument is only read, never emptied.
   *
   * @param {(number|!Array.<number>)} token The tokens(s) to push to the
   * stream.
   */
  push: function(token) {
    if (Array.isArray(token)) {
      var tokens = /**@type {!Array.<number>}*/(token);
      // The back of the stream is the start of this.tokens, so each
      // token is unshifted in the order given.
      for (var i = 0; i < tokens.length; ++i)
        this.tokens.unshift(tokens[i]);
    } else {
      this.tokens.unshift(token);
    }
  }
};
|
|||
|
|
|||
|
//
|
|||
|
// 5. Encodings
|
|||
|
//
|
|||
|
|
|||
|
// 5.1 Encoders and decoders
|
|||
|
|
|||
|
/**
 * Sentinel value referred to as |finished| in the Decoder and Encoder
 * handler contracts.
 * @const
 */
var finished = -1;
|
|||
|
|
|||
|
/**
 * Reports a decoding error: throws in fatal mode, otherwise yields the
 * code point to emit in place of the bad input.
 *
 * @param {boolean} fatal If true, decoding errors raise an exception.
 * @param {number=} opt_code_point Override the standard fallback code point.
 * @return {number} The code point to insert on a decoding error.
 * @throws {TypeError} If |fatal| is truthy.
 */
function decoderError(fatal, opt_code_point) {
  if (fatal)
    throw TypeError('Decoder error');
  // Any falsy override (omitted, or 0) falls back to U+FFFD.
  return opt_code_point || 0xFFFD;
}
|
|||
|
|
|||
|
/**
 * Reports a code point that cannot be represented in the target encoding.
 *
 * @param {number} code_point The code point that could not be encoded.
 * @return {number} Always throws, no value is actually returned.
 * @throws {TypeError} Always.
 */
function encoderError(code_point) {
  throw TypeError('The code point ' + code_point + ' could not be encoded.');
}
|
|||
|
|
|||
|
/**
 * Interface declaration for decoders; the handler here is an empty
 * placeholder that documents the contract.
 * @interface
 */
function Decoder() {}
Decoder.prototype = {
  /**
   * @param {Stream} stream The stream of bytes being decoded.
   * @param {number} bite The next byte read from the stream.
   * @return {?(number|!Array.<number>)} The next code point(s)
   *     decoded, or null if not enough data exists in the input
   *     stream to decode a complete code point, or |finished|.
   */
  handler: function(stream, bite) {}
};
|
|||
|
|
|||
|
/**
 * Interface declaration for encoders; the handler here is an empty
 * placeholder that documents the contract.
 * @interface
 */
function Encoder() {}
Encoder.prototype = {
  /**
   * @param {Stream} stream The stream of code points being encoded.
   * @param {number} code_point Next code point read from the stream.
   * @return {(number|!Array.<number>)} Byte(s) to emit, or |finished|.
   */
  handler: function(stream, code_point) {}
};
|
|||
|
|
|||
|
// 5.2 Names and labels
|
|||
|
|
|||
|
// TODO: Define @typedef for Encoding: {name:string,labels:Array.<string>}
|
|||
|
// https://github.com/google/closure-compiler/issues/247
|
|||
|
|
|||
|
/**
 * Looks up an encoding by label.
 *
 * @param {string} label The encoding label.
 * @return {?{name:string,labels:Array.<string>}} The matching entry of the
 *     label registry, or null when the label is not registered.
 */
function getEncoding(label) {
  // 1. Remove any leading and trailing ASCII whitespace from label.
  // ASCII whitespace is TAB, LF, FF, CR and SPACE only. trim() would
  // also strip non-ASCII whitespace such as U+00A0, so it is not used.
  label = String(label).replace(/^[\t\n\f\r ]+|[\t\n\f\r ]+$/g, '');

  // 2. If label is an ASCII case-insensitive match for any of the
  // labels listed in the table below, return the corresponding
  // encoding, and failure otherwise.
  // Only A-Z are folded. A full toLowerCase() would also map
  // characters such as U+212A KELVIN SIGN onto ASCII letters.
  label = label.replace(/[A-Z]/g, function(c) {
    return c.toLowerCase();
  });
  if (Object.prototype.hasOwnProperty.call(label_to_encoding, label)) {
    return label_to_encoding[label];
  }
  return null;
}
|
|||
|
|
|||
|
/**
 * Encodings table: https://encoding.spec.whatwg.org/encodings.json
 *
 * Each category lists its encodings; each encoding gives its name and
 * the lowercase labels that select it.
 *
 * @const
 * @type {!Array.<{
 *          heading: string,
 *          encodings: Array.<{name:string,labels:Array.<string>}>
 *        }>}
 */
var encodings = [
  {
    "encodings": [
      {"labels": ["unicode-1-1-utf-8", "utf-8", "utf8"], "name": "UTF-8"}
    ],
    "heading": "The Encoding"
  },
  {
    "encodings": [
      {"labels": ["866", "cp866", "csibm866", "ibm866"], "name": "IBM866"},
      {"labels": ["csisolatin2", "iso-8859-2", "iso-ir-101", "iso8859-2",
                  "iso88592", "iso_8859-2", "iso_8859-2:1987", "l2", "latin2"],
       "name": "ISO-8859-2"},
      {"labels": ["csisolatin3", "iso-8859-3", "iso-ir-109", "iso8859-3",
                  "iso88593", "iso_8859-3", "iso_8859-3:1988", "l3", "latin3"],
       "name": "ISO-8859-3"},
      {"labels": ["csisolatin4", "iso-8859-4", "iso-ir-110", "iso8859-4",
                  "iso88594", "iso_8859-4", "iso_8859-4:1988", "l4", "latin4"],
       "name": "ISO-8859-4"},
      {"labels": ["csisolatincyrillic", "cyrillic", "iso-8859-5", "iso-ir-144",
                  "iso8859-5", "iso88595", "iso_8859-5", "iso_8859-5:1988"],
       "name": "ISO-8859-5"},
      {"labels": ["arabic", "asmo-708", "csiso88596e", "csiso88596i",
                  "csisolatinarabic", "ecma-114", "iso-8859-6", "iso-8859-6-e",
                  "iso-8859-6-i", "iso-ir-127", "iso8859-6", "iso88596",
                  "iso_8859-6", "iso_8859-6:1987"],
       "name": "ISO-8859-6"},
      {"labels": ["csisolatingreek", "ecma-118", "elot_928", "greek", "greek8",
                  "iso-8859-7", "iso-ir-126", "iso8859-7", "iso88597",
                  "iso_8859-7", "iso_8859-7:1987", "sun_eu_greek"],
       "name": "ISO-8859-7"},
      {"labels": ["csiso88598e", "csisolatinhebrew", "hebrew", "iso-8859-8",
                  "iso-8859-8-e", "iso-ir-138", "iso8859-8", "iso88598",
                  "iso_8859-8", "iso_8859-8:1988", "visual"],
       "name": "ISO-8859-8"},
      {"labels": ["csiso88598i", "iso-8859-8-i", "logical"],
       "name": "ISO-8859-8-I"},
      {"labels": ["csisolatin6", "iso-8859-10", "iso-ir-157", "iso8859-10",
                  "iso885910", "l6", "latin6"],
       "name": "ISO-8859-10"},
      {"labels": ["iso-8859-13", "iso8859-13", "iso885913"],
       "name": "ISO-8859-13"},
      {"labels": ["iso-8859-14", "iso8859-14", "iso885914"],
       "name": "ISO-8859-14"},
      {"labels": ["csisolatin9", "iso-8859-15", "iso8859-15", "iso885915",
                  "iso_8859-15", "l9"],
       "name": "ISO-8859-15"},
      {"labels": ["iso-8859-16"], "name": "ISO-8859-16"},
      {"labels": ["cskoi8r", "koi", "koi8", "koi8-r", "koi8_r"],
       "name": "KOI8-R"},
      {"labels": ["koi8-ru", "koi8-u"], "name": "KOI8-U"},
      {"labels": ["csmacintosh", "mac", "macintosh", "x-mac-roman"],
       "name": "macintosh"},
      {"labels": ["dos-874", "iso-8859-11", "iso8859-11", "iso885911",
                  "tis-620", "windows-874"],
       "name": "windows-874"},
      {"labels": ["cp1250", "windows-1250", "x-cp1250"],
       "name": "windows-1250"},
      {"labels": ["cp1251", "windows-1251", "x-cp1251"],
       "name": "windows-1251"},
      {"labels": ["ansi_x3.4-1968", "ascii", "cp1252", "cp819", "csisolatin1",
                  "ibm819", "iso-8859-1", "iso-ir-100", "iso8859-1",
                  "iso88591", "iso_8859-1", "iso_8859-1:1987", "l1", "latin1",
                  "us-ascii", "windows-1252", "x-cp1252"],
       "name": "windows-1252"},
      {"labels": ["cp1253", "windows-1253", "x-cp1253"],
       "name": "windows-1253"},
      {"labels": ["cp1254", "csisolatin5", "iso-8859-9", "iso-ir-148",
                  "iso8859-9", "iso88599", "iso_8859-9", "iso_8859-9:1989",
                  "l5", "latin5", "windows-1254", "x-cp1254"],
       "name": "windows-1254"},
      {"labels": ["cp1255", "windows-1255", "x-cp1255"],
       "name": "windows-1255"},
      {"labels": ["cp1256", "windows-1256", "x-cp1256"],
       "name": "windows-1256"},
      {"labels": ["cp1257", "windows-1257", "x-cp1257"],
       "name": "windows-1257"},
      {"labels": ["cp1258", "windows-1258", "x-cp1258"],
       "name": "windows-1258"},
      {"labels": ["x-mac-cyrillic", "x-mac-ukrainian"],
       "name": "x-mac-cyrillic"}
    ],
    "heading": "Legacy single-byte encodings"
  },
  {
    "encodings": [
      {"labels": ["chinese", "csgb2312", "csiso58gb231280", "gb2312",
                  "gb_2312", "gb_2312-80", "gbk", "iso-ir-58", "x-gbk"],
       "name": "GBK"},
      {"labels": ["gb18030"], "name": "gb18030"}
    ],
    "heading": "Legacy multi-byte Chinese (simplified) encodings"
  },
  {
    "encodings": [
      {"labels": ["big5", "big5-hkscs", "cn-big5", "csbig5", "x-x-big5"],
       "name": "Big5"}
    ],
    "heading": "Legacy multi-byte Chinese (traditional) encodings"
  },
  {
    "encodings": [
      {"labels": ["cseucpkdfmtjapanese", "euc-jp", "x-euc-jp"],
       "name": "EUC-JP"},
      {"labels": ["csiso2022jp", "iso-2022-jp"], "name": "ISO-2022-JP"},
      {"labels": ["csshiftjis", "ms932", "ms_kanji", "shift-jis", "shift_jis",
                  "sjis", "windows-31j", "x-sjis"],
       "name": "Shift_JIS"}
    ],
    "heading": "Legacy multi-byte Japanese encodings"
  },
  {
    "encodings": [
      {"labels": ["cseuckr", "csksc56011987", "euc-kr", "iso-ir-149", "korean",
                  "ks_c_5601-1987", "ks_c_5601-1989", "ksc5601", "ksc_5601",
                  "windows-949"],
       "name": "EUC-KR"}
    ],
    "heading": "Legacy multi-byte Korean encodings"
  },
  {
    "encodings": [
      {"labels": ["csiso2022kr", "hz-gb-2312", "iso-2022-cn",
                  "iso-2022-cn-ext", "iso-2022-kr"],
       "name": "replacement"},
      {"labels": ["utf-16be"], "name": "UTF-16BE"},
      {"labels": ["utf-16", "utf-16le"], "name": "UTF-16LE"},
      {"labels": ["x-user-defined"], "name": "x-user-defined"}
    ],
    "heading": "Legacy miscellaneous encodings"
  }
];
|
|||
|
|
|||
|
// Label to encoding registry: maps every label in the encodings table
// to the encoding entry that lists it.
/** @type {Object.<string,{name:string,labels:Array.<string>}>} */
var label_to_encoding = {};
encodings.forEach(function(category) {
  category.encodings.forEach(function(encoding) {
    encoding.labels.forEach(function(label) {
      label_to_encoding[label] = encoding;
    });
  });
});
|
|||
|
|
|||
|
// Registry of encoder/decoder factories, by encoding name.
/** @type {Object.<string, function({fatal:boolean}): Encoder>} */
var encoders = {};
/** @type {Object.<string, function({fatal:boolean}): Decoder>} */
var decoders = {};
|
|||
|
|
|||
|
//
|
|||
|
// 6. Indexes
|
|||
|
//
|
|||
|
|
|||
|
/**
 * Looks up the code point stored at |pointer| in |index|.
 *
 * @param {number} pointer The |pointer| to search for.
 * @param {(!Array.<?number>|undefined)} index The |index| to search within.
 * @return {?number} The code point corresponding to |pointer| in |index|,
 *     or null if |index| is missing or has no (truthy) entry at |pointer|.
 */
function indexCodePointFor(pointer, index) {
  if (!index) return null;
  // Missing slots (undefined) and explicit null entries both yield null.
  return index[pointer] || null;
}
|
|||
|
|
|||
|
/**
 * Finds the position of |code_point| in |index|.
 *
 * @param {number} code_point The |code point| to search for.
 * @param {!Array.<?number>} index The |index| to search within.
 * @return {?number} The first pointer corresponding to |code point| in
 *     |index|, or null if |code point| is not in |index|.
 */
function indexPointerFor(code_point, index) {
  var pointer = index.indexOf(code_point);
  return pointer === -1 ? null : pointer;
}
|
|||
|
|
|||
|
/**
 * Fetches a named index table from the 'encoding-indexes' object on the
 * global object.
 *
 * @param {string} name Name of the index.
 * @return {(!Array.<number>|!Array.<Array.<number>>)}
 * @throws {Error} If 'encoding-indexes' is not present on the global.
 */
function index(name) {
  if (!('encoding-indexes' in global)) {
    throw Error("Indexes missing." +
                " Did you forget to include encoding-indexes.js first?");
  }
  return global['encoding-indexes'][name];
}
|
|||
|
|
|||
|
/**
 * Maps a gb18030 ranges pointer to its code point.
 *
 * @param {number} pointer The |pointer| to search for in the gb18030 index.
 * @return {?number} The code point corresponding to |pointer| in |index|,
 *     or null if |pointer| is outside the ranges covered by the index.
 */
function indexGB18030RangesCodePointFor(pointer) {
  // 1. If pointer is greater than 39419 and less than 189000, or
  // pointer is greater than 1237575, return null.
  if ((pointer > 39419 && pointer < 189000) || (pointer > 1237575))
    return null;

  // 2. If pointer is 7457, return code point U+E7C7.
  if (pointer === 7457) return 0xE7C7;

  // 3. Let offset be the last pointer in index gb18030 ranges that
  // is equal to or less than pointer and let code point offset be
  // its corresponding code point.
  var offset = 0;
  var code_point_offset = 0;
  var idx = index('gb18030-ranges');
  var i;
  // Entries are [pointer, code point] pairs. The scan stops at the
  // first entry whose pointer exceeds |pointer|.
  for (i = 0; i < idx.length; ++i) {
    /** @type {!Array.<number>} */
    var entry = idx[i];
    if (entry[0] <= pointer) {
      offset = entry[0];
      code_point_offset = entry[1];
    } else {
      break;
    }
  }

  // 4. Return a code point whose value is code point offset +
  // pointer − offset.
  return code_point_offset + pointer - offset;
}
|
|||
|
|
|||
|
/**
 * Maps a code point to its gb18030 ranges pointer.
 *
 * @param {number} code_point The |code point| to locate in the gb18030 index.
 * @return {number} The first pointer corresponding to |code point| in the
 *     gb18030 index.
 */
function indexGB18030RangesPointerFor(code_point) {
  // 1. If code point is U+E7C7, return pointer 7457.
  if (code_point === 0xE7C7) return 7457;

  // 2. Let offset be the last code point in index gb18030 ranges
  // that is equal to or less than code point and let pointer offset
  // be its corresponding pointer.
  var offset = 0;
  var pointer_offset = 0;
  var idx = index('gb18030-ranges');
  var i;
  // Entries are [pointer, code point] pairs. The scan stops at the
  // first entry whose code point exceeds |code_point|.
  for (i = 0; i < idx.length; ++i) {
    /** @type {!Array.<number>} */
    var entry = idx[i];
    if (entry[1] <= code_point) {
      offset = entry[1];
      pointer_offset = entry[0];
    } else {
      break;
    }
  }

  // 3. Return a pointer whose value is pointer offset + code point
  // − offset.
  return pointer_offset + code_point - offset;
}
|
|||
|
|
|||
|
/**
 * Finds the Shift_JIS pointer for a code point.
 *
 * @param {number} code_point The |code_point| to search for in the Shift_JIS
 *     index.
 * @return {?number} The first pointer corresponding to |code point| in the
 *     Shift_JIS index, or null if |code point| is not in that index.
 */
function indexShiftJISPointerFor(code_point) {
  // 1. Let index be index jis0208 excluding all entries whose
  // pointer is in the range 8272 to 8835, inclusive.
  // The filtered table is built on first use and cached.
  shift_jis_index = shift_jis_index ||
    index('jis0208').map(function(code_point, pointer) {
      return inRange(pointer, 8272, 8835) ? null : code_point;
    });
  var index_ = shift_jis_index;

  // 2. Return the index pointer for code point in index.
  // Goes through indexPointerFor so a miss is reported as null, as
  // documented, rather than as indexOf's -1.
  return indexPointerFor(code_point, index_);
}
var shift_jis_index;
|
|||
|
|
|||
|
/**
 * Finds the Big5 pointer for a code point.
 *
 * @param {number} code_point The |code_point| to search for in the big5
 *     index.
 * @return {?number} The pointer corresponding to |code point| in the big5
 *     index, or null if |code point| is not in that index.
 */
function indexBig5PointerFor(code_point) {
  // 1. Let index be index Big5 excluding all entries whose pointer
  // is less than (0xA1 - 0x81) × 157.
  // The filtered table is built on first use and cached.
  big5_index_no_hkscs = big5_index_no_hkscs ||
    index('big5').map(function(code_point, pointer) {
      return (pointer < (0xA1 - 0x81) * 157) ? null : code_point;
    });
  var index_ = big5_index_no_hkscs;

  // 2. If code point is U+2550, U+255E, U+2561, U+256A, U+5341, or
  // U+5345, return the last pointer corresponding to code point in
  // index.
  if (code_point === 0x2550 || code_point === 0x255E ||
      code_point === 0x2561 || code_point === 0x256A ||
      code_point === 0x5341 || code_point === 0x5345) {
    var last = index_.lastIndexOf(code_point);
    // Report a miss as null, matching the other pointer lookups.
    return last === -1 ? null : last;
  }

  // 3. Return the index pointer for code point in index.
  return indexPointerFor(code_point, index_);
}
var big5_index_no_hkscs;
|
|||
|
|
|||
|
//
|
|||
|
// 8. API
|
|||
|
//
|
|||
|
|
|||
|
/** @const */ var DEFAULT_ENCODING = 'utf-8';

// 8.1 Interface TextDecoder

/**
 * Creates a decoder bound to the encoding that |label| resolves to.
 *
 * @constructor
 * @param {string=} label The label of the encoding;
 *     defaults to 'utf-8'.
 * @param {Object=} options Dictionary whose 'fatal' and 'ignoreBOM' members
 *     are consulted.
 */
function TextDecoder(label, options) {
  // Web IDL conventions: the constructor must be invoked with 'new'.
  if (!(this instanceof TextDecoder)) {
    throw TypeError('Called as a function. Did you forget \'new\'?');
  }
  if (label === undefined) {
    label = DEFAULT_ENCODING;
  } else {
    label = String(label);
  }
  options = ToDictionary(options);

  // 3. Let dec be a new TextDecoder object.
  var dec = this;

  // A TextDecoder object has an associated encoding, decoder,
  // stream, ignore BOM flag (initially unset), BOM seen flag
  // (initially unset), error mode (initially replacement), and do
  // not flush flag (initially unset).

  /** @private */
  dec._encoding = null;
  /** @private @type {?Decoder} */
  dec._decoder = null;
  /** @private @type {boolean} */
  dec._ignoreBOM = false;
  /** @private @type {boolean} */
  dec._BOMseen = false;
  /** @private @type {string} */
  dec._error_mode = 'replacement';
  /** @private @type {boolean} */
  dec._do_not_flush = false;

  // 1. Let encoding be the result of getting an encoding from
  // label.
  var encoding = getEncoding(label);

  // 2. If encoding is failure or replacement, throw a RangeError.
  var rejected = encoding === null || encoding.name === 'replacement';
  if (rejected) {
    throw RangeError('Unknown encoding: ' + label);
  }
  // The encoding is known but no decoder factory was registered for it.
  if (!decoders[encoding.name]) {
    throw Error('Decoder not present.' +
                ' Did you forget to include encoding-indexes.js first?');
  }

  // 4. Set dec's encoding to encoding.
  dec._encoding = encoding;

  // 5. If options's fatal member is true, set dec's error mode to
  // fatal.
  if (options['fatal']) {
    dec._error_mode = 'fatal';
  }

  // 6. If options's ignoreBOM member is true, set dec's ignore BOM
  // flag.
  if (options['ignoreBOM']) {
    dec._ignoreBOM = true;
  }

  // For pre-ES5 runtimes: without Object.defineProperty the attributes
  // are stored as plain own properties.
  if (!Object.defineProperty) {
    dec.encoding = dec._encoding.name.toLowerCase();
    dec.fatal = dec._error_mode === 'fatal';
    dec.ignoreBOM = dec._ignoreBOM;
  }

  // 7. Return dec.
  return dec;
}
|
|||
|
|
|||
|
if (Object.defineProperty) {
  (function(proto) {
    /**
     * Installs a getter-only accessor called |name| on the prototype.
     * @param {string} name
     * @param {function():*} getter Invoked with the instance as |this|.
     */
    function defineGetter(name, getter) {
      Object.defineProperty(proto, name, {get: getter});
    }

    // The encoding attribute's getter must return encoding's name
    // (lower-cased here).
    defineGetter('encoding', /** @this {TextDecoder} */ function() {
      var name = this._encoding.name;
      return name.toLowerCase();
    });

    // The fatal attribute's getter must return true if error mode
    // is fatal, and false otherwise.
    defineGetter('fatal', /** @this {TextDecoder} */ function() {
      return this._error_mode === 'fatal';
    });

    // The ignoreBOM attribute's getter must return true if ignore
    // BOM flag is set, and false otherwise.
    defineGetter('ignoreBOM', /** @this {TextDecoder} */ function() {
      return this._ignoreBOM;
    });
  }(TextDecoder.prototype));
}
|
|||
|
|
|||
|
/**
 * Decodes |input| with this object's encoding and returns the text.
 *
 * Anything that is not an ArrayBuffer or an object exposing an ArrayBuffer
 * through its |buffer| property (including null and undefined) is treated
 * as an empty byte sequence. When options.stream is truthy the decoder is
 * kept for the next call instead of being flushed and discarded.
 *
 * @param {BufferSource=} input The buffer of bytes to decode.
 * @param {Object=} options Dictionary whose 'stream' member is consulted.
 * @return {string} The decoded string.
 */
TextDecoder.prototype.decode = function decode(input, options) {
  var bytes;
  // typeof null is 'object', so null has to be excluded explicitly;
  // otherwise the 'in' test below throws on decode(null).
  var is_object = typeof input === 'object' && input !== null;
  if (is_object && input instanceof ArrayBuffer) {
    bytes = new Uint8Array(input);
  } else if (is_object && 'buffer' in input &&
             input.buffer instanceof ArrayBuffer) {
    // A view: honor its offset and length within the backing buffer.
    bytes = new Uint8Array(input.buffer,
                           input.byteOffset,
                           input.byteLength);
  } else {
    bytes = new Uint8Array(0);
  }

  options = ToDictionary(options);

  // 1. If the do not flush flag is unset, set decoder to a new
  // encoding's decoder, set stream to a new stream, and unset the
  // BOM seen flag.
  if (!this._do_not_flush) {
    this._decoder = decoders[this._encoding.name]({
      fatal: this._error_mode === 'fatal'});
    this._BOMseen = false;
  }

  // 2. If options's stream is true, set the do not flush flag, and
  // unset the do not flush flag otherwise.
  this._do_not_flush = Boolean(options['stream']);

  // 3. If input is given, push a copy of input to stream.
  // TODO: Align with spec algorithm - maintain stream on instance.
  var input_stream = new Stream(bytes);

  // 4. Let output be a new stream.
  var output = [];

  /** @type {?(number|!Array.<number>)} */
  var result;

  // 5. While true:
  while (true) {
    // 1. Let token be the result of reading from stream.
    var token = input_stream.read();

    // 2. If token is end-of-stream and the do not flush flag is
    // set, return output, serialized.
    // TODO: Align with spec algorithm.
    if (token === end_of_stream)
      break;

    // 3. Otherwise, run these subsubsteps:

    // 1. Let result be the result of processing token for decoder,
    // stream, output, and error mode.
    result = this._decoder.handler(input_stream, token);

    // 2. If result is finished, return output, serialized.
    if (result === finished)
      break;

    // A null result means the decoder needs more bytes; otherwise it is
    // one code point or an array of code points.
    if (result !== null) {
      if (Array.isArray(result))
        output.push.apply(output, /**@type {!Array.<number>}*/(result));
      else
        output.push(result);
    }

    // 3. Otherwise, if result is error, throw a TypeError.
    // (Thrown in handler)

    // 4. Otherwise, do nothing.
  }
  // TODO: Align with spec algorithm.
  // Not streaming: feed end-of-stream to the decoder so it can report a
  // truncated sequence, then drop the decoder.
  if (!this._do_not_flush) {
    do {
      result = this._decoder.handler(input_stream, input_stream.read());
      if (result === finished)
        break;
      if (result === null)
        continue;
      if (Array.isArray(result))
        output.push.apply(output, /**@type {!Array.<number>}*/(result));
      else
        output.push(result);
    } while (!input_stream.endOfStream());
    this._decoder = null;
  }

  // A TextDecoder object also has an associated serialize stream
  // algorithm...
  /**
   * Strips a leading U+FEFF when applicable and converts the code points
   * to a string.
   * @param {!Array.<number>} stream Code points; modified in place.
   * @return {string}
   * @this {TextDecoder}
   */
  function serializeStream(stream) {
    // 1. Let token be the result of reading from stream.
    // (Done in-place on array, rather than as a stream)

    // 2. If encoding is UTF-8, UTF-16BE, or UTF-16LE, and ignore
    // BOM flag and BOM seen flag are unset, run these subsubsteps:
    if (includes(['UTF-8', 'UTF-16LE', 'UTF-16BE'], this._encoding.name) &&
        !this._ignoreBOM && !this._BOMseen) {
      if (stream.length > 0 && stream[0] === 0xFEFF) {
        // 1. If token is U+FEFF, set BOM seen flag.
        this._BOMseen = true;
        stream.shift();
      } else if (stream.length > 0) {
        // 2. Otherwise, if token is not end-of-stream, set BOM seen
        // flag and append token to stream.
        this._BOMseen = true;
      } else {
        // 3. Otherwise, if token is not end-of-stream, append token
        // to output.
        // (no-op)
      }
    }
    // 4. Otherwise, return output.
    return codePointsToString(stream);
  }

  return serializeStream.call(this, output);
};
|
|||
|
|
|||
|
// 8.2 Interface TextEncoder
|
|||
|
|
|||
|
/**
 * Creates an encoder. By default the encoding is always utf-8 and a
 * supplied |label| is ignored (with a console warning when a console is
 * available); a legacy encoding is only selected when the nonstandard
 * 'NONSTANDARD_allowLegacyEncoding' option is truthy.
 *
 * @constructor
 * @param {string=} label The label of the encoding. NONSTANDARD.
 * @param {Object=} options NONSTANDARD.
 */
function TextEncoder(label, options) {
  // Web IDL conventions: the constructor must be invoked with 'new'.
  if (!(this instanceof TextEncoder)) {
    throw TypeError('Called as a function. Did you forget \'new\'?');
  }
  options = ToDictionary(options);

  // 1. Let enc be a new TextEncoder object.
  var enc = this;

  // A TextEncoder object has an associated encoding and encoder.

  /** @private */
  enc._encoding = null;
  /** @private @type {?Encoder} */
  enc._encoder = null;

  // Non-standard
  /** @private @type {boolean} */
  enc._do_not_flush = false;
  /** @private @type {string} */
  enc._fatal = options['fatal'] ? 'fatal' : 'replacement';

  // 2. Set enc's encoding to UTF-8's encoder.
  if (!options['NONSTANDARD_allowLegacyEncoding']) {
    // Standard behavior.
    enc._encoding = getEncoding('utf-8');

    if (label !== undefined && 'console' in global) {
      console.warn('TextEncoder constructor called with encoding label, '
                   + 'which is ignored.');
    }
  } else {
    // NONSTANDARD behavior: resolve the requested label.
    if (label === undefined) {
      label = DEFAULT_ENCODING;
    } else {
      label = String(label);
    }
    var encoding = getEncoding(label);
    var rejected = encoding === null || encoding.name === 'replacement';
    if (rejected) {
      throw RangeError('Unknown encoding: ' + label);
    }
    // The encoding is known but no encoder factory was registered for it.
    if (!encoders[encoding.name]) {
      throw Error('Encoder not present.' +
                  ' Did you forget to include encoding-indexes.js first?');
    }
    enc._encoding = encoding;
  }

  // For pre-ES5 runtimes: store the attribute as a plain own property.
  if (!Object.defineProperty) {
    enc.encoding = enc._encoding.name.toLowerCase();
  }

  // 3. Return enc.
  return enc;
}
|
|||
|
|
|||
|
if (Object.defineProperty) {
  // The encoding attribute's getter must return encoding's name
  // (lower-cased here). Only a getter is installed.
  Object.defineProperty(TextEncoder.prototype, 'encoding', {
    /** @this {TextEncoder} */
    get: function() {
      var name = this._encoding.name;
      return name.toLowerCase();
    }
  });
}
|
|||
|
|
|||
|
/**
 * Encodes |opt_string| with this object's encoding.
 *
 * @param {string=} opt_string The string to encode; undefined is treated
 *     as the empty string, anything else is converted with String().
 * @param {Object=} options Dictionary whose nonstandard 'stream' member is
 *     consulted.
 * @return {!Uint8Array} Encoded bytes, as a Uint8Array.
 */
TextEncoder.prototype.encode = function encode(opt_string, options) {
  var text = (opt_string === undefined) ? '' : String(opt_string);
  var dictionary = ToDictionary(options);

  // NOTE: This option is nonstandard. None of the encodings
  // permitted for encoding (i.e. UTF-8, UTF-16) are stateful when
  // the input is a USVString so streaming is not necessary.
  // A fresh encoder is created unless the previous call asked to stream.
  if (!this._do_not_flush) {
    this._encoder = encoders[this._encoding.name]({
      fatal: this._fatal === 'fatal'
    });
  }
  this._do_not_flush = Boolean(dictionary['stream']);

  // 1. Convert input to a stream.
  var code_points = new Stream(stringToCodePoints(text));

  // 2. Let output be a new stream
  var encoded = [];

  /**
   * Appends one handler result (a byte or an array of bytes) to |encoded|.
   * @param {(number|!Array.<number>)} chunk
   */
  function emit(chunk) {
    if (Array.isArray(chunk)) {
      Array.prototype.push.apply(encoded, chunk);
    } else {
      encoded.push(chunk);
    }
  }

  var encoder = this._encoder;
  /** @type {?(number|!Array.<number>)} */
  var chunk;

  // 3. Read tokens until end-of-stream, handing each to the encoder.
  for (var token = code_points.read();
       token !== end_of_stream;
       token = code_points.read()) {
    chunk = encoder.handler(code_points, token);
    if (chunk === finished) {
      break;
    }
    emit(chunk);
  }

  // TODO: Align with spec algorithm.
  // Not streaming: keep feeding the (exhausted) stream to the encoder
  // until it reports finished, then drop the encoder.
  if (!this._do_not_flush) {
    for (;;) {
      chunk = encoder.handler(code_points, code_points.read());
      if (chunk === finished) {
        break;
      }
      emit(chunk);
    }
    this._encoder = null;
  }

  // Wrap the collected bytes in a Uint8Array.
  return new Uint8Array(encoded);
};
|
|||
|
|
|||
|
|
|||
|
//
|
|||
|
// 9. The encoding
|
|||
|
//
|
|||
|
|
|||
|
// 9.1 utf-8
|
|||
|
|
|||
|
// 9.1.1 utf-8 decoder
|
|||
|
/**
 * Stateful utf-8 decoder; bytes are fed one at a time to |handler|.
 *
 * @constructor
 * @implements {Decoder}
 * @param {{fatal: boolean}} options
 */
function UTF8Decoder(options) {
  var fatal = options.fatal;

  // utf-8's decoder's has an associated utf-8 code point, utf-8
  // bytes seen, and utf-8 bytes needed (all initially 0), a utf-8
  // lower boundary (initially 0x80), and a utf-8 upper boundary
  // (initially 0xBF).
  /** @type {number} */ var partial = 0;   // utf-8 code point
  /** @type {number} */ var seen = 0;      // utf-8 bytes seen
  /** @type {number} */ var needed = 0;    // utf-8 bytes needed
  /** @type {number} */ var lower = 0x80;  // utf-8 lower boundary
  /** @type {number} */ var upper = 0xBF;  // utf-8 upper boundary

  /**
   * @param {Stream} stream The stream of bytes being decoded.
   * @param {number} bite The next byte read from the stream.
   * @return {?(number|!Array.<number>)} The next code point(s)
   *     decoded, or null if not enough data exists in the input
   *     stream to decode a complete code point.
   */
  this.handler = function(stream, bite) {
    if (bite === end_of_stream) {
      // 2. If byte is end-of-stream, return finished.
      if (needed === 0) {
        return finished;
      }
      // 1. If byte is end-of-stream and utf-8 bytes needed is not 0,
      // set utf-8 bytes needed to 0 and return error.
      needed = 0;
      return decoderError(fatal);
    }

    // 3. If utf-8 bytes needed is 0, based on byte:
    if (needed === 0) {
      // 0x00 to 0x7F: return a code point whose value is byte.
      if (inRange(bite, 0x00, 0x7F)) {
        return bite;
      }

      // 0xC2 to 0xDF: lead byte of a two-byte sequence.
      if (inRange(bite, 0xC2, 0xDF)) {
        needed = 1;
        partial = bite & 0x1F;
        return null;
      }

      // 0xE0 to 0xEF: lead byte of a three-byte sequence.
      if (inRange(bite, 0xE0, 0xEF)) {
        // 0xE0 raises the lower boundary to 0xA0 and 0xED lowers the
        // upper boundary to 0x9F for the next byte.
        if (bite === 0xE0) {
          lower = 0xA0;
        }
        if (bite === 0xED) {
          upper = 0x9F;
        }
        needed = 2;
        partial = bite & 0xF;
        return null;
      }

      // 0xF0 to 0xF4: lead byte of a four-byte sequence.
      if (inRange(bite, 0xF0, 0xF4)) {
        // 0xF0 raises the lower boundary to 0x90 and 0xF4 lowers the
        // upper boundary to 0x8F for the next byte.
        if (bite === 0xF0) {
          lower = 0x90;
        }
        if (bite === 0xF4) {
          upper = 0x8F;
        }
        needed = 3;
        partial = bite & 0x7;
        return null;
      }

      // Otherwise: return error.
      return decoderError(fatal);
    }

    // 4. If byte is not in the range utf-8 lower boundary to utf-8
    // upper boundary, inclusive, run these substeps:
    if (!inRange(bite, lower, upper)) {
      // 1. Reset all state to its initial values.
      partial = 0;
      needed = 0;
      seen = 0;
      lower = 0x80;
      upper = 0xBF;

      // 2. Prepend byte to stream.
      stream.prepend(bite);

      // 3. Return error.
      return decoderError(fatal);
    }

    // 5. Set utf-8 lower boundary to 0x80 and utf-8 upper boundary
    // to 0xBF.
    lower = 0x80;
    upper = 0xBF;

    // 6. Fold the low six bits of byte into the code point.
    partial = (partial << 6) | (bite & 0x3F);

    // 7. Increase utf-8 bytes seen by one.
    seen += 1;

    // 8. If utf-8 bytes seen is not equal to utf-8 bytes needed,
    // continue.
    if (seen !== needed) {
      return null;
    }

    // 9-11. The sequence is complete: reset the accumulator and counters
    // and return the code point.
    var code_point = partial;
    partial = 0;
    needed = 0;
    seen = 0;
    return code_point;
  };
}
|
|||
|
|
|||
|
// 9.1.2 utf-8 encoder
|
|||
|
/**
 * utf-8 encoder; code points are fed one at a time to |handler|.
 *
 * @constructor
 * @implements {Encoder}
 * @param {{fatal: boolean}} options
 */
function UTF8Encoder(options) {
  // Read from options but not consulted anywhere in this encoder.
  var fatal = options.fatal;
  /**
   * @param {Stream} stream Input stream.
   * @param {number} code_point Next code point read from the stream.
   * @return {(number|!Array.<number>)} Byte(s) to emit.
   */
  this.handler = function(stream, code_point) {
    // 1. If code point is end-of-stream, return finished.
    if (code_point === end_of_stream) {
      return finished;
    }

    // 2. If code point is an ASCII code point, return a byte whose
    // value is code point.
    if (isASCIICodePoint(code_point)) {
      return code_point;
    }

    // 3. Set count (number of continuation bytes) and offset (marker bits
    // of the lead byte) based on the range code point is in:
    var count, offset;
    if (inRange(code_point, 0x0080, 0x07FF)) {
      // U+0080 to U+07FF, inclusive: 1 and 0xC0
      count = 1;
      offset = 0xC0;
    } else if (inRange(code_point, 0x0800, 0xFFFF)) {
      // U+0800 to U+FFFF, inclusive: 2 and 0xE0
      count = 2;
      offset = 0xE0;
    } else if (inRange(code_point, 0x10000, 0x10FFFF)) {
      // U+10000 to U+10FFFF, inclusive: 3 and 0xF0
      count = 3;
      offset = 0xF0;
    }

    // 4. Let bytes be a byte sequence whose first byte is (code
    // point >> (6 × count)) + offset.
    var bytes = [(code_point >> (6 * count)) + offset];

    // 5. Append one continuation byte per remaining six-bit group, most
    // significant group first: 0x80 | ((code point >> shift) & 0x3F).
    for (var shift = 6 * (count - 1); shift >= 0; shift -= 6) {
      bytes.push(0x80 | ((code_point >> shift) & 0x3F));
    }

    // 6. Return bytes bytes, in order.
    return bytes;
  };
}
|
|||
|
|
|||
|
/**
 * Factory registered for UTF-8 encoding.
 * @param {{fatal: boolean}} options
 */
encoders['UTF-8'] = function(options) {
  var encoder = new UTF8Encoder(options);
  return encoder;
};

/**
 * Factory registered for UTF-8 decoding.
 * @param {{fatal: boolean}} options
 */
decoders['UTF-8'] = function(options) {
  var decoder = new UTF8Decoder(options);
  return decoder;
};
|
|||
|
|
|||
|
//
|
|||
|
// 10. Legacy single-byte encodings
|
|||
|
//
|
|||
|
|
|||
|
// 10.1 single-byte decoder
|
|||
|
/**
 * Decoder for a single-byte encoding described by |index|.
 *
 * @constructor
 * @implements {Decoder}
 * @param {!Array.<number>} index The encoding index; entry i is looked up
 *     for byte 0x80 + i.
 * @param {{fatal: boolean}} options
 */
function SingleByteDecoder(index, options) {
  var fatal = options.fatal;
  /**
   * @param {Stream} stream The stream of bytes being decoded.
   * @param {number} bite The next byte read from the stream.
   * @return {?(number|!Array.<number>)} The next code point(s)
   *     decoded, or null if not enough data exists in the input
   *     stream to decode a complete code point.
   */
  this.handler = function(stream, bite) {
    // 1. If byte is end-of-stream, return finished.
    if (bite === end_of_stream) {
      return finished;
    }

    // 2. If byte is an ASCII byte, return a code point whose value
    // is byte.
    if (isASCIIByte(bite)) {
      return bite;
    }

    // 3. Let code point be the index code point for byte − 0x80 in
    // index single-byte.
    var mapped = index[bite - 0x80];

    // 4. If code point is null, return error.
    // 5. Otherwise return a code point whose value is code point.
    return mapped === null ? decoderError(fatal) : mapped;
  };
}
|
|||
|
|
|||
|
// 10.2 single-byte encoder
|
|||
|
/**
 * Encoder for a single-byte encoding described by |index|.
 *
 * @constructor
 * @implements {Encoder}
 * @param {!Array.<?number>} index The encoding index.
 * @param {{fatal: boolean}} options
 */
function SingleByteEncoder(index, options) {
  // Read from options but not consulted anywhere in this encoder.
  var fatal = options.fatal;
  /**
   * @param {Stream} stream Input stream.
   * @param {number} code_point Next code point read from the stream.
   * @return {(number|!Array.<number>)} Byte(s) to emit.
   */
  this.handler = function(stream, code_point) {
    // 1. If code point is end-of-stream, return finished.
    if (code_point === end_of_stream)
      return finished;

    // 2. If code point is an ASCII code point, return a byte whose
    // value is code point.
    if (isASCIICodePoint(code_point))
      return code_point;

    // 3. Let pointer be the index pointer for code point in index
    // single-byte.
    var pointer = indexPointerFor(code_point, index);

    // 4. If pointer is null, return error with code point.
    // The result of encoderError is returned explicitly so this step can
    // never fall through to step 5 and emit null + 0x80 (0x80) as a byte.
    if (pointer === null)
      return encoderError(code_point);

    // 5. Return a byte whose value is pointer + 0x80.
    return pointer + 0x80;
  };
}
|
|||
|
|
|||
|
// Registers a decoder and an encoder factory for every encoding listed
// under the 'Legacy single-byte encodings' heading. Does nothing when the
// 'encoding-indexes' table has not been loaded onto |global|.
(function() {
  if (!('encoding-indexes' in global)) {
    return;
  }

  /**
   * Registers both factories for one encoding; the index is looked up once
   * under the lower-cased encoding name and shared by the two factories.
   * @param {{name: string}} encoding
   */
  function registerSingleByte(encoding) {
    var name = encoding.name;
    var idx = index(name.toLowerCase());
    /** @param {{fatal: boolean}} options */
    decoders[name] = function(options) {
      return new SingleByteDecoder(idx, options);
    };
    /** @param {{fatal: boolean}} options */
    encoders[name] = function(options) {
      return new SingleByteEncoder(idx, options);
    };
  }

  encodings.forEach(function(category) {
    if (category.heading === 'Legacy single-byte encodings') {
      category.encodings.forEach(function(encoding) {
        registerSingleByte(encoding);
      });
    }
  });
}());
|
|||
|
|
|||
|
//
|
|||
|
// 11. Legacy multi-byte Chinese (simplified) encodings
|
|||
|
//
|
|||
|
|
|||
|
// 11.1 gbk
|
|||
|
|
|||
|
// 11.1.1 gbk decoder
|
|||
|
// gbk's decoder is gb18030's decoder.
|
|||
|
/**
 * Factory registered for GBK decoding; it creates a gb18030 decoder.
 * @param {{fatal: boolean}} options
 */
decoders['GBK'] = function(options) {
  var decoder = new GB18030Decoder(options);
  return decoder;
};

// 11.1.2 gbk encoder
// gbk's encoder is gb18030's encoder with its gbk flag set.
/**
 * Factory registered for GBK encoding; it creates a gb18030 encoder with
 * the gbk flag argument set to true.
 * @param {{fatal: boolean}} options
 */
encoders['GBK'] = function(options) {
  var gbk_flag = true;
  return new GB18030Encoder(options, gbk_flag);
};
|
|||
|
|
|||
|
// 11.2 gb18030
|
|||
|
|
|||
|
// 11.2.1 gb18030 decoder
|
|||
|
/**
 * Stateful gb18030 decoder; bytes are fed one at a time to |handler|.
 * Up to three pending bytes of a multi-byte sequence are remembered
 * between calls.
 *
 * @constructor
 * @implements {Decoder}
 * @param {{fatal: boolean}} options
 */
function GB18030Decoder(options) {
  var fatal = options.fatal;
  // gb18030's decoder has an associated gb18030 first, gb18030
  // second, and gb18030 third (all initially 0x00).
  var /** @type {number} */ gb18030_first = 0x00,
      /** @type {number} */ gb18030_second = 0x00,
      /** @type {number} */ gb18030_third = 0x00;
  /**
   * @param {Stream} stream The stream of bytes being decoded.
   * @param {number} bite The next byte read from the stream.
   * @return {?(number|!Array.<number>)} The next code point(s)
   *     decoded, or null if not enough data exists in the input
   *     stream to decode a complete code point.
   */
  this.handler = function(stream, bite) {
    // 1. If byte is end-of-stream and gb18030 first, gb18030
    // second, and gb18030 third are 0x00, return finished.
    if (bite === end_of_stream && gb18030_first === 0x00 &&
        gb18030_second === 0x00 && gb18030_third === 0x00) {
      return finished;
    }
    // 2. If byte is end-of-stream, and gb18030 first, gb18030
    // second, or gb18030 third is not 0x00, set gb18030 first,
    // gb18030 second, and gb18030 third to 0x00, and return error.
    if (bite === end_of_stream &&
        (gb18030_first !== 0x00 || gb18030_second !== 0x00 ||
         gb18030_third !== 0x00)) {
      gb18030_first = 0x00;
      gb18030_second = 0x00;
      gb18030_third = 0x00;
      // The error must be returned here; without the return the
      // end-of-stream marker falls through the byte-handling steps below
      // and the error is reported a second time at step 9.
      return decoderError(fatal);
    }
    var code_point;
    // 3. If gb18030 third is not 0x00, run these substeps:
    if (gb18030_third !== 0x00) {
      // 1. Let code point be null.
      code_point = null;
      // 2. If byte is in the range 0x30 to 0x39, inclusive, set
      // code point to the index gb18030 ranges code point for
      // (((gb18030 first − 0x81) × 10 + gb18030 second − 0x30) ×
      // 126 + gb18030 third − 0x81) × 10 + byte − 0x30.
      if (inRange(bite, 0x30, 0x39)) {
        code_point = indexGB18030RangesCodePointFor(
            (((gb18030_first - 0x81) * 10 + gb18030_second - 0x30) * 126 +
             gb18030_third - 0x81) * 10 + bite - 0x30);
      }

      // 3. Let buffer be a byte sequence consisting of gb18030
      // second, gb18030 third, and byte, in order.
      var buffer = [gb18030_second, gb18030_third, bite];

      // 4. Set gb18030 first, gb18030 second, and gb18030 third to
      // 0x00.
      gb18030_first = 0x00;
      gb18030_second = 0x00;
      gb18030_third = 0x00;

      // 5. If code point is null, prepend buffer to stream and
      // return error.
      if (code_point === null) {
        stream.prepend(buffer);
        return decoderError(fatal);
      }

      // 6. Return a code point whose value is code point.
      return code_point;
    }

    // 4. If gb18030 second is not 0x00, run these substeps:
    if (gb18030_second !== 0x00) {

      // 1. If byte is in the range 0x81 to 0xFE, inclusive, set
      // gb18030 third to byte and return continue.
      if (inRange(bite, 0x81, 0xFE)) {
        gb18030_third = bite;
        return null;
      }

      // 2. Prepend gb18030 second followed by byte to stream, set
      // gb18030 first and gb18030 second to 0x00, and return error.
      stream.prepend([gb18030_second, bite]);
      gb18030_first = 0x00;
      gb18030_second = 0x00;
      return decoderError(fatal);
    }

    // 5. If gb18030 first is not 0x00, run these substeps:
    if (gb18030_first !== 0x00) {

      // 1. If byte is in the range 0x30 to 0x39, inclusive, set
      // gb18030 second to byte and return continue.
      if (inRange(bite, 0x30, 0x39)) {
        gb18030_second = bite;
        return null;
      }

      // 2. Let lead be gb18030 first, let pointer be null, and set
      // gb18030 first to 0x00.
      var lead = gb18030_first;
      var pointer = null;
      gb18030_first = 0x00;

      // 3. Let offset be 0x40 if byte is less than 0x7F and 0x41
      // otherwise.
      var offset = bite < 0x7F ? 0x40 : 0x41;

      // 4. If byte is in the range 0x40 to 0x7E, inclusive, or 0x80
      // to 0xFE, inclusive, set pointer to (lead − 0x81) × 190 +
      // (byte − offset).
      if (inRange(bite, 0x40, 0x7E) || inRange(bite, 0x80, 0xFE))
        pointer = (lead - 0x81) * 190 + (bite - offset);

      // 5. Let code point be null if pointer is null and the index
      // code point for pointer in index gb18030 otherwise.
      code_point = pointer === null ? null :
          indexCodePointFor(pointer, index('gb18030'));

      // 6. If code point is null and byte is an ASCII byte, prepend
      // byte to stream.
      if (code_point === null && isASCIIByte(bite))
        stream.prepend(bite);

      // 7. If code point is null, return error.
      if (code_point === null)
        return decoderError(fatal);

      // 8. Return a code point whose value is code point.
      return code_point;
    }

    // 6. If byte is an ASCII byte, return a code point whose value
    // is byte.
    if (isASCIIByte(bite))
      return bite;

    // 7. If byte is 0x80, return code point U+20AC.
    if (bite === 0x80)
      return 0x20AC;

    // 8. If byte is in the range 0x81 to 0xFE, inclusive, set
    // gb18030 first to byte and return continue.
    if (inRange(bite, 0x81, 0xFE)) {
      gb18030_first = bite;
      return null;
    }

    // 9. Return error.
    return decoderError(fatal);
  };
}
|
|||
|
|
|||
|
// 11.2.2 gb18030 encoder
|
|||
|
/**
 * Encoder for gb18030. When gbk_flag is truthy the same handler applies
 * the GBK restrictions: U+20AC is emitted as the single byte 0x80 and any
 * code point missing from index gb18030 is reported through encoderError
 * instead of being emitted as a four-byte sequence.
 *
 * @constructor
 * @implements {Encoder}
 * @param {{fatal: boolean}} options Accepted for interface parity with the
 *     other encoders; this encoder does not read it.
 * @param {boolean=} gbk_flag Whether the gbk flag is set.
 */
function GB18030Encoder(options, gbk_flag) {
  // gb18030's encoder has an associated gbk flag (initially unset).
  /**
   * @param {Stream} stream Input stream (not used by this handler).
   * @param {number} code_point Next code point read from the stream.
   * @return {(number|!Array.<number>)} Byte(s) to emit.
   */
  this.handler = function(stream, code_point) {
    // 1. If code point is end-of-stream, return finished.
    if (code_point === end_of_stream)
      return finished;

    // 2. If code point is an ASCII code point, return a byte whose
    // value is code point.
    if (isASCIICodePoint(code_point))
      return code_point;

    // 3. If code point is U+E5E5, return error with code point.
    if (code_point === 0xE5E5)
      return encoderError(code_point);

    // 4. If the gbk flag is set and code point is U+20AC, return
    // byte 0x80.
    if (gbk_flag && code_point === 0x20AC)
      return 0x80;

    // 5. Let pointer be the index pointer for code point in index
    // gb18030.
    var pointer = indexPointerFor(code_point, index('gb18030'));

    // 6. If pointer is not null, run these substeps:
    if (pointer !== null) {

      // 1. Let lead be floor(pointer / 190) + 0x81.
      var lead = floor(pointer / 190) + 0x81;

      // 2. Let trail be pointer % 190.
      var trail = pointer % 190;

      // 3. Let offset be 0x40 if trail is less than 0x3F and 0x41
      // otherwise (this skips the trail byte value 0x7F).
      var offset = trail < 0x3F ? 0x40 : 0x41;

      // 4. Return two bytes whose values are lead and trail + offset.
      return [lead, trail + offset];
    }

    // 7. If gbk flag is set, return error with code point.
    if (gbk_flag)
      return encoderError(code_point);

    // 8. Set pointer to the index gb18030 ranges pointer for code
    // point.
    pointer = indexGB18030RangesPointerFor(code_point);

    // 9. Let byte1 be floor(pointer / 10 / 126 / 10).
    var byte1 = floor(pointer / 10 / 126 / 10);

    // 10. Set pointer to pointer − byte1 × 10 × 126 × 10.
    pointer = pointer - byte1 * 10 * 126 * 10;

    // 11. Let byte2 be floor(pointer / 10 / 126).
    var byte2 = floor(pointer / 10 / 126);

    // 12. Set pointer to pointer − byte2 × 10 × 126.
    pointer = pointer - byte2 * 10 * 126;

    // 13. Let byte3 be floor(pointer / 10).
    var byte3 = floor(pointer / 10);

    // 14. Let byte4 be pointer − byte3 × 10.
    var byte4 = pointer - byte3 * 10;

    // 15. Return four bytes whose values are byte1 + 0x81, byte2 +
    // 0x30, byte3 + 0x81, byte4 + 0x30.
    return [byte1 + 0x81,
            byte2 + 0x30,
            byte3 + 0x81,
            byte4 + 0x30];
  };
}
|
|||
|
|
|||
|
// Register the gb18030 encoder and decoder factories under the name
// 'gb18030'. Both are constructed without a gbk flag argument.
/** @param {{fatal: boolean}} options */
encoders['gb18030'] = function(options) {
  return new GB18030Encoder(options);
};
/** @param {{fatal: boolean}} options */
decoders['gb18030'] = function(options) {
  return new GB18030Decoder(options);
};
|
|||
|
|
|||
|
|
|||
|
//
|
|||
|
// 12. Legacy multi-byte Chinese (traditional) encodings
|
|||
|
//
|
|||
|
|
|||
|
// 12.1 Big5
|
|||
|
|
|||
|
// 12.1.1 Big5 decoder
|
|||
|
/**
 * Decoder for Big5. Bytes are consumed one at a time; a lead byte is
 * remembered between calls until its trail byte (or end-of-stream)
 * arrives.
 *
 * @constructor
 * @implements {Decoder}
 * @param {{fatal: boolean}} options Decoder options; `fatal` is passed to
 *     decoderError whenever malformed input is detected.
 */
function Big5Decoder(options) {
  var fatal = options.fatal;
  // Big5's decoder has an associated Big5 lead (initially 0x00).
  var /** @type {number} */ Big5_lead = 0x00;

  /**
   * @param {Stream} stream The stream of bytes being decoded.
   * @param {number} bite The next byte read from the stream.
   * @return {?(number|!Array.<number>)} The next code point(s)
   *     decoded, or null if not enough data exists in the input
   *     stream to decode a complete code point.
   */
  this.handler = function(stream, bite) {
    // 1. If byte is end-of-stream and Big5 lead is not 0x00, set
    // Big5 lead to 0x00 and return error.
    if (bite === end_of_stream && Big5_lead !== 0x00) {
      Big5_lead = 0x00;
      return decoderError(fatal);
    }

    // 2. If byte is end-of-stream and Big5 lead is 0x00, return
    // finished.
    if (bite === end_of_stream && Big5_lead === 0x00)
      return finished;

    // 3. If Big5 lead is not 0x00, let lead be Big5 lead, let
    // pointer be null, set Big5 lead to 0x00, and then run these
    // substeps:
    if (Big5_lead !== 0x00) {
      var lead = Big5_lead;
      var pointer = null;
      Big5_lead = 0x00;

      // 1. Let offset be 0x40 if byte is less than 0x7F and 0x62
      // otherwise.
      var offset = bite < 0x7F ? 0x40 : 0x62;

      // 2. If byte is in the range 0x40 to 0x7E, inclusive, or 0xA1
      // to 0xFE, inclusive, set pointer to (lead − 0x81) × 157 +
      // (byte − offset).
      if (inRange(bite, 0x40, 0x7E) || inRange(bite, 0xA1, 0xFE))
        pointer = (lead - 0x81) * 157 + (bite - offset);

      // 3. If there is a row in the table below whose first column
      // is pointer, return the two code points listed in its second
      // column
      // Pointer | Code points
      // --------+--------------
      // 1133    | U+00CA U+0304
      // 1135    | U+00CA U+030C
      // 1164    | U+00EA U+0304
      // 1166    | U+00EA U+030C
      switch (pointer) {
        case 1133: return [0x00CA, 0x0304];
        case 1135: return [0x00CA, 0x030C];
        case 1164: return [0x00EA, 0x0304];
        case 1166: return [0x00EA, 0x030C];
      }

      // 4. Let code point be null if pointer is null and the index
      // code point for pointer in index Big5 otherwise.
      var code_point = (pointer === null) ? null :
          indexCodePointFor(pointer, index('big5'));

      // 5. If code point is null and byte is an ASCII byte, prepend
      // byte to stream (so the ASCII byte is handled again on its own).
      if (code_point === null && isASCIIByte(bite))
        stream.prepend(bite);

      // 6. If code point is null, return error.
      if (code_point === null)
        return decoderError(fatal);

      // 7. Return a code point whose value is code point.
      return code_point;
    }

    // 4. If byte is an ASCII byte, return a code point whose value
    // is byte.
    if (isASCIIByte(bite))
      return bite;

    // 5. If byte is in the range 0x81 to 0xFE, inclusive, set Big5
    // lead to byte and return continue.
    if (inRange(bite, 0x81, 0xFE)) {
      Big5_lead = bite;
      return null;
    }

    // 6. Return error.
    return decoderError(fatal);
  };
}
|
|||
|
|
|||
|
// 12.1.2 Big5 encoder
|
|||
|
/**
 * Encoder for Big5. Code points whose index Big5 pointer would produce a
 * lead byte below 0xA1 are reported through encoderError rather than
 * emitted.
 *
 * @constructor
 * @implements {Encoder}
 * @param {{fatal: boolean}} options Accepted for interface parity with the
 *     other encoders; this encoder does not read it.
 */
function Big5Encoder(options) {
  /**
   * @param {Stream} stream Input stream (not used by this handler).
   * @param {number} code_point Next code point read from the stream.
   * @return {(number|!Array.<number>)} Byte(s) to emit.
   */
  this.handler = function(stream, code_point) {
    // 1. If code point is end-of-stream, return finished.
    if (code_point === end_of_stream)
      return finished;

    // 2. If code point is an ASCII code point, return a byte whose
    // value is code point.
    if (isASCIICodePoint(code_point))
      return code_point;

    // 3. Let pointer be the index Big5 pointer for code point.
    var pointer = indexBig5PointerFor(code_point);

    // 4. If pointer is null, return error with code point.
    if (pointer === null)
      return encoderError(code_point);

    // 5. Let lead be floor(pointer / 157) + 0x81.
    var lead = floor(pointer / 157) + 0x81;

    // 6. If lead is less than 0xA1, return error with code point.
    if (lead < 0xA1)
      return encoderError(code_point);

    // 7. Let trail be pointer % 157.
    var trail = pointer % 157;

    // 8. Let offset be 0x40 if trail is less than 0x3F and 0x62
    // otherwise.
    var offset = trail < 0x3F ? 0x40 : 0x62;

    // 9. Return two bytes whose values are lead and trail + offset.
    return [lead, trail + offset];
  };
}
|
|||
|
|
|||
|
// Register the Big5 encoder and decoder factories under the name 'Big5'.
/** @param {{fatal: boolean}} options */
encoders['Big5'] = function(options) {
  return new Big5Encoder(options);
};
/** @param {{fatal: boolean}} options */
decoders['Big5'] = function(options) {
  return new Big5Decoder(options);
};
|
|||
|
|
|||
|
|
|||
|
//
|
|||
|
// 13. Legacy multi-byte Japanese encodings
|
|||
|
//
|
|||
|
|
|||
|
// 13.1 euc-jp
|
|||
|
|
|||
|
// 13.1.1 euc-jp decoder
|
|||
|
/**
 * Decoder for euc-jp. Keeps the pending lead byte and the jis0212 flag
 * between calls so that two- and three-byte sequences can be decoded one
 * byte at a time.
 *
 * @constructor
 * @implements {Decoder}
 * @param {{fatal: boolean}} options Decoder options; `fatal` is passed to
 *     decoderError whenever malformed input is detected.
 */
function EUCJPDecoder(options) {
  var fatal = options.fatal;

  // euc-jp's decoder has an associated euc-jp jis0212 flag
  // (initially unset) and euc-jp lead (initially 0x00).
  var /** @type {boolean} */ eucjp_jis0212_flag = false,
      /** @type {number} */ eucjp_lead = 0x00;

  /**
   * @param {Stream} stream The stream of bytes being decoded.
   * @param {number} bite The next byte read from the stream.
   * @return {?(number|!Array.<number>)} The next code point(s)
   *     decoded, or null if not enough data exists in the input
   *     stream to decode a complete code point.
   */
  this.handler = function(stream, bite) {
    // 1. If byte is end-of-stream and euc-jp lead is not 0x00, set
    // euc-jp lead to 0x00, and return error.
    if (bite === end_of_stream && eucjp_lead !== 0x00) {
      eucjp_lead = 0x00;
      return decoderError(fatal);
    }

    // 2. If byte is end-of-stream and euc-jp lead is 0x00, return
    // finished.
    if (bite === end_of_stream && eucjp_lead === 0x00)
      return finished;

    // 3. If euc-jp lead is 0x8E and byte is in the range 0xA1 to
    // 0xDF, inclusive, set euc-jp lead to 0x00 and return a code
    // point whose value is 0xFF61 − 0xA1 + byte.
    if (eucjp_lead === 0x8E && inRange(bite, 0xA1, 0xDF)) {
      eucjp_lead = 0x00;
      return 0xFF61 - 0xA1 + bite;
    }

    // 4. If euc-jp lead is 0x8F and byte is in the range 0xA1 to
    // 0xFE, inclusive, set the euc-jp jis0212 flag, set euc-jp lead
    // to byte, and return continue.
    if (eucjp_lead === 0x8F && inRange(bite, 0xA1, 0xFE)) {
      eucjp_jis0212_flag = true;
      eucjp_lead = bite;
      return null;
    }

    // 5. If euc-jp lead is not 0x00, let lead be euc-jp lead, set
    // euc-jp lead to 0x00, and run these substeps:
    if (eucjp_lead !== 0x00) {
      var lead = eucjp_lead;
      eucjp_lead = 0x00;

      // 1. Let code point be null.
      var code_point = null;

      // 2. If lead and byte are both in the range 0xA1 to 0xFE,
      // inclusive, set code point to the index code point for (lead
      // − 0xA1) × 94 + byte − 0xA1 in index jis0208 if the euc-jp
      // jis0212 flag is unset and in index jis0212 otherwise.
      if (inRange(lead, 0xA1, 0xFE) && inRange(bite, 0xA1, 0xFE)) {
        code_point = indexCodePointFor(
            (lead - 0xA1) * 94 + (bite - 0xA1),
            index(!eucjp_jis0212_flag ? 'jis0208' : 'jis0212'));
      }

      // 3. Unset the euc-jp jis0212 flag.
      eucjp_jis0212_flag = false;

      // 4. If byte is not in the range 0xA1 to 0xFE, inclusive,
      // prepend byte to stream.
      if (!inRange(bite, 0xA1, 0xFE))
        stream.prepend(bite);

      // 5. If code point is null, return error.
      if (code_point === null)
        return decoderError(fatal);

      // 6. Return a code point whose value is code point.
      return code_point;
    }

    // 6. If byte is an ASCII byte, return a code point whose value
    // is byte.
    if (isASCIIByte(bite))
      return bite;

    // 7. If byte is 0x8E, 0x8F, or in the range 0xA1 to 0xFE,
    // inclusive, set euc-jp lead to byte and return continue.
    if (bite === 0x8E || bite === 0x8F || inRange(bite, 0xA1, 0xFE)) {
      eucjp_lead = bite;
      return null;
    }

    // 8. Return error.
    return decoderError(fatal);
  };
}
|
|||
|
|
|||
|
// 13.1.2 euc-jp encoder
|
|||
|
/**
 * Encoder for euc-jp. ASCII passes through, U+00A5 and U+203E map to the
 * single bytes 0x5C and 0x7E, U+FF61..U+FF9F are emitted as 0x8E plus one
 * byte, and everything else is looked up in index jis0208.
 *
 * @constructor
 * @implements {Encoder}
 * @param {{fatal: boolean}} options Accepted for interface parity with the
 *     other encoders; this encoder does not read it.
 */
function EUCJPEncoder(options) {
  /**
   * @param {Stream} stream Input stream (not used by this handler).
   * @param {number} code_point Next code point read from the stream.
   * @return {(number|!Array.<number>)} Byte(s) to emit.
   */
  this.handler = function(stream, code_point) {
    // 1. If code point is end-of-stream, return finished.
    if (code_point === end_of_stream)
      return finished;

    // 2. If code point is an ASCII code point, return a byte whose
    // value is code point.
    if (isASCIICodePoint(code_point))
      return code_point;

    // 3. If code point is U+00A5, return byte 0x5C.
    if (code_point === 0x00A5)
      return 0x5C;

    // 4. If code point is U+203E, return byte 0x7E.
    if (code_point === 0x203E)
      return 0x7E;

    // 5. If code point is in the range U+FF61 to U+FF9F, inclusive,
    // return two bytes whose values are 0x8E and code point −
    // 0xFF61 + 0xA1.
    if (inRange(code_point, 0xFF61, 0xFF9F))
      return [0x8E, code_point - 0xFF61 + 0xA1];

    // 6. If code point is U+2212, set it to U+FF0D.
    if (code_point === 0x2212)
      code_point = 0xFF0D;

    // 7. Let pointer be the index pointer for code point in index
    // jis0208.
    var pointer = indexPointerFor(code_point, index('jis0208'));

    // 8. If pointer is null, return error with code point.
    if (pointer === null)
      return encoderError(code_point);

    // 9. Let lead be floor(pointer / 94) + 0xA1.
    var lead = floor(pointer / 94) + 0xA1;

    // 10. Let trail be pointer % 94 + 0xA1.
    var trail = pointer % 94 + 0xA1;

    // 11. Return two bytes whose values are lead and trail.
    return [lead, trail];
  };
}
|
|||
|
|
|||
|
// Register the euc-jp encoder and decoder factories under the name
// 'EUC-JP'.
/** @param {{fatal: boolean}} options */
encoders['EUC-JP'] = function(options) {
  return new EUCJPEncoder(options);
};
/** @param {{fatal: boolean}} options */
decoders['EUC-JP'] = function(options) {
  return new EUCJPDecoder(options);
};
|
|||
|
|
|||
|
// 13.2 iso-2022-jp
|
|||
|
|
|||
|
// 13.2.1 iso-2022-jp decoder
|
|||
|
/**
 * Decoder for iso-2022-jp, implemented as a state machine that is driven
 * one byte at a time. Escape sequences select the active character set;
 * the most recently selected set is remembered as the "output state" so
 * that decoding can resume in it after a malformed escape sequence.
 *
 * @constructor
 * @implements {Decoder}
 * @param {{fatal: boolean}} options Decoder options; `fatal` is passed to
 *     decoderError whenever malformed input is detected.
 */
function ISO2022JPDecoder(options) {
  var fatal = options.fatal;
  /** @enum */
  var states = {
    ASCII: 0,
    Roman: 1,
    Katakana: 2,
    LeadByte: 3,
    TrailByte: 4,
    EscapeStart: 5,
    Escape: 6
  };
  // iso-2022-jp's decoder has an associated iso-2022-jp decoder
  // state (initially ASCII), iso-2022-jp decoder output state
  // (initially ASCII), iso-2022-jp lead (initially 0x00), and
  // iso-2022-jp output flag (initially unset).
  var /** @type {number} */ iso2022jp_decoder_state = states.ASCII,
      /** @type {number} */ iso2022jp_decoder_output_state = states.ASCII,
      /** @type {number} */ iso2022jp_lead = 0x00,
      /** @type {boolean} */ iso2022jp_output_flag = false;
  /**
   * @param {Stream} stream The stream of bytes being decoded.
   * @param {number} bite The next byte read from the stream.
   * @return {?(number|!Array.<number>)} The next code point(s)
   *     decoded, or null if not enough data exists in the input
   *     stream to decode a complete code point.
   */
  this.handler = function(stream, bite) {
    // switching on iso-2022-jp decoder state:
    switch (iso2022jp_decoder_state) {
      default:
      case states.ASCII:
        // ASCII
        // Based on byte:

        // 0x1B
        if (bite === 0x1B) {
          // Set iso-2022-jp decoder state to escape start and return
          // continue.
          iso2022jp_decoder_state = states.EscapeStart;
          return null;
        }

        // 0x00 to 0x7F, excluding 0x0E, 0x0F, and 0x1B
        if (inRange(bite, 0x00, 0x7F) && bite !== 0x0E
            && bite !== 0x0F && bite !== 0x1B) {
          // Unset the iso-2022-jp output flag and return a code point
          // whose value is byte.
          iso2022jp_output_flag = false;
          return bite;
        }

        // end-of-stream
        if (bite === end_of_stream) {
          // Return finished.
          return finished;
        }

        // Otherwise
        // Unset the iso-2022-jp output flag and return error.
        iso2022jp_output_flag = false;
        return decoderError(fatal);

      case states.Roman:
        // Roman
        // Based on byte:

        // 0x1B
        if (bite === 0x1B) {
          // Set iso-2022-jp decoder state to escape start and return
          // continue.
          iso2022jp_decoder_state = states.EscapeStart;
          return null;
        }

        // 0x5C
        if (bite === 0x5C) {
          // Unset the iso-2022-jp output flag and return code point
          // U+00A5.
          iso2022jp_output_flag = false;
          return 0x00A5;
        }

        // 0x7E
        if (bite === 0x7E) {
          // Unset the iso-2022-jp output flag and return code point
          // U+203E.
          iso2022jp_output_flag = false;
          return 0x203E;
        }

        // 0x00 to 0x7F, excluding 0x0E, 0x0F, 0x1B, 0x5C, and 0x7E
        if (inRange(bite, 0x00, 0x7F) && bite !== 0x0E && bite !== 0x0F
            && bite !== 0x1B && bite !== 0x5C && bite !== 0x7E) {
          // Unset the iso-2022-jp output flag and return a code point
          // whose value is byte.
          iso2022jp_output_flag = false;
          return bite;
        }

        // end-of-stream
        if (bite === end_of_stream) {
          // Return finished.
          return finished;
        }

        // Otherwise
        // Unset the iso-2022-jp output flag and return error.
        iso2022jp_output_flag = false;
        return decoderError(fatal);

      case states.Katakana:
        // Katakana
        // Based on byte:

        // 0x1B
        if (bite === 0x1B) {
          // Set iso-2022-jp decoder state to escape start and return
          // continue.
          iso2022jp_decoder_state = states.EscapeStart;
          return null;
        }

        // 0x21 to 0x5F
        if (inRange(bite, 0x21, 0x5F)) {
          // Unset the iso-2022-jp output flag and return a code point
          // whose value is 0xFF61 − 0x21 + byte.
          iso2022jp_output_flag = false;
          return 0xFF61 - 0x21 + bite;
        }

        // end-of-stream
        if (bite === end_of_stream) {
          // Return finished.
          return finished;
        }

        // Otherwise
        // Unset the iso-2022-jp output flag and return error.
        iso2022jp_output_flag = false;
        return decoderError(fatal);

      case states.LeadByte:
        // Lead byte
        // Based on byte:

        // 0x1B
        if (bite === 0x1B) {
          // Set iso-2022-jp decoder state to escape start and return
          // continue.
          iso2022jp_decoder_state = states.EscapeStart;
          return null;
        }

        // 0x21 to 0x7E
        if (inRange(bite, 0x21, 0x7E)) {
          // Unset the iso-2022-jp output flag, set iso-2022-jp lead
          // to byte, iso-2022-jp decoder state to trail byte, and
          // return continue.
          iso2022jp_output_flag = false;
          iso2022jp_lead = bite;
          iso2022jp_decoder_state = states.TrailByte;
          return null;
        }

        // end-of-stream
        if (bite === end_of_stream) {
          // Return finished.
          return finished;
        }

        // Otherwise
        // Unset the iso-2022-jp output flag and return error.
        iso2022jp_output_flag = false;
        return decoderError(fatal);

      case states.TrailByte:
        // Trail byte
        // Based on byte:

        // 0x1B
        if (bite === 0x1B) {
          // Set iso-2022-jp decoder state to escape start and return
          // error (the pending lead byte has no trail byte).
          iso2022jp_decoder_state = states.EscapeStart;
          return decoderError(fatal);
        }

        // 0x21 to 0x7E
        if (inRange(bite, 0x21, 0x7E)) {
          // 1. Set the iso-2022-jp decoder state to lead byte.
          iso2022jp_decoder_state = states.LeadByte;

          // 2. Let pointer be (iso-2022-jp lead − 0x21) × 94 + byte − 0x21.
          var pointer = (iso2022jp_lead - 0x21) * 94 + bite - 0x21;

          // 3. Let code point be the index code point for pointer in
          // index jis0208.
          var code_point = indexCodePointFor(pointer, index('jis0208'));

          // 4. If code point is null, return error.
          if (code_point === null)
            return decoderError(fatal);

          // 5. Return a code point whose value is code point.
          return code_point;
        }

        // end-of-stream
        if (bite === end_of_stream) {
          // Set the iso-2022-jp decoder state to lead byte, prepend
          // byte to stream, and return error.
          iso2022jp_decoder_state = states.LeadByte;
          stream.prepend(bite);
          return decoderError(fatal);
        }

        // Otherwise
        // Set iso-2022-jp decoder state to lead byte and return
        // error.
        iso2022jp_decoder_state = states.LeadByte;
        return decoderError(fatal);

      case states.EscapeStart:
        // Escape start

        // 1. If byte is either 0x24 or 0x28, set iso-2022-jp lead to
        // byte, iso-2022-jp decoder state to escape, and return
        // continue.
        if (bite === 0x24 || bite === 0x28) {
          iso2022jp_lead = bite;
          iso2022jp_decoder_state = states.Escape;
          return null;
        }

        // 2. Prepend byte to stream.
        stream.prepend(bite);

        // 3. Unset the iso-2022-jp output flag, set iso-2022-jp
        // decoder state to iso-2022-jp decoder output state, and
        // return error.
        iso2022jp_output_flag = false;
        iso2022jp_decoder_state = iso2022jp_decoder_output_state;
        return decoderError(fatal);

      case states.Escape:
        // Escape

        // 1. Let lead be iso-2022-jp lead and set iso-2022-jp lead to
        // 0x00.
        var lead = iso2022jp_lead;
        iso2022jp_lead = 0x00;

        // 2. Let state be null.
        var state = null;

        // 3. If lead is 0x28 and byte is 0x42, set state to ASCII.
        if (lead === 0x28 && bite === 0x42)
          state = states.ASCII;

        // 4. If lead is 0x28 and byte is 0x4A, set state to Roman.
        if (lead === 0x28 && bite === 0x4A)
          state = states.Roman;

        // 5. If lead is 0x28 and byte is 0x49, set state to Katakana.
        if (lead === 0x28 && bite === 0x49)
          state = states.Katakana;

        // 6. If lead is 0x24 and byte is either 0x40 or 0x42, set
        // state to lead byte.
        if (lead === 0x24 && (bite === 0x40 || bite === 0x42))
          state = states.LeadByte;

        // 7. If state is non-null, run these substeps:
        if (state !== null) {
          // 1. Set iso-2022-jp decoder state and iso-2022-jp decoder
          // output state to state. Both must be updated: the output
          // state is what a later malformed escape sequence falls
          // back to.
          iso2022jp_decoder_state = iso2022jp_decoder_output_state = state;

          // 2. Let output flag be the iso-2022-jp output flag.
          var output_flag = iso2022jp_output_flag;

          // 3. Set the iso-2022-jp output flag.
          iso2022jp_output_flag = true;

          // 4. Return continue, if output flag is unset, and error
          // otherwise.
          return !output_flag ? null : decoderError(fatal);
        }

        // 8. Prepend lead and byte to stream.
        stream.prepend([lead, bite]);

        // 9. Unset the iso-2022-jp output flag, set iso-2022-jp
        // decoder state to iso-2022-jp decoder output state and
        // return error.
        iso2022jp_output_flag = false;
        iso2022jp_decoder_state = iso2022jp_decoder_output_state;
        return decoderError(fatal);
    }
  };
}
|
|||
|
|
|||
|
// 13.2.2 iso-2022-jp encoder
|
|||
|
/**
 * Encoder for iso-2022-jp. Tracks which character set is currently
 * selected (ASCII, Roman or jis0208). When a code point needs a different
 * set, the handler pushes the code point back onto the stream, switches
 * state and returns the three-byte escape sequence; the code point itself
 * is then encoded on the next call.
 *
 * @constructor
 * @implements {Encoder}
 * @param {{fatal: boolean}} options Accepted for interface parity with the
 *     other encoders; this encoder does not read it.
 */
function ISO2022JPEncoder(options) {
  // iso-2022-jp's encoder has an associated iso-2022-jp encoder
  // state which is one of ASCII, Roman, and jis0208 (initially
  // ASCII).
  /** @enum */
  var states = {
    ASCII: 0,
    Roman: 1,
    jis0208: 2
  };
  var /** @type {number} */ iso2022jp_state = states.ASCII;
  /**
   * @param {Stream} stream Input stream; receives the code point back via
   *     prepend() whenever an escape sequence has to be emitted first.
   * @param {number} code_point Next code point read from the stream.
   * @return {(number|!Array.<number>)} Byte(s) to emit.
   */
  this.handler = function(stream, code_point) {
    // 1. If code point is end-of-stream and iso-2022-jp encoder
    // state is not ASCII, prepend code point to stream, set
    // iso-2022-jp encoder state to ASCII, and return three bytes
    // 0x1B 0x28 0x42.
    if (code_point === end_of_stream &&
        iso2022jp_state !== states.ASCII) {
      stream.prepend(code_point);
      iso2022jp_state = states.ASCII;
      return [0x1B, 0x28, 0x42];
    }

    // 2. If code point is end-of-stream and iso-2022-jp encoder
    // state is ASCII, return finished.
    if (code_point === end_of_stream && iso2022jp_state === states.ASCII)
      return finished;

    // 3. If ISO-2022-JP encoder state is ASCII or Roman, and code
    // point is U+000E, U+000F, or U+001B, return error with U+FFFD.
    if ((iso2022jp_state === states.ASCII ||
         iso2022jp_state === states.Roman) &&
        (code_point === 0x000E || code_point === 0x000F ||
         code_point === 0x001B)) {
      return encoderError(0xFFFD);
    }

    // 4. If iso-2022-jp encoder state is ASCII and code point is an
    // ASCII code point, return a byte whose value is code point.
    if (iso2022jp_state === states.ASCII &&
        isASCIICodePoint(code_point))
      return code_point;

    // 5. If iso-2022-jp encoder state is Roman and code point is an
    // ASCII code point, excluding U+005C and U+007E, or is U+00A5
    // or U+203E, run these substeps:
    if (iso2022jp_state === states.Roman &&
        ((isASCIICodePoint(code_point) &&
          code_point !== 0x005C && code_point !== 0x007E) ||
         (code_point === 0x00A5 || code_point === 0x203E))) {

      // 1. If code point is an ASCII code point, return a byte
      // whose value is code point.
      if (isASCIICodePoint(code_point))
        return code_point;

      // 2. If code point is U+00A5, return byte 0x5C.
      if (code_point === 0x00A5)
        return 0x5C;

      // 3. If code point is U+203E, return byte 0x7E.
      if (code_point === 0x203E)
        return 0x7E;
    }

    // 6. If code point is an ASCII code point, and iso-2022-jp
    // encoder state is not ASCII, prepend code point to stream, set
    // iso-2022-jp encoder state to ASCII, and return three bytes
    // 0x1B 0x28 0x42.
    if (isASCIICodePoint(code_point) &&
        iso2022jp_state !== states.ASCII) {
      stream.prepend(code_point);
      iso2022jp_state = states.ASCII;
      return [0x1B, 0x28, 0x42];
    }

    // 7. If code point is either U+00A5 or U+203E, and iso-2022-jp
    // encoder state is not Roman, prepend code point to stream, set
    // iso-2022-jp encoder state to Roman, and return three bytes
    // 0x1B 0x28 0x4A.
    if ((code_point === 0x00A5 || code_point === 0x203E) &&
        iso2022jp_state !== states.Roman) {
      stream.prepend(code_point);
      iso2022jp_state = states.Roman;
      return [0x1B, 0x28, 0x4A];
    }

    // 8. If code point is U+2212, set it to U+FF0D.
    if (code_point === 0x2212)
      code_point = 0xFF0D;

    // 9. Let pointer be the index pointer for code point in index
    // jis0208.
    var pointer = indexPointerFor(code_point, index('jis0208'));

    // 10. If pointer is null, return error with code point.
    if (pointer === null)
      return encoderError(code_point);

    // 11. If iso-2022-jp encoder state is not jis0208, prepend code
    // point to stream, set iso-2022-jp encoder state to jis0208,
    // and return three bytes 0x1B 0x24 0x42.
    if (iso2022jp_state !== states.jis0208) {
      stream.prepend(code_point);
      iso2022jp_state = states.jis0208;
      return [0x1B, 0x24, 0x42];
    }

    // 12. Let lead be floor(pointer / 94) + 0x21.
    var lead = floor(pointer / 94) + 0x21;

    // 13. Let trail be pointer % 94 + 0x21.
    var trail = pointer % 94 + 0x21;

    // 14. Return two bytes whose values are lead and trail.
    return [lead, trail];
  };
}
|
|||
|
|
|||
|
// Register the iso-2022-jp encoder and decoder factories under the name
// 'ISO-2022-JP'.
/** @param {{fatal: boolean}} options */
encoders['ISO-2022-JP'] = function(options) {
  return new ISO2022JPEncoder(options);
};
/** @param {{fatal: boolean}} options */
decoders['ISO-2022-JP'] = function(options) {
  return new ISO2022JPDecoder(options);
};
|
|||
|
|
|||
|
// 13.3 Shift_JIS
|
|||
|
|
|||
|
// 13.3.1 Shift_JIS decoder
|
|||
|
/**
 * Shift_JIS decoder (section 13.3.1). Single bytes are mapped directly;
 * a lead byte is remembered until the following byte arrives, and the
 * pair is then resolved, mostly through index jis0208.
 * @constructor
 * @implements {Decoder}
 * @param {{fatal: boolean}} options
 */
function ShiftJISDecoder(options) {
  var fatal = options.fatal;
  // Lead byte still waiting for its trail byte; 0x00 means none pending.
  var /** @type {number} */ pending_lead = 0x00;

  /**
   * Resolves a lead byte together with the byte that followed it.
   * @param {Stream} stream The stream of bytes being decoded.
   * @param {number} lead The remembered lead byte.
   * @param {number} bite The byte that followed the lead.
   * @return {number} The decoded code point, or the result of
   *     decoderError when the pair does not decode.
   */
  function decodePair(stream, lead, bite) {
    // A pointer only exists when the trail byte lies in 0x40..0x7E or
    // 0x80..0xFC; the offsets depend on which side of 0x7F / 0xA0 the
    // trail and lead bytes fall.
    var pointer = null;
    if (inRange(bite, 0x40, 0x7E) || inRange(bite, 0x80, 0xFC)) {
      var trail_offset = (bite < 0x7F) ? 0x40 : 0x41;
      var lead_offset = (lead < 0xA0) ? 0x81 : 0xC1;
      pointer = (lead - lead_offset) * 188 + bite - trail_offset;
    }

    // Pointers 8836..10715 are mapped arithmetically, starting at
    // 0xE000, without consulting the index.
    if (pointer !== null && inRange(pointer, 8836, 10715))
      return 0xE000 - 8836 + pointer;

    var code_point = null;
    if (pointer !== null)
      code_point = indexCodePointFor(pointer, index('jis0208'));

    if (code_point !== null)
      return code_point;

    // Failed pair: an ASCII trail byte is handed back to the stream so
    // that it is processed again on its own, then the error is reported.
    if (isASCIIByte(bite))
      stream.prepend(bite);
    return decoderError(fatal);
  }

  /**
   * @param {Stream} stream The stream of bytes being decoded.
   * @param {number} bite The next byte read from the stream.
   * @return {?(number|!Array.<number>)} The next code point(s)
   *     decoded, or null if not enough data exists in the input
   *     stream to decode a complete code point.
   */
  this.handler = function(stream, bite) {
    // End of stream: finished when nothing is pending, otherwise the
    // dangling lead byte is dropped and reported as an error.
    if (bite === end_of_stream) {
      if (pending_lead === 0x00)
        return finished;
      pending_lead = 0x00;
      return decoderError(fatal);
    }

    // A lead byte is pending: this byte completes (or breaks) the pair.
    if (pending_lead !== 0x00) {
      var lead = pending_lead;
      pending_lead = 0x00;
      return decodePair(stream, lead, bite);
    }

    // ASCII bytes and 0x80 decode to themselves.
    if (isASCIIByte(bite) || bite === 0x80)
      return bite;

    // 0xA1..0xDF map onto the run of code points starting at 0xFF61.
    if (inRange(bite, 0xA1, 0xDF))
      return 0xFF61 - 0xA1 + bite;

    // Lead bytes are stored; null asks the caller for more input.
    if (inRange(bite, 0x81, 0x9F) || inRange(bite, 0xE0, 0xFC)) {
      pending_lead = bite;
      return null;
    }

    // Anything else is not a valid first byte.
    return decoderError(fatal);
  };
}
|
|||
|
|
|||
|
// 13.3.2 Shift_JIS encoder
|
|||
|
/**
 * Shift_JIS encoder (section 13.3.2). Emits one byte for the directly
 * mapped code points and a lead/trail byte pair for everything found
 * through the Shift_JIS pointer lookup.
 * @constructor
 * @implements {Encoder}
 * @param {{fatal: boolean}} options
 */
function ShiftJISEncoder(options) {
  // Read like in the sibling coders; the handler below never consults it.
  var fatal = options.fatal;

  /**
   * @param {Stream} stream Input stream.
   * @param {number} code_point Next code point read from the stream.
   * @return {(number|!Array.<number>)} Byte(s) to emit.
   */
  this.handler = function(stream, code_point) {
    if (code_point === end_of_stream)
      return finished;

    // ASCII code points and U+0080 are emitted unchanged.
    if (isASCIICodePoint(code_point) || code_point === 0x0080)
      return code_point;

    // Two code points have fixed single-byte encodings.
    switch (code_point) {
      case 0x00A5:
        return 0x5C;
      case 0x203E:
        return 0x7E;
    }

    // U+FF61..U+FF9F map onto the single bytes starting at 0xA1.
    if (inRange(code_point, 0xFF61, 0xFF9F))
      return code_point - 0xFF61 + 0xA1;

    // U+2212 is looked up (and, on failure, reported) as U+FF0D.
    var lookup = (code_point === 0x2212) ? 0xFF0D : code_point;

    var pointer = indexShiftJISPointerFor(lookup);
    if (pointer === null)
      return encoderError(lookup);

    // Split the pointer into a 188-wide row and a position within it,
    // then shift each part by the offset that applies to its range.
    var lead = floor(pointer / 188);
    var trail = pointer % 188;
    var lead_byte = lead + ((lead < 0x1F) ? 0x81 : 0xC1);
    var trail_byte = trail + ((trail < 0x3F) ? 0x40 : 0x41);

    return [lead_byte, trail_byte];
  };
}
|
|||
|
|
|||
|
/**
 * Factory stored under the 'Shift_JIS' key of `encoders`; each call
 * builds a new ShiftJISEncoder from the supplied options.
 * @param {{fatal: boolean}} options
 */
encoders['Shift_JIS'] = function(options) {
  var encoder = new ShiftJISEncoder(options);
  return encoder;
};

/**
 * Factory stored under the 'Shift_JIS' key of `decoders`; each call
 * builds a new ShiftJISDecoder from the supplied options.
 * @param {{fatal: boolean}} options
 */
decoders['Shift_JIS'] = function(options) {
  var decoder = new ShiftJISDecoder(options);
  return decoder;
};
|
|||
|
|
|||
|
//
|
|||
|
// 14. Legacy multi-byte Korean encodings
|
|||
|
//
|
|||
|
|
|||
|
// 14.1 euc-kr
|
|||
|
|
|||
|
// 14.1.1 euc-kr decoder
|
|||
|
/**
 * euc-kr decoder (section 14.1.1). ASCII bytes decode to themselves; a
 * byte in 0x81..0xFE is remembered as a lead byte and combined with the
 * next byte into a pointer that is looked up in index euc-kr.
 * @constructor
 * @implements {Decoder}
 * @param {{fatal: boolean}} options
 */
function EUCKRDecoder(options) {
  var fatal = options.fatal;

  // euc-kr's decoder has an associated euc-kr lead (initially 0x00).
  var /** @type {number} */ euckr_lead = 0x00;

  /**
   * @param {Stream} stream The stream of bytes being decoded.
   * @param {number} bite The next byte read from the stream.
   * @return {?(number|!Array.<number>)} The next code point(s)
   *     decoded, or null if not enough data exists in the input
   *     stream to decode a complete code point.
   */
  this.handler = function(stream, bite) {
    // 1. If byte is end-of-stream and euc-kr lead is not 0x00, set
    // euc-kr lead to 0x00 and return error.
    if (bite === end_of_stream && euckr_lead !== 0x00) {
      euckr_lead = 0x00;
      return decoderError(fatal);
    }

    // 2. If byte is end-of-stream and euc-kr lead is 0x00, return
    // finished.
    if (bite === end_of_stream && euckr_lead === 0x00)
      return finished;

    // 3. If euc-kr lead is not 0x00, let lead be euc-kr lead, let
    // pointer be null, set euc-kr lead to 0x00, and then run these
    // substeps:
    if (euckr_lead !== 0x00) {
      var lead = euckr_lead;
      var pointer = null;
      euckr_lead = 0x00;

      // 1. If byte is in the range 0x41 to 0xFE, inclusive, set
      // pointer to (lead − 0x81) × 190 + (byte − 0x41).
      if (inRange(bite, 0x41, 0xFE))
        pointer = (lead - 0x81) * 190 + (bite - 0x41);

      // 2. Let code point be null, if pointer is null, and the
      // index code point for pointer in index euc-kr otherwise.
      var code_point = (pointer === null)
          ? null : indexCodePointFor(pointer, index('euc-kr'));

      // 3. If code point is null and byte is an ASCII byte, prepend
      // byte to stream.
      // The test must be on code point, not pointer: an ASCII trail
      // byte in 0x41..0x7F produces a non-null pointer that can still
      // miss the index, and that byte has to be read again on its own
      // instead of being swallowed by the error.
      if (code_point === null && isASCIIByte(bite))
        stream.prepend(bite);

      // 4. If code point is null, return error.
      if (code_point === null)
        return decoderError(fatal);

      // 5. Return a code point whose value is code point.
      return code_point;
    }

    // 4. If byte is an ASCII byte, return a code point whose value
    // is byte.
    if (isASCIIByte(bite))
      return bite;

    // 5. If byte is in the range 0x81 to 0xFE, inclusive, set
    // euc-kr lead to byte and return continue.
    if (inRange(bite, 0x81, 0xFE)) {
      euckr_lead = bite;
      return null;
    }

    // 6. Return error.
    return decoderError(fatal);
  };
}
|
|||
|
|
|||
|
// 14.1.2 euc-kr encoder
|
|||
|
/**
 * euc-kr encoder (section 14.1.2). ASCII code points are emitted as a
 * single byte; every other code point is looked up in index euc-kr and
 * emitted as a lead/trail byte pair.
 * @constructor
 * @implements {Encoder}
 * @param {{fatal: boolean}} options
 */
function EUCKREncoder(options) {
  // Read like in the sibling coders; the handler below never consults it.
  var fatal = options.fatal;

  /**
   * @param {Stream} stream Input stream.
   * @param {number} code_point Next code point read from the stream.
   * @return {(number|!Array.<number>)} Byte(s) to emit.
   */
  this.handler = function(stream, code_point) {
    if (code_point === end_of_stream)
      return finished;

    // ASCII code points are emitted unchanged.
    if (isASCIICodePoint(code_point))
      return code_point;

    // No pointer for the code point means it cannot be encoded.
    var pointer = indexPointerFor(code_point, index('euc-kr'));
    if (pointer === null)
      return encoderError(code_point);

    // The pointer splits into a 190-wide row (lead, from 0x81) and a
    // position inside that row (trail, from 0x41).
    var row = floor(pointer / 190);
    var cell = pointer % 190;
    return [row + 0x81, cell + 0x41];
  };
}
|
|||
|
|
|||
|
/**
 * Factory stored under the 'EUC-KR' key of `encoders`; each call
 * builds a new EUCKREncoder from the supplied options.
 * @param {{fatal: boolean}} options
 */
encoders['EUC-KR'] = function(options) {
  var encoder = new EUCKREncoder(options);
  return encoder;
};

/**
 * Factory stored under the 'EUC-KR' key of `decoders`; each call
 * builds a new EUCKRDecoder from the supplied options.
 * @param {{fatal: boolean}} options
 */
decoders['EUC-KR'] = function(options) {
  var decoder = new EUCKRDecoder(options);
  return decoder;
};
|
|||
|
|
|||
|
|
|||
|
//
|
|||
|
// 15. Legacy miscellaneous encodings
|
|||
|
//
|
|||
|
|
|||
|
// 15.1 replacement
|
|||
|
|
|||
|
// Not needed - API throws RangeError
|
|||
|
|
|||
|
// 15.2 Common infrastructure for utf-16be and utf-16le
|
|||
|
|
|||
|
/**
 * Splits a code unit into two bytes: `code_unit >> 8` and
 * `code_unit & 0x00FF`, ordered according to the requested endianness.
 * @param {number} code_unit
 * @param {boolean} utf16be True to put the high byte first, false to
 *     put the low byte first.
 * @return {!Array.<number>} bytes
 */
function convertCodeUnitToBytes(code_unit, utf16be) {
  var high = code_unit >> 8;
  var low = code_unit & 0x00FF;

  // Big-endian order is high byte first; little-endian is the reverse.
  return utf16be ? [high, low] : [low, high];
}
|
|||
|
|
|||
|
// 15.2.1 shared utf-16 decoder
|
|||
|
/**
 * Shared utf-16 decoder (section 15.2.1). Bytes are paired into code
 * units in the requested byte order; a lead surrogate is remembered
 * until the next code unit arrives so the pair can be combined.
 * @constructor
 * @implements {Decoder}
 * @param {boolean} utf16_be True if big-endian, false if little-endian.
 * @param {{fatal: boolean}} options
 */
function UTF16Decoder(utf16_be, options) {
  var fatal = options.fatal;
  // First byte of a code unit that is still incomplete, and a lead
  // surrogate still waiting for its trail surrogate; null when absent.
  var /** @type {?number} */ utf16_lead_byte = null,
      /** @type {?number} */ utf16_lead_surrogate = null;

  /**
   * @param {Stream} stream The stream of bytes being decoded.
   * @param {number} bite The next byte read from the stream.
   * @return {?(number|!Array.<number>)} The next code point(s)
   *     decoded, or null if not enough data exists in the input
   *     stream to decode a complete code point.
   */
  this.handler = function(stream, bite) {
    // 1. If byte is end-of-stream and either utf-16 lead byte or
    // utf-16 lead surrogate is not null, set utf-16 lead byte and
    // utf-16 lead surrogate to null, and return error.
    // Clearing the state matters: without it every later
    // end-of-stream call would report the same error again and never
    // reach "finished".
    if (bite === end_of_stream && (utf16_lead_byte !== null ||
                              utf16_lead_surrogate !== null)) {
      utf16_lead_byte = null;
      utf16_lead_surrogate = null;
      return decoderError(fatal);
    }

    // 2. If byte is end-of-stream and utf-16 lead byte and utf-16
    // lead surrogate are null, return finished.
    if (bite === end_of_stream && utf16_lead_byte === null &&
        utf16_lead_surrogate === null) {
      return finished;
    }

    // 3. If utf-16 lead byte is null, set utf-16 lead byte to byte
    // and return continue.
    if (utf16_lead_byte === null) {
      utf16_lead_byte = bite;
      return null;
    }

    // 4. Let code unit be the result of:
    var code_unit;
    if (utf16_be) {
      // utf-16be decoder flag is set
      //   (utf-16 lead byte << 8) + byte.
      code_unit = (utf16_lead_byte << 8) + bite;
    } else {
      // utf-16be decoder flag is unset
      //   (byte << 8) + utf-16 lead byte.
      code_unit = (bite << 8) + utf16_lead_byte;
    }
    // Then set utf-16 lead byte to null.
    utf16_lead_byte = null;

    // 5. If utf-16 lead surrogate is not null, let lead surrogate
    // be utf-16 lead surrogate, set utf-16 lead surrogate to null,
    // and then run these substeps:
    if (utf16_lead_surrogate !== null) {
      var lead_surrogate = utf16_lead_surrogate;
      utf16_lead_surrogate = null;

      // 1. If code unit is in the range U+DC00 to U+DFFF,
      // inclusive, return a code point whose value is 0x10000 +
      // ((lead surrogate − 0xD800) << 10) + (code unit − 0xDC00).
      if (inRange(code_unit, 0xDC00, 0xDFFF)) {
        return 0x10000 + (lead_surrogate - 0xD800) * 0x400 +
            (code_unit - 0xDC00);
      }

      // 2. Prepend the sequence resulting of converting code unit
      // to bytes using utf-16be decoder flag to stream and return
      // error. The code unit is thereby decoded again on its own,
      // after the error for the unpaired lead surrogate.
      stream.prepend(convertCodeUnitToBytes(code_unit, utf16_be));
      return decoderError(fatal);
    }

    // 6. If code unit is in the range U+D800 to U+DBFF, inclusive,
    // set utf-16 lead surrogate to code unit and return continue.
    if (inRange(code_unit, 0xD800, 0xDBFF)) {
      utf16_lead_surrogate = code_unit;
      return null;
    }

    // 7. If code unit is in the range U+DC00 to U+DFFF, inclusive,
    // return error.
    if (inRange(code_unit, 0xDC00, 0xDFFF))
      return decoderError(fatal);

    // 8. Return code point code unit.
    return code_unit;
  };
}
|
|||
|
|
|||
|
// 15.2.2 shared utf-16 encoder
|
|||
|
/**
 * Shared utf-16 encoder (section 15.2.2). Code points up to U+FFFF
 * become one code unit; anything above is split into a lead and a
 * trail surrogate. Each code unit is serialised through
 * convertCodeUnitToBytes in the requested byte order.
 * @constructor
 * @implements {Encoder}
 * @param {boolean} utf16_be True if big-endian, false if little-endian.
 * @param {{fatal: boolean}} options
 */
function UTF16Encoder(utf16_be, options) {
  // Read like in the sibling coders; the handler below never consults it.
  var fatal = options.fatal;

  /**
   * @param {Stream} stream Input stream.
   * @param {number} code_point Next code point read from the stream.
   * @return {(number|!Array.<number>)} Byte(s) to emit.
   */
  this.handler = function(stream, code_point) {
    if (code_point === end_of_stream)
      return finished;

    // U+0000..U+FFFF fit in a single code unit.
    if (inRange(code_point, 0x0000, 0xFFFF))
      return convertCodeUnitToBytes(code_point, utf16_be);

    // Above U+FFFF: the top bits of (code point − 0x10000) go into the
    // lead surrogate, the low ten bits into the trail surrogate.
    var supplementary = code_point - 0x10000;
    var lead_bytes = convertCodeUnitToBytes(
        (supplementary >> 10) + 0xD800, utf16_be);
    var trail_bytes = convertCodeUnitToBytes(
        (supplementary & 0x3FF) + 0xDC00, utf16_be);

    return lead_bytes.concat(trail_bytes);
  };
}
|
|||
|
|
|||
|
// 15.3 utf-16be
// 15.3.1 utf-16be decoder
/**
 * Factory stored under the 'UTF-16BE' key of `decoders`; builds the
 * shared UTF16Decoder with its big-endian flag set to true.
 * @param {{fatal: boolean}} options
 */
decoders['UTF-16BE'] = function(options) {
  var decoder = new UTF16Decoder(true, options);
  return decoder;
};

// 15.3.2 utf-16be encoder
/**
 * Factory stored under the 'UTF-16BE' key of `encoders`; builds the
 * shared UTF16Encoder with its big-endian flag set to true.
 * @param {{fatal: boolean}} options
 */
encoders['UTF-16BE'] = function(options) {
  var encoder = new UTF16Encoder(true, options);
  return encoder;
};
|
|||
|
|
|||
|
// 15.4 utf-16le
// 15.4.1 utf-16le decoder
/**
 * Factory stored under the 'UTF-16LE' key of `decoders`; builds the
 * shared UTF16Decoder with its big-endian flag set to false.
 * @param {{fatal: boolean}} options
 */
decoders['UTF-16LE'] = function(options) {
  var decoder = new UTF16Decoder(false, options);
  return decoder;
};

// 15.4.2 utf-16le encoder
/**
 * Factory stored under the 'UTF-16LE' key of `encoders`; builds the
 * shared UTF16Encoder with its big-endian flag set to false.
 * @param {{fatal: boolean}} options
 */
encoders['UTF-16LE'] = function(options) {
  var encoder = new UTF16Encoder(false, options);
  return encoder;
};
|
|||
|
|
|||
|
// 15.5 x-user-defined
|
|||
|
|
|||
|
// 15.5.1 x-user-defined decoder
|
|||
|
/**
 * x-user-defined decoder (section 15.5.1). ASCII bytes decode to
 * themselves; any other byte b decodes to 0xF780 + (b − 0x80).
 * @constructor
 * @implements {Decoder}
 * @param {{fatal: boolean}} options
 */
function XUserDefinedDecoder(options) {
  // Read like in the sibling coders; this handler has no error path
  // and never consults it.
  var fatal = options.fatal;

  /**
   * @param {Stream} stream The stream of bytes being decoded.
   * @param {number} bite The next byte read from the stream.
   * @return {?(number|!Array.<number>)} The next code point(s)
   *     decoded, or null if not enough data exists in the input
   *     stream to decode a complete code point.
   */
  this.handler = function(stream, bite) {
    if (bite === end_of_stream)
      return finished;

    // ASCII passes through; everything else is shifted so that 0x80
    // lands on 0xF780.
    return isASCIIByte(bite) ? bite : 0xF780 + bite - 0x80;
  };
}
|
|||
|
|
|||
|
// 15.5.2 x-user-defined encoder
|
|||
|
/**
 * x-user-defined encoder (section 15.5.2). ASCII code points are
 * emitted unchanged, U+F780..U+F7FF are emitted as the bytes starting
 * at 0x80, and every other code point is an encoder error.
 * @constructor
 * @implements {Encoder}
 * @param {{fatal: boolean}} options
 */
function XUserDefinedEncoder(options) {
  // Read like in the sibling coders; the handler below never consults it.
  var fatal = options.fatal;

  /**
   * @param {Stream} stream Input stream.
   * @param {number} code_point Next code point read from the stream.
   * @return {(number|!Array.<number>)} Byte(s) to emit.
   */
  this.handler = function(stream, code_point) {
    if (code_point === end_of_stream)
      return finished;

    // ASCII code points are emitted unchanged.
    if (isASCIICodePoint(code_point))
      return code_point;

    // Only U+F780..U+F7FF have a byte outside ASCII; everything else
    // cannot be represented.
    if (!inRange(code_point, 0xF780, 0xF7FF))
      return encoderError(code_point);

    return code_point - 0xF780 + 0x80;
  };
}
|
|||
|
|
|||
|
/**
 * Factory stored under the 'x-user-defined' key of `encoders`; each
 * call builds a new XUserDefinedEncoder from the supplied options.
 * @param {{fatal: boolean}} options
 */
encoders['x-user-defined'] = function(options) {
  var encoder = new XUserDefinedEncoder(options);
  return encoder;
};

/**
 * Factory stored under the 'x-user-defined' key of `decoders`; each
 * call builds a new XUserDefinedDecoder from the supplied options.
 * @param {{fatal: boolean}} options
 */
decoders['x-user-defined'] = function(options) {
  var decoder = new XUserDefinedDecoder(options);
  return decoder;
};
|
|||
|
|
|||
|
if (!global['TextEncoder'])
|
|||
|
global['TextEncoder'] = TextEncoder;
|
|||
|
if (!global['TextDecoder'])
|
|||
|
global['TextDecoder'] = TextDecoder;
|
|||
|
|
|||
|
if (typeof module !== "undefined" && module.exports) {
|
|||
|
module.exports = {
|
|||
|
TextEncoder: global['TextEncoder'],
|
|||
|
TextDecoder: global['TextDecoder'],
|
|||
|
EncodingIndexes: global["encoding-indexes"]
|
|||
|
};
|
|||
|
}
|
|||
|
}(this));
|