// Chinese tools, colorise tones
//
// By Serge Bertrand. Uploaded Aug 13, 2009.
// Raw text of the word index (simp.idx) and of the dictionary (adso.dat),
// both read synchronously at load time.
// NOTE(review): the two paths are absolute and point into one specific Firefox profile.
var myIdx = getLocalUrl('file:///D:/Documents and Settings/T0066167/Application Data/Mozilla/Firefox/Profiles/administrateur.default/gm_scripts/finalised_tone_is_color/simp.idx');
var myDico = getLocalUrl('file:///D:/Documents and Settings/T0066167/Application Data/Mozilla/Firefox/Profiles/administrateur.default/gm_scripts/finalised_tone_is_color/adso.dat');

// Slots of a parsed dictionary entry (the regex match array produced by searchDict).
const WORD = 2;
const PINYIN = 3;
const DEF = 4;
const TONE = 5;

// Colour per tone digit: 0 no indication, 1-4 first to fourth tone, 5 without tone.
var color = ['black', 'blue', 'orange', 'green', 'red', 'grey'];

// Document and window of the tab that currently has the focus.
var doc = jetpack.tabs.focused.contentDocument;
var win = jetpack.tabs.focused.contentWindow;

// State shared between mouse events: last target / range seen and the
// mouse position at which the tooltip was last shown.
var tdata = [];
tdata.prevTarget = null;
tdata.prevRangeNode = null;
tdata.prevRangeOfs = 0;
tdata.popX = 0;
tdata.popY = 0;

// Position of the tooltip relative to the mouse, in pixels.
var offsetx = 12;
var offsety = 8;

var txtref = ' ';
// Returns the UTF-16 code unit of the first character of `line`
// (NaN when `line` is the empty string).
function getCarCode(line) {
	var firstUnit = line.charCodeAt(0);
	return firstUnit;
}
// Synchronously fetches `url` with an XPCOM XMLHttpRequest, forcing the
// response to be treated as plain text, and returns the response text.
// Throws an Error when the request status is not 0.
// NOTE(review): status 0 is presumably what file:// requests report on success - confirm.
function getLocalUrl(url) {
  var xhrClass = Components.classes["@mozilla.org/xmlextras/xmlhttprequest;1"];
  var req = xhrClass.createInstance(Components.interfaces.nsIXMLHttpRequest);

  req.open('GET', url, false); // third argument false: blocking request
  req.overrideMimeType("text/plain");
  req.send(null);

  if (req.status != 0) {
    throw new Error("Failed to get " + url);
  }
  return req.responseText;
}
// Index loading: the index is stored in a table keyed by the UTF-16 code of each word's first character (the table itself is built near the end of this file)

// Reads one line of the global `myIdx`, starting at the global cursor `current`.
// Returns the line content followed by "\n", and moves `current` two
// characters past the terminating '\r' (i.e. past the "\r\n" pair).
// When no '\r' remains, the rest of `myIdx` is returned as the last line
// instead of scanning past the end of the string forever.
function ligne() {// reads one line of myIdx
	var end = myIdx.indexOf('\r', current);
	if (end == -1) {
		// unterminated last line: stop at the end of the text
		end = myIdx.length;
	}
	var ligneContent = myIdx.substring(current, end) + "\n";
	current = end + 2; // skip '\r' and the '\n' that follows it
	return ligneContent;
}


// Dictionary loading

// Dictionary lookup functions
// Filters an index down to the lines that start with `word`.
//   index: text made of lines, each preceded by '\n'
//   word:  prefix a line must start with
//   exact: when true the prefix must be followed by ',', i.e. the line's
//          key is exactly `word`
// Returns '\n' followed by every matching line (each with its trailing
// '\n'); the result is just '\n' when nothing matches.
// A last line without a trailing '\n' is returned with one added.
function decreaseIndex(index,word,exact) {
	var prefix = '\n' + word + (exact ? ',' : '');
	var reducedIndex = '\n';
	var start = index.indexOf(prefix, 0);
	while (start != -1) {
		var end = index.indexOf('\n', start + 1);
		if (end == -1) {
			// unterminated last line: take the rest and stop
			reducedIndex += index.substring(start + 1) + '\n';
			break;
		}
		reducedIndex += index.substring(start + 1, end + 1);
		// restart on the '\n' that closed this line so the next line can match too
		start = index.indexOf(prefix, end);
	}
	return reducedIndex;
}
	
// Collects the index lines whose key is exactly a prefix of `word`
// (1 character, then 2, ... up to the whole word), shortest prefix first.
// Returns:
//   false      when `word` is empty,
//   'inconnu'  when the first character has no entry in `dic`,
//   otherwise a string of matching index lines, each ended by '\n'
//   (possibly the empty string).
function getIndexEntries(word){
	if (word == '') { return false; }

	var firstChar = word.substr(0, 1);
	var candidates = dic[getCarCode(word)];
	if (candidates == undefined) {
		console.log('un car pas connu' + firstChar + 'in word' + word);
		return 'inconnu';
	}

	var matches = '';
	var exactLines = decreaseIndex(candidates, firstChar, true);
	// substr(1, ...) only drops the leading '\n': the length argument is at
	// least as long as what remains of exactLines
	if (exactLines != '\n') { matches += exactLines.substr(1, candidates.length - 1); }
	if (word.length == 1) { return matches; }

	for (var len = 2; len <= word.length; len++) {
		var prefix = word.substr(0, len);
		// narrow the candidates to the lines starting with the longer prefix
		candidates = decreaseIndex(candidates, prefix, false);
		if (candidates == '\n') { break; }
		exactLines = decreaseIndex(candidates, prefix, true);
		if (exactLines !== '\n') { matches += exactLines.substr(1, candidates.length - 1); }
	}
	return matches;
}
// Looks up every dictionary entry whose word is a prefix of `word`.
// Returns an array of parsed entries, in the order getIndexEntries lists
// them. Each entry is the regex match array of a dictionary line:
//   [1] first Hanzi group, [WORD] second Hanzi group, [PINYIN] text between
//   brackets, [DEF] text between slashes, plus [TONE] = the digits found in
//   the pinyin.
// When the first character is unknown, a single placeholder entry is
// returned (pinyin '?8', definition 'inconnu', tone 8 as a number).
// Index lines without a usable offset and dictionary lines that do not
// parse are logged and skipped.
function searchDict(word){
	var trouveIdx = /,(\d*)/;
	var dictLine = /([\u4E00-\u9FFF]*) ([\u4E00-\u9FFF]*)\s*\[([\u0000-\u00FF]*)]\s*\/([\u0000-\u9FFF]*)\//;
	var goodEntry = [];

	var valid = getIndexEntries(word);
	if (valid == 'inconnu') {
		var first = word.substr(0,1);
		var t = [];
		t[0] = first;
		t[1] = first;
		t[2] = first;
		t[3] = '?8';
		t[4] = 'inconnu';
		t[5] = 8;
		goodEntry[0] = t;
		return goodEntry;
	}
	// getIndexEntries returns false for an empty word: nothing to look up
	if (typeof valid != 'string') { return goodEntry; }

	var entries = valid.split('\n');
	// the list ends with '\n', so the last split item is empty and is skipped
	for (var i = 0; i < entries.length - 1; i++) {
		var u = trouveIdx.exec(entries[i]);
		var offset = (u == null) ? NaN : parseInt(u[1], 10);
		if (isNaN(offset)) {
			console.log('mauvaiseligne' + entries[i]);
			continue;
		}
		// the dictionary line runs from the offset to the next '\n' (or the end of the text)
		var end = myDico.indexOf('\n', offset);
		if (end == -1) { end = myDico.length; }
		var entry = myDico.substring(offset, end);

		var parsed = dictLine.exec(entry);
		if (parsed == null) {
			console.log('pour ' + word + ' une entree [' + i + ']: ' + entry);
			continue;
		}
		parsed[TONE] = parsed[PINYIN].replace(/\D/g, '');
		goodEntry.push(parsed);
	}
	return goodEntry;
}
// Returns a string with one tone digit per character of `chineseTxt`.
// The text is consumed from left to right: at each position up to 12
// characters are looked up with searchDict and the last entry returned
// (the longest match, given the order searchDict lists them) supplies
// the tone digits. '0' is emitted for a single character when nothing
// is found or when the entry has no usable tone string (missing, empty,
// or not one digit per character).
// The result never has more digits than `chineseTxt` has characters.
function getTones(chineseTxt){
	var l = 12; // maximum length of the looked-up string
	if (l > chineseTxt.length) { l = chineseTxt.length; }
	var tones = '';
	var i = 0;
	while (i < chineseTxt.length) {
		var car = chineseTxt.substr(i, l);
		var entries = searchDict(car);
		if (entries.length == 0) {
			console.log('allo Houston on a un pb un caractere inconnu !');
			tones += '0';
		}
		else {
			var best = entries[entries.length - 1];
			var digits = best[TONE];
			// the length > 0 test guarantees progress even for an empty entry
			if (typeof digits == 'string' && digits.length > 0 && digits.length == best[WORD].length) {
				tones += digits;
			}
			else { tones += '0'; console.log('good entry sans tons', best); }
		}
		// one digit per character: the digit count is also the position in the text
		i = tones.length;
	}
	if (i != chineseTxt.length) {
		console.log('ya un pb avec le texte:' + chineseTxt + 'et les tons ' + tones.toString());
		// keep exactly one digit per character
		return tones.substr(0, chineseTxt.length);
	}
	return tones;
}

// Splits `txt` into pieces with a tone marker for each piece.
// Returns 'noHanzi' when `txt` has no character in U+4E00..U+9FFF.
// Otherwise returns [cars, tones] where
//   cars[n]  is either a whole run of non-Hanzi text or a single Hanzi,
//   tones[n] is the number 8 for a non-Hanzi run, or the tone digit
//            (one-character string) given by getTones for a Hanzi.
// Both arrays always have the same length: when getTones returns too few
// or too many digits for a run, they are padded with '0' or truncated.
function splitNodeChinese(txt) { //to be replaced by textNode
	var haveHanzi = /([^\u4E00-\u9FFF]*)([\u4E00-\u9FFF]*)([\u0000-\uFFFF]*)/;
	var tab = haveHanzi.exec(txt);
	if (tab[2] == '') { return 'noHanzi'; }

	var cars = [];
	var tones = [];
	var suite;
	do {
		// tab[1]: non-Hanzi run, tab[2]: Hanzi run, tab[3]: everything after
		if (tab[1] != '') { cars.push(tab[1]); tones.push(8); }
		if (tab[2] != '') {
			var run = tab[2];
			var tt = getTones(run);
			if (tt == false) { console.log('car sans ton: car= "' + run + '"  ton=false'); }
			var digits = (typeof tt == 'string') ? tt : '';
			// keep one tone per character so cars and tones stay aligned
			while (digits.length < run.length) { digits += '0'; }
			digits = digits.substr(0, run.length);
			cars = cars.concat(run.split(""));
			tones = tones.concat(digits.split(""));
		}
		suite = tab[3];
		tab = haveHanzi.exec(suite);
	} while (suite != '');

	return [cars, tones];
}


// mousemove handler.
// Updates the globals `doc` / `win` from the event, remembers the text
// position under the mouse in `tdata`, shows the tooltip when the mouse
// is over text, and hides it once the mouse is more than 4 pixels away
// from where the tooltip was last shown.
function sourisBouge(ev) {
	// climb from the node carrying the listener up to the node that has a location (the document)
	doc = ev.currentTarget;
	while (doc.location == undefined) { doc = doc.parentNode; }
	win = doc.defaultView;
	console.log(doc.location.href);

	var rangeNode = ev.rangeParent;
	var rangeOffset = ev.rangeOffset;

	// nothing to do when the mouse is still on the same spot of the same node
	var samePlace = (ev.target == tdata.prevTarget)
		&& (rangeNode == tdata.prevRangeNode)
		&& (rangeOffset == tdata.prevRangeOfs);
	if (samePlace) { return; }

	// only text nodes (nodeType 3) and targets with a 'form' property keep their range
	var keepRange = (ev.explicitOriginalTarget.nodeType == 3) || ('form' in ev.target);
	if (!keepRange) {
		rangeNode = null;
		rangeOffset = -1;
	}

	tdata.prevTarget = ev.target;
	tdata.prevRangeNode = rangeNode;
	tdata.prevRangeOfs = rangeOffset;

	if (rangeNode && rangeNode.data && (rangeOffset < rangeNode.data.length)) {
		tdata.popX = ev.clientX;
		tdata.popY = ev.clientY;
		show(tdata, doc); // display the new popup
	}

	// do not close just because the mouse moved slightly off a valid popup position
	var dx = tdata.popX - ev.clientX;
	var dy = tdata.popY - ev.clientY;
	if (Math.sqrt(dx * dx + dy * dy) > 4) {
		exit(doc);
	}
	return;
}

// Builds and displays the dictionary tooltip for the text position stored
// in `tdata` (prevRangeNode / prevRangeOfs), next to the mouse position
// (popX / popY). Uses the globals `doc` and `win`.
// The tooltip is hidden (via exit) when there is no range node or when the
// first non-blank character at the position is not in U+4E00..U+9FFF.
// Up to 12 Hanzi are collected, continuing into following sibling nodes
// (or the parent's next sibling) while they start with Hanzi.
function show(tdata) {
	var rp = tdata.prevRangeNode;
	var ro = tdata.prevRangeOfs;
	var u;
	var i;
	if (!rp) { console.log('sortie 1'); exit(doc); return; }

	// if we have '   XYZ', where whitespace is compressed, X never seems to get selected
	while (((u = rp.data.charCodeAt(ro)) == 32) || (u == 9) || (u == 10)) {
		++ro;
		if (ro >= rp.data.length) { console.log('sortie 2'); exit(doc); return; }
	}
	if (isNaN(u) || u < 0x4E00 || u > 0x9FFF) { console.log('sortie 3'); exit(doc); return; }

	var text = rp.data.substr(ro, 12);
	var finished = false;
	while ((text.length < 12) && !finished) {
		// next node to read: the next sibling, else the parent's next sibling
		var next = rp.nextSibling;
		if (next == null && rp.parentNode) { next = rp.parentNode.nextSibling; }
		if (next == null) { finished = true; break; }
		rp = next;
		var txt = rp.textContent;
		for (i = 0; i < txt.length; i++) {
			u = txt.charCodeAt(i);
			// stop at the first non-Hanzi character or once 12 characters are collected
			if (isNaN(u) || u < 0x4E00 || u > 0x9FFF || text.length >= 12) { finished = true; break; }
			text += txt.charAt(i);
		}
	}

	var html = '';
	var dico = sortSearchDict(text);
	for (i = 0; i < dico.length; i++) {
		// TONE is the number 8 for the "unknown character" placeholder entry
		var toneDigits = String(dico[i][TONE]);
		html += '<big>';
		for (var j = 0; j < dico[i][WORD].length; j++) {
			// fall back to the "no indication" colour when there is no valid tone digit
			var shade = color[parseInt(toneDigits.charAt(j), 10)] || color[0];
			html += '<font color="' + shade + '">' + dico[i][WORD].charAt(j) + '</font>';
		}
		html += '</big>   ' + dico[i][PINYIN] + '  ' + dico[i][DEF] + '<br/>';
	}
	console.log(html);

	var infotool = doc.getElementById('infotool');
	if (!infotool) { return; } // tooltip element not installed in this document
	infotool.innerHTML = html;
	infotool.style.display = 'block';

	var ww = win.innerWidth;
	// park the tooltip at the left edge before reading its computed width
	infotool.style.left = (win.pageXOffset + 1) + 'px';
	var width = parseInt(doc.defaultView.getComputedStyle(infotool, null).getPropertyValue("width"), 10);
	if ((tdata.popX + offsetx + width) > ww) {
		// would overflow on the right: stick it to the right edge instead
		infotool.style.left = (win.pageXOffset + ww - width - 15) + 'px';
	}
	else {
		infotool.style.left = (win.pageXOffset + tdata.popX + offsetx) + 'px';
	}
	infotool.style.top = (win.pageYOffset + tdata.popY + offsety) + 'px';
}
// Looks `word` up with searchDict and merges the entries that share the
// same word and pinyin: one result row per distinct (word, pinyin) pair,
// in order of first appearance, whose definition is the union of the
// definitions of all merged entries.
// Rows only carry the WORD, PINYIN, TONE and DEF slots. Entries that were
// merged into an earlier row are flagged with 'done' in slot 6.
function sortSearchDict(word){
	var found = searchDict(word);
	var merged = [];
	for (var i = 0; i < found.length; i++) {
		var head = found[i];
		if (head[6] == 'done') { continue; } // already folded into an earlier row

		var key = head[WORD] + head[PINYIN];
		var row = [];
		row[WORD] = head[WORD];
		row[PINYIN] = head[PINYIN];
		row[TONE] = head[TONE];
		row[DEF] = head[DEF];

		// fold every later entry with the same word + pinyin into this row
		for (var j = i + 1; j < found.length; j++) {
			var other = found[j];
			if (key == (other[WORD] + other[PINYIN])) {
				row[DEF] = union(row[DEF], other[DEF]);
				other[6] = 'done';
			}
		}
		merged.push(row);
	}
	return merged;
}
// Merges two '/'-separated lists into one: the items of `tab1` followed
// by those of `tab2`, each distinct item kept once, in order of first
// appearance. Returns the merged list joined with '/'.
function union(tab1,tab2){
	var pieces = (tab1 + '/' + tab2).split('/');
	var unique = [];
	for (var n = 0; n < pieces.length; n++) {
		if (unique.indexOf(pieces[n]) == -1) {
			unique.push(pieces[n]);
		}
	}
	return unique.join('/');
}

// Hides the tooltip: sets display 'none' on the element with id 'infotool' of `doc`.
function exit(doc) {
	var tooltip = doc.getElementById('infotool');
	tooltip.style.display = 'none';
}

// Colourises every text node below the body of `document`.
// Text nodes whose parent already carries colorised="yes" are skipped, so
// calling this twice does not wrap the same characters again.
// Text inside script, style and textarea elements is left untouched:
// wrapping it in font elements would alter content that is not page prose.
function colorise(document){
	var skippedParents = { SCRIPT: true, STYLE: true, TEXTAREA: true };
	// ".//text()" is relative to the context node (the body); result type 6
	// is XPathResult.UNORDERED_NODE_SNAPSHOT_TYPE, a snapshot that stays
	// valid while the nodes are being replaced
	var textnodes = document.evaluate(".//text()",document.body,null,6,null);
	for (var i = 0; i < textnodes.snapshotLength; i++) {
		var node = textnodes.snapshotItem(i);
		var parent = node.parentNode;
		if (!parent || !parent.getAttribute) { continue; }
		if (skippedParents[String(parent.nodeName).toUpperCase()] === true) { continue; }
		if (parent.getAttribute('colorised') != 'yes') {
			replaceTextOfTextNode(document, node);
		}
	}
}
// Replaces the text node `node` by a fragment in which every Hanzi is
// wrapped in a FONT element coloured by its tone and marked with
// colorised="yes"; runs of other text are kept as plain text nodes.
// Does nothing when the text holds no Hanzi, when the text cannot be read,
// or when the node has no parent.
//   document: the document that owns `node`, used to create the new nodes
function replaceTextOfTextNode(document,node){
	var text;
	try {
		text = node.textContent; // could be data
	}
	catch (err) {
		console.log("There was an error on this page.\n\n" + "Error description: " + err.message + "\n\n");
		return;
	}
	var split = splitNodeChinese(text);
	if (split === 'noHanzi') { return; }
	if (!node.parentNode) { return; } // detached node: nothing to replace it in

	var cars = split[0];
	var tones = split[1];
	var frag = document.createDocumentFragment();
	for (var i = 0; i < cars.length; i++) {
		// tone marker 8 flags a run of non-Hanzi text
		if (tones[i] == '8') {
			frag.appendChild(document.createTextNode(cars[i]));
		}
		else {
			var font = document.createElement("FONT");
			// unknown tone digits get the "no indication" colour
			font.setAttribute("color", color[tones[i]] || color[0]);
			font.setAttribute('colorised', 'yes');
			font.appendChild(document.createTextNode(cars[i]));
			frag.appendChild(font);
		}
	}
	node.parentNode.replaceChild(frag,node);
}

// Start here: build the index, a table keyed by the code of the first
// Chinese character of each word. Each slot holds "\n" followed by all
// the index lines whose first character has that code.
// `dic` is declared without an initialiser so that a table left by an
// earlier evaluation in the same scope is kept and not rebuilt.
var dic;
if ( typeof(dic) == "undefined") {
		var current=0;   // read cursor into myIdx, advanced by ligne()
		var dictrie="";
		var nl=0;        // number of lines read
		var code=0;
		var mincode=99999999; // smallest first-character code above 200 seen so far
		var maxcode=0;        // largest first-character code seen so far
		var l;
		dic = Array();

		while (current <myIdx.length-2) {
			l=ligne();
			nl++;
			code=l.charCodeAt(0);
			if ((code<mincode)&&(code>200)) {mincode=code;}
			if (code>maxcode) {maxcode=code;}
			if (dic[code]==undefined) {dic[code]="\n"+l;}
			else {dic[code]+=l;}
		}
}
else {alert ("好"+"好".charCodeAt(0)+dic["好".charCodeAt(0)]);}
//console.log('Index charge', dic['府'.charCodeAt(0)]);
// end of index loading

// When a tab's document is ready, install the tooltip element and the
// mousemove listener in it. Only done for top-level documents (no frame
// element) whose character set starts with 'UTF' or 'GB', that have a
// body, and that do not already contain the tooltip element.
jetpack.tabs.onReady(function InstallInfobulle(docu) {
	doc=docu;
	if (doc.defaultView.frameElement) { return; }

	var utf=doc.characterSet;
	if ((utf.substr(0,3)!='UTF')&&(utf.substr(0,2)!='GB')) {
		console.log ("pas du chinois apparemment",utf,utf.substr(0,3));
		return;
	}
	// nothing to attach to without a body; already installed when the element exists
	if (!doc.body || doc.getElementById('infotool')) { return; }

	var el = doc.createElement('div');
	el.id = 'infotool';
	el.setAttribute("style","display:none ; position:absolute; padding:3px; background:#C0C0C0; border:2px solid #eee; z-index:999; text-align:left; font-size:16px; -moz-border-radius:10px;");
	el.innerHTML = 'je vous vois';
	doc.body.appendChild(el);
	doc.body.addEventListener("mousemove",function(event){sourisBouge(event);},false);
});

// Status bar widget: clicking it colourises the document of the focused
// tab, provided its character set starts with 'UTF' or 'GB'.
jetpack.statusBar.append({
	html: "<head><meta http-equiv='Content-type' content='text/html; charset=utf-8' /></head><body><span id='chin' style='font-size:14px'>中文颜色</span></body>",
	width: 70,
	onReady: function(widget){
		$(widget).click(function(){
			// work on whichever tab has the focus at click time
			doc = jetpack.tabs.focused.contentDocument;
			win = jetpack.tabs.focused.contentWindow;

			var utf = doc.characterSet;
			var firstThree = utf.substr(0,3);
			var accepted = (firstThree == 'UTF') || (utf.substr(0,2) == 'GB');
			if (!accepted) {
				console.log (utf,firstThree);
				return;
			}
			colorise(doc);
		});
	}
});
jetpack.future.import("selection");
jetpack.future.import("slideBar");
// Slide bar panel showing m.nciku.com; whenever text is selected, the
// panel is pointed at the nciku entry for the selection.
jetpack.slideBar.append({
  icon: "http://images.nciku.com/favicon.ico",
  width: 280,
  // the iframe is now closed with </iframe> (it used to be closed with </frame>)
  html: '<style>scrollbar * {display: none !important;}</style><iframe class="frame" height="99%" width="100%" src="http://m.nciku.com/"></iframe>',
  onReady: function(slide) { jetpack.selection.onSelection(function() {
    // encode the selection so characters such as '&', '#' or spaces cannot break the query string
    var query = encodeURIComponent(jetpack.selection.text);
    $(".frame", slide.contentDocument).attr("src", "http://m.nciku.com/en/entry?query=" + query);
    });
  }
});
// Slide bar panel showing mobile Wikipedia; whenever text is selected,
// the panel is pointed at the article named after the selection.
jetpack.slideBar.append({
  icon: "http://wikipedia.org/favicon.ico",
  width: 350,
  // the iframe is now closed with </iframe> (it used to be closed with </frame>)
  html: '<style>scrollbar * {display: none !important;}</style><iframe class="frame" height="99%" width="100%" src="http://en.m.wikipedia.org/wiki/"></iframe>',
  onReady: function(slide) { jetpack.selection.onSelection(function() {
    // encode the selection so characters such as '?', '#' or spaces cannot break the URL
    var title = encodeURIComponent(jetpack.selection.text);
    $(".frame", slide.contentDocument).attr("src", "http://en.m.wikipedia.org/wiki/" + title);
    });
  }
});