Prev: References | Appendix 1 |
1
Chapter 2 - Research Method and Design
Chapter 4 - Context Interviews
Chapter 7 - Research Evaluation
Source code for Garbling Noise Process
The source code below (in JavaScript) implements the “garbling” noise process described in Section 6.3.3.
The code was executed in Internet Explorer as a web page. It accepts a dataset by pasting the values into an HTML form. This JavaScript code then iterates over the dataset, garbling the rows on an attribute-by-attribute basis for a range of different garbling levels. The resulting garbled datasets are output to disk in industry-standard ARFF files for subsequent analysis by the RapidMiner data mining workbench.
<script type="text/javascript">
// Global state shared by the functions in this script.
// Declared with `var` so they are explicit globals; assigning to an undeclared
// name creates an implicit global and throws in strict-mode code.
var DataSet = [];  // loaded dataset: array of rows, each row an array of cell strings
var GarbleDS = []; // working copy of DataSet that garbleData() rebuilds and mutates
var Imputed = [];  // per-attribute replacement value computed by imputeData()
var Header = "";   // ARFF header text built by imputeData()
/**
 * Parse the comma-separated dataset pasted into the form and show a preview.
 *
 * Reads document.details.dataset.value, removes all whitespace from each line,
 * splits the line on commas and stores the resulting rows in the global
 * DataSet (replacing any previous contents). Lines that are empty after
 * whitespace removal are skipped, so a trailing newline in the pasted text
 * does not produce a bogus one-cell row. Finishes by calling displayData().
 *
 * Progress is reported through alert(). Returns nothing.
 */
function loadData() {
    var text, lines, line, r;

    alert('loading data ...');
    text = document.details.dataset.value;
    lines = text.split('\n');

    DataSet = [];
    for (r = 0; r < lines.length; r++) {
        // Strip every whitespace character (including the \r of CRLF input).
        line = lines[r].replace(/\s/g, '');
        if (line === '') {
            continue; // blank line - not a data row
        }
        DataSet.push(line.split(','));
    }

    if (DataSet.length === 0) {
        alert('No data rows found - nothing to display.');
        return;
    }
    alert('Found ' + DataSet.length + ' rows');
    // Column count is taken from the first row only.
    alert('Found ' + DataSet[0].length + ' cols');
    displayData(DataSet);
    return;
}
/**
 * Render a preview of the dataset into the table with id 'datatable'.
 *
 * The table is emptied first. The first row inserted holds one checkbox per
 * column of d[0] (ids 'cb0', 'cb1', ...); imputeData() reads these to decide
 * whether a column is treated as numeric. Up to document.details.rowmax.value
 * data rows follow, never more than the dataset actually contains.
 *
 * Cell values are inserted as text nodes rather than through innerHTML so
 * that markup characters in the pasted data are shown literally.
 *
 * @param d  two-dimensional array: rows of cell values
 */
function displayData(d) {
    var table = document.getElementById('datatable');
    var child, row, cell, box, limit, a, r, c;

    while ((child = table.firstChild)) { // delete existing rows
        table.removeChild(child);
    }
    if (!d || d.length === 0) {
        return; // nothing to preview
    }

    row = document.createElement('tr'); // checkbox row, one box per column
    for (a = 0; a < d[0].length; a++) {
        cell = document.createElement('td');
        box = document.createElement('input');
        box.type = "checkbox";
        box.id = "cb" + a;
        cell.appendChild(box);
        row.appendChild(cell);
    }
    table.appendChild(row);

    // Clamp the requested preview size to the rows available; a non-numeric
    // rowmax yields NaN, which makes the loop below run zero times.
    limit = Math.min(Number(document.details.rowmax.value), d.length);
    for (r = 0; r < limit; r++) {
        row = document.createElement('tr');
        for (c = 0; c < d[r].length; c++) {
            cell = document.createElement('td');
            cell.appendChild(document.createTextNode(d[r][c]));
            row.appendChild(cell);
        }
        table.appendChild(row);
    }
    return;
}
/**
 * Compute a replacement ("imputed") value for every attribute, show the values
 * in the preview table and build the ARFF header.
 *
 * For each column a of the global DataSet:
 *  - if checkbox 'cb'+a is ticked the column is numeric: Imputed[a] is the
 *    mean of the values that are present (cells containing '?' and cells that
 *    do not parse as a number are ignored, and do not count in the divisor);
 *  - otherwise the column is nominal: Imputed[a] is the most frequent value
 *    (first one seen wins a tie), with the missing marker '?' excluded from
 *    both the counts and the category list written to the header.
 * If a column has no usable values at all, Imputed[a] is '?'.
 *
 * Side effects: fills the global Imputed array, appends a row of text inputs
 * (ids 'iv0', 'iv1', ...) to the table 'datatable', and sets the global Header
 * to "@ATTRIBUTE a<n> NUMERIC" / "@ATTRIBUTE a<n> {v1, v2}" lines followed by
 * "@DATA". Progress is reported through alert(). Returns nothing.
 */
function imputeData() {
    var hasOwn = Object.prototype.hasOwnProperty;
    var categoryLists = []; // per nominal column: " {v1, v2}\n"
    var table, tr, td, iv;
    var a, r, n, value, number;
    var sum, observed;
    var counts, names, mode, modeCount, shown;

    if (DataSet.length === 0) {
        alert('No data loaded - nothing to impute.');
        return;
    }

    alert('Calculating imputed values ...');
    for (a = 0; a < DataSet[0].length; a++) {
        if (document.getElementById('cb' + a).checked) {
            // Numeric column: mean of the observed values.
            sum = 0;
            observed = 0;
            for (r = 0; r < DataSet.length; r++) {
                value = DataSet[r][a];
                if (value.search(/[?]/) !== -1) {
                    continue; // missing value
                }
                number = parseFloat(value);
                if (isNaN(number)) {
                    continue; // not a number - would poison the sum
                }
                sum += number;
                observed++;
            }
            Imputed[a] = observed > 0 ? sum / observed : '?';
        } else {
            // Nominal column: mode of the observed values.
            counts = {};
            names = []; // distinct values in order of first appearance
            for (r = 0; r < DataSet.length; r++) {
                value = DataSet[r][a];
                if (value === '?') {
                    continue; // missing value is not a category
                }
                // Own-property test so values such as "constructor" are
                // counted correctly instead of hitting inherited members.
                if (hasOwn.call(counts, value)) {
                    counts[value]++;
                } else {
                    counts[value] = 1;
                    names.push(value);
                }
            }
            mode = '?';
            modeCount = 0;
            for (n = 0; n < names.length; n++) {
                if (counts[names[n]] > modeCount) {
                    mode = names[n];
                    modeCount = counts[names[n]];
                }
            }
            categoryLists[a] = ' {' + names.join(', ') + '}\n';
            Imputed[a] = mode;
        }
    }

    alert('Inserting imputed values ...');
    table = document.getElementById('datatable');
    tr = document.createElement('tr'); // one text box per attribute
    for (a = 0; a < DataSet[0].length; a++) {
        td = document.createElement('td');
        iv = document.createElement('input');
        iv.type = "text";
        iv.id = "iv" + a;
        iv.value = Imputed[a];
        // Box width follows the value's length, kept within 1..5 characters.
        shown = String(Imputed[a]).length;
        iv.size = Math.max(1, Math.min(5, shown));
        td.appendChild(iv);
        tr.appendChild(td);
    }
    table.appendChild(tr);

    alert('Building ARFF header ...');
    Header = "";
    for (a = 0; a < DataSet[0].length; a++) {
        Header += "@ATTRIBUTE a" + a;
        if (document.getElementById('cb' + a).checked) {
            Header += " NUMERIC\n";
        } else {
            Header += categoryLists[a];
        }
    }
    Header += "@DATA\n";
    alert('Header: ' + Header);
    return;
}
/**
 * Apply the garbling noise process to the loaded dataset.
 *
 * For every attribute except the last (the class) and for every level
 * i = 1..maxiter, a fresh copy of DataSet is made in the global GarbleDS and
 * each row is processed in order:
 *  1. drop: with probability droprate/100, or always when the cell starts
 *     with '?', the cell is replaced by Imputed[attribute];
 *  2. garble: with probability i/maxiter the cell is swapped with the same
 *     attribute of a randomly chosen row. When the "toggle" box is ticked the
 *     target is re-drawn until it differs from the current row.
 *
 * After each (attribute, level) pass the counters are written to the form
 * fields attr, iter, garbles, drops and errors (errors = swaps that exchanged
 * two different values), the output name "<setname>-a<attr>-i-<level>" is put
 * in outfile, and, if "writefile" is ticked, the garbled rows are handed to
 * writeToFile() as newline-separated CSV.
 *
 * Reads its settings from document.details. Returns nothing.
 */
function garbleData() {
    var form = document.details;
    var rowCount = DataSet.length;
    var maxiter, d, p;
    var a, i, r, t, temp;
    var gCount, dCount, eCount;

    alert('garbling data ...');
    if (rowCount === 0) {
        alert('No data loaded - nothing to garble.');
        return;
    }
    maxiter = Number(form.maxiter.value);
    d = form.droprate.value / 100;

    for (a = 0; a < DataSet[0].length - 1; a++) { // each attribute (exclude class)
        for (i = 1; i <= maxiter; i++) { // garbling probability grows from 1/maxiter to 1
            p = i / maxiter;
            gCount = 0;
            dCount = 0;
            eCount = 0;

            GarbleDS = [];
            for (r = 0; r < rowCount; r++) { // clone original dataset
                GarbleDS[r] = DataSet[r].slice();
            }

            for (r = 0; r < rowCount; r++) {
                // Strict '<' so that a rate of 0 can never fire
                // (Math.random() may return exactly 0).
                if (Math.random() < d || String(GarbleDS[r][a]).search(/[?]/) === 0) {
                    GarbleDS[r][a] = Imputed[a]; // insert imputed value
                    dCount++;
                }
                if (Math.random() < p) {
                    // Pick the swap target. With a single row there is no
                    // other row to pick, so do not insist on t != r.
                    do {
                        t = Math.floor(Math.random() * rowCount);
                    } while (form.toggle.checked && rowCount > 1 && t === r);

                    temp = GarbleDS[t][a];
                    GarbleDS[t][a] = GarbleDS[r][a];
                    GarbleDS[r][a] = temp;
                    gCount++;
                    if (GarbleDS[t][a] != GarbleDS[r][a]) {
                        eCount++; // the swap actually changed the cell
                    }
                }
            }

            form.attr.value = a;
            form.iter.value = i;
            form.garbles.value = gCount;
            form.drops.value = dCount;
            form.errors.value = eCount;
            form.outfile.value = form.setname.value + "-a" + a + "-i-" + i;
            if (form.writefile.checked) {
                // Joining an array of rows stringifies each row as "v1,v2,...".
                writeToFile(GarbleDS.join('\n').toString());
            }
        }
    }
    return;
}
/**
 * Write one garbled dataset to disk as an ARFF file (Internet Explorer only).
 *
 * The file name is basedir + outfile + '.arff', both taken from
 * document.details. The content is a '%eCount <errors>' comment line, an
 * '@RELATION <outfile>' line, the global Header, two blank lines and writeStr.
 *
 * Uses the Scripting.FileSystemObject ActiveX control
 * (see http://www.webreference.com/js/tips/001031.html). If the control
 * cannot be created the user is alerted, the "writefile" checkbox is cleared
 * so later calls are skipped by the caller, and nothing is written.
 *
 * @param writeStr  the data rows, already formatted as newline-separated CSV
 */
function writeToFile(writeStr) {
    var fname = document.details.basedir.value + document.details.outfile.value + '.arff';
    var fso, stream;

    try {
        fso = new ActiveXObject("Scripting.FileSystemObject");
    }
    catch (e) {
        alert('Cannot write to file - failed creating ActiveXObject.');
        document.details.writefile.checked = false;
        return;
    }

    // CreateTextFile already returns an open stream; write through it rather
    // than leaving it open and reopening the file a second time.
    // Arguments: overwrite existing file = true, unicode = false (ASCII).
    stream = fso.CreateTextFile(fname, true, false);
    try {
        stream.Write('%eCount ' + document.details.errors.value + '\n' + '@RELATION ' + document.details.outfile.value + '\n' + Header + '\n\n' + writeStr); // garbled only
    }
    finally {
        stream.Close(); // release the file handle even if Write fails
    }
}
</script>
Prev: References | Up: Contents |