Prev:
References

Appendix 1

1             

Summary. 1

Chapter 1 - Introduction. 12

Chapter 2 - Research Method and Design. 18

Chapter 3 - Literature Review. 36

Chapter 4 - Context Interviews. 56

Chapter 5 - Conceptual Study. 84

Chapter 6 - Simulations. 124

Chapter 7 - Research Evaluation. 166

Chapter 8 - Conclusion. 180

References. 184

Appendix 1. 194


Appendix 1

Source code for Garbling Noise Process

The source code below (in JavaScript) implements the “garbling” noise process described in Section 6.3.3.

The code was executed in Internet Explorer as a web page. It accepts a dataset by pasting the values into an HTML form. This JavaScript code then iterates over the dataset, garbling the rows on an attribute-by-attribute basis for a range of different garbling levels. The resulting garbled datasets are output to disk in industry-standard ARFF files for subsequent analysis by the RapidMiner data mining workbench.

<script type="text/javascript">

// Global state shared by the functions on this page.
// Declared with `var` so they are explicit globals: in a classic <script>
// they remain reachable as page-level names, and the declarations no longer
// rely on implicit-global creation (which throws in strict mode).

var DataSet = [];  // loaded dataset: array of rows, each row an array of column values
var GarbleDS = []; // working copy of DataSet that receives the garbling noise
var Imputed = [];  // Imputed[a] is the replacement value for missing entries of attribute a
var Header = "";   // ARFF header text (@ATTRIBUTE lines followed by @DATA)

function loadData() {
  // Load the dataset pasted into the form field `document.details.dataset`
  // into the global DataSet and show a preview via displayData().
  //
  // Each non-blank input line becomes one row; all whitespace is stripped
  // from the line and the remainder is split on commas into column strings.
  // Blank lines (typically the trailing newline of a paste) are skipped so
  // that no one-column "empty" row ends up in DataSet.
  alert('loading data ...');

  var data = document.details.dataset.value;
  var lines = data.split('\n');
  var parsed = [];

  for (var r = 0; r < lines.length; r++) {
    var line = lines[r].replace(/[\n\r\s]/ig, '');
    if (line === '') {
      continue; // blank line: not a data row
    }
    parsed.push(line.split(','));
  }

  DataSet = parsed;
  alert('Found '+DataSet.length+' rows');

  if (DataSet.length === 0) {
    // Nothing to count columns on or to display.
    alert('No data rows found.');
    return;
  }

  alert('Found '+DataSet[0].length+' cols');
  displayData(DataSet);
  return;
}

 

function displayData(d) {
  // Render a preview of dataset `d` (array of rows, each an array of cell
  // values) into the element with id 'datatable'.
  //
  // The element is emptied first. The first row added holds one checkbox per
  // column of d[0], with ids 'cb0', 'cb1', ...; these are the boxes
  // imputeData() reads to decide whether a column is numeric. After that, up
  // to `document.details.rowmax` data rows are shown, never more than d has.
  var rowLimit = Math.min(Number(document.details.rowmax.value), d.length);
  var colCount = d.length > 0 ? d[0].length : 0;
  var t = document.getElementById('datatable');
  var tr, td, cb;

  while (t.firstChild) { // delete existing rows
    t.removeChild(t.firstChild);
  }

  tr = document.createElement('tr'); // checkbox row
  for (var a = 0; a < colCount; a++) {
    td = document.createElement('td');
    cb = document.createElement('input');
    cb.type = "checkbox";
    cb.id = "cb"+a;
    td.appendChild(cb);
    tr.appendChild(td);
  }
  t.appendChild(tr);

  for (var r = 0; r < rowLimit; r++) {
    tr = document.createElement('tr');
    for (var c = 0; c < d[r].length; c++) {
      td = document.createElement('td');
      // Insert as text, not markup: cell values come straight from pasted data.
      td.appendChild(document.createTextNode(d[r][c]));
      tr.appendChild(td);
    }
    t.appendChild(tr);
  }
  return;
}

 

function imputeData() {
  // Estimate a replacement value for missing ('?') entries of every
  // attribute, show those values on the page, and build the ARFF header.
  //
  // For each column a of the global DataSet:
  //   - checkbox 'cb'+a ticked -> numeric attribute: Imputed[a] is the mean
  //     of the values that are present (0 if none are);
  //   - otherwise              -> nominal attribute: Imputed[a] is the most
  //     frequent category, the first one found winning a tie ('?' if the
  //     column holds nothing else).
  // A row of text inputs with ids 'iv'+a holding the imputed values is
  // appended to the 'datatable' element, and the global Header is rebuilt
  // with one @ATTRIBUTE line per column (named a0, a1, ...) plus @DATA.
  var hasOwn = Object.prototype.hasOwnProperty;
  var Cats = []; // Cats[a]: " { v1, v2}\n" declaration text of nominal attribute a
  var a, r;

  if (DataSet.length === 0) {
    alert('No data loaded.');
    return;
  }

  alert('Calculating imputed values ...');

  for (a = 0; a < DataSet[0].length; a++) {
    if (document.getElementById('cb'+a).checked) {
      // Numeric: average only the values that are present, so that missing
      // entries do not pull the mean towards zero.
      var sum = 0;
      var present = 0;
      for (r = 0; r < DataSet.length; r++) {
        if (DataSet[r][a].search(/[?]/) == -1) { // test for missing value
          sum += parseFloat(DataSet[r][a]);
          present++;
        }
      }
      Imputed[a] = present > 0 ? sum / present : 0;
    } else {
      // Nominal: count every distinct value. Own-property checks keep values
      // such as "constructor" from colliding with inherited object members.
      var counts = {};
      for (r = 0; r < DataSet.length; r++) {
        var value = DataSet[r][a];
        if (hasOwn.call(counts, value)) {
          counts[value]++;
        } else {
          counts[value] = 1;
        }
      }

      var names = [];
      var mode = '?';
      var modeCount = 0;
      for (var c in counts) {
        if (!hasOwn.call(counts, c) || c == '?') {
          continue; // the missing marker is neither a category nor a valid mode
        }
        names.push(c);
        if (counts[c] > modeCount) {
          mode = c;
          modeCount = counts[c];
        }
      }

      Cats[a] = " { " + names.join(", ").replace(/\n|\r/g, '') + "}\n";
      Imputed[a] = mode; // mode value
    }
  }

  alert('Inserting imputed values ...');

  var t = document.getElementById('datatable');
  var tr = document.createElement('tr'); // row of imputed values
  for (a = 0; a < DataSet[0].length; a++) {
    var td = document.createElement('td');
    var iv = document.createElement('input');
    var shownValue = String(Imputed[a]);
    iv.type = "text";
    iv.id = "iv"+a;
    iv.value = shownValue;
    // Field width follows the text, capped at 5 and never below 1.
    iv.size = shownValue.length > 5 ? "5" : Math.max(1, shownValue.length);
    td.appendChild(iv);
    tr.appendChild(td);
  }
  t.appendChild(tr);

  alert('Building ARFF header ...');

  Header = "";
  for (a = 0; a < DataSet[0].length; a++) {
    Header += "@ATTRIBUTE a"+a;
    if (document.getElementById('cb'+a).checked) { // numeric or nominal?
      Header += " NUMERIC\n";
    } else {
      Header += Cats[a];
    }
  }
  Header += "@DATA\n";

  alert('Header: '+Header);
  return;
}

function garbleData() {
  // Apply the garbling noise process to the global DataSet.
  //
  // For every attribute except the last column (the class), and for every
  // iteration i = 1..maxiter, a fresh copy of DataSet is made in the global
  // GarbleDS and each row is processed in order:
  //   1. drop: with probability droprate/100, or when the value starts with
  //      '?', the value is replaced by Imputed[a];
  //   2. garble: with probability i/maxiter, the value is swapped with the
  //      same attribute of a randomly chosen row. When the "toggle" box is
  //      ticked, a row is never left swapped with itself.
  // After each iteration the form fields attr, iter, garbles, drops, errors
  // and outfile are updated, and the garbled rows are handed to
  // writeToFile() when the "writefile" box is ticked.
  alert('garbling data ...');

  var maxiter = Number(document.details.maxiter.value);
  var d = document.details.droprate.value/100;
  var a, i, r, t, p, temp, filename;
  var gCount, dCount, eCount; // swaps attempted, values dropped, swaps that changed a value

  for (a = 0; a < DataSet[0].length-1; a++) { // for each attribute (exclude class)
    for (i = 1; i <= maxiter; i++) {
      // for each iteration, starting with 1/maxiter probability of garbling
      gCount = 0;
      dCount = 0;
      eCount = 0;
      p = i/maxiter;

      GarbleDS = [];
      for (r = 0; r < DataSet.length; r++) { // clone original dataset
        GarbleDS[r] = DataSet[r].slice(0);
      }

      for (r = 0; r < DataSet.length; r++) { // for each row
        if (Math.random() <= d || GarbleDS[r][a].toString().search(/[?]/) == 0) {
          // if "success" or ? then drop
          GarbleDS[r][a] = Imputed[a]; // insert imputed value
          dCount++;
        }

        if (Math.random() <= p) { // if "success" then swap
          do {
            t = Math.floor(Math.random()*DataSet.length); // pick target
            temp = GarbleDS[t][a]; // swap current with target
            GarbleDS[t][a] = GarbleDS[r][a];
            GarbleDS[r][a] = temp;
            // Retry only while another row exists to pick; with a single
            // row t == r always and the loop would never end.
          } while (document.details.toggle.checked && t == r && DataSet.length > 1);
          gCount++;
          if (GarbleDS[t][a] != GarbleDS[r][a]) {
            eCount++;
          }
        }
      }

      document.details.attr.value = a;
      document.details.iter.value = i;
      document.details.garbles.value = gCount;
      document.details.drops.value = dCount;
      document.details.errors.value = eCount;
      filename = document.details.setname.value+"-a"+a+"-i-"+i;
      document.details.outfile.value = filename;

      if (document.details.writefile.checked) {
        writeToFile(GarbleDS.join('\n').toString());
      }
    }
  }
  return;
}

 

function writeToFile(writeStr) {
  // Write one garbled dataset to disk as an ARFF text file.
  //
  // writeStr: the data rows, one comma-separated row per line.
  //
  // The file name is basedir + outfile + '.arff', both read from the form.
  // The file starts with a '%eCount' comment line carrying the value of the
  // form's "errors" field, then '@RELATION <outfile>', the global Header and
  // finally writeStr (garbled rows only; the original rows are not included).
  //
  // IE specific: relies on the Scripting.FileSystemObject ActiveX control.
  // If the control cannot be created, the user is told and the "writefile"
  // box is unticked so later iterations do not retry.
  // http://www.webreference.com/js/tips/001031.html
  var fname = document.details.basedir.value+document.details.outfile.value+'.arff';
  var fso;

  try {
    fso = new ActiveXObject("Scripting.FileSystemObject");
  } catch(e) {
    alert('Cannot write to file - failed creating ActiveXObject.');
    document.details.writefile.checked=false;
    return;
  }

  // CreateTextFile already returns an open text stream for the new file, so
  // write through it directly instead of leaving it open and opening a
  // second stream on the same file.
  var text = fso.CreateTextFile(fname);
  try {
    text.Write('%eCount '+document.details.errors.value+'\n'+'@RELATION '+document.details.outfile.value+'\n'+Header+'\n\n'+writeStr);
  } finally {
    text.Close(); // release the file handle even if the write fails
  }
}

</script>


 

 

Prev:
References
Up:
Contents