ml-cb

This model is a fine-tuned version of dbernsohn/roberta-javascript on the ml-cb dataset. It is trained on plain-text JavaScript source (as opposed to source that has first been processed with JSNice or a JavaScript obfuscator). More information about the project may be found at OSF.

This model is the transformer version of ml-cb. It achieves the following results on the evaluation set:

---EXAMPLE input #1: Canonical Fingerprinting (should be true)---
/**
 * Canonical canvas-fingerprinting sample.
 *
 * Creates a canvas element, draws a fixed string and a few rectangles on its
 * 2D context with assorted fill, rotation and shadow settings, then serialises
 * the canvas with toDataURL(). The resulting data URL is only stored in a
 * local variable; the function returns nothing.
 */
function fingerprint() {
    var canvas = document.createElement('canvas');
    var ctx = canvas.getContext('2d');
    var txt = 'i9asdm..$#po((^@KbXrww!~cz';
    ctx.textBaseline = 'top';
    // The font family carries its own quotes inside the CSS font string,
    // hence the double-quoted literal.
    ctx.font = "16px 'Arial'";
    ctx.textBaseline = 'alphabetic';
    ctx.rotate(.05);
    ctx.fillStyle = '#f60';
    ctx.fillRect(125, 1, 62, 20);
    ctx.fillStyle = '#069';
    ctx.fillText(txt, 2, 15);
    ctx.fillStyle = 'rgba(102, 200, 0, 0.7)';
    ctx.fillText(txt, 4, 17);
    ctx.shadowBlur = 10;
    ctx.shadowColor = 'blue';
    ctx.fillRect(-20, 10, 234, 5);
    // Serialise the rendered canvas to a data URL.
    var strng = canvas.toDataURL();
}
---EXAMPLE input #2: WPemoji False Positive (should be false)---
// Emoji-loader sample: it touches the canvas API only to probe rendering
// support, which is why the heading labels it a false positive.
// Settings object read by the loader below (passed in as its third argument).
window._wpemojiSettings = {
    'baseUrl': 'http:\/\/s.w.org\/images\/core\/emoji\/72x72\/',
    'ext': '.png',
    'source': {
        'concatemoji': 'http:\/\/basho.com\/wp-includes\/js\/wp-emoji-release.min.js?ver=4.2.2'
    }
};
// Minified loader, invoked immediately with a = window, b = document,
// c = window._wpemojiSettings.
! function(a, b, c) {
    // Rendering-support probe. Returns false when no 2D context or no
    // fillText is available. For 'flag' it draws a four-code-unit string and
    // reports whether the canvas data URL is longer than 3000 characters;
    // for any other argument it draws a two-code-unit string and reports
    // whether the first data value of the pixel at (16, 16) is non-zero.
    function d(a) {
        var c = b.createElement('canvas'),
            d = c.getContext && c.getContext('2d');
        return d && d.fillText ? (d.textBaseline = 'top', d.font = '600 32px Arial', 'flag' === a ? (d.fillText(String.fromCharCode(55356, 56812, 55356, 56807), 0, 0), c.toDataURL().length > 3e3) : (d.fillText(String.fromCharCode(55357, 56835), 0, 0), 0 !== d.getImageData(16, 16, 1, 1).data[0])) : !1
    }

    // Appends a <script> element with the given src to the first <head>.
    function e(a) {
        var c = b.createElement('script');
        c.src = a, c.type = 'text/javascript', b.getElementsByTagName('head')[0].appendChild(c)
    }
    var f, g;
    // Records the probe results and a DOMReady flag on the settings object.
    // Only when 'simple' and 'flag' are not both supported does it register
    // ready/load listeners and load the script(s): source.concatemoji if
    // present, otherwise source.twemoji followed by source.wpemoji when both
    // are present.
    c.supports = {
        simple: d('simple'),
        flag: d('flag')
    }, c.DOMReady = !1, c.readyCallback = function() {
        c.DOMReady = !0
    }, c.supports.simple && c.supports.flag || (g = function() {
        c.readyCallback()
    }, b.addEventListener ? (b.addEventListener('DOMContentLoaded', g, !1), a.addEventListener('load', g, !1)) : (a.attachEvent('onload', g), b.attachEvent('onreadystatechange', function() {
        'complete' === b.readyState && c.readyCallback()
    })), f = c.source || {}, f.concatemoji ? e(f.concatemoji) : f.wpemoji && f.twemoji && (e(f.twemoji), e(f.wpemoji)))
}(window, document, window._wpemojiSettings);

Intended uses

The model is intended to detect canvas fingerprinting in JavaScript programs. Canvas fingerprinting is a stateless form of web tracking that allows companies to track users.

Limitations

Training and evaluation data

The training data was cleaned from the original dataset.

Training procedure

Training hyperparameters

The following hyperparameters were used during training:

Training results

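| Train Loss | Validation Loss | Train Accuracy | Epoch |
|------------|-----------------|----------------|-------|
| 0.1217     | 0.0753          | 0.9741         | 0     |
| 0.0799     | 0.0651          | 0.9741         | 1     |
| 0.0639     | 0.0471          | 0.9870         | 2     |
| 0.0459     | 0.0539          | 0.9806         | 3     |
| 0.0357     | 0.0525          | 0.9849         | 4     |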

Framework versions