001/** 002 * Copyright (c) 2011, The University of Southampton and the individual contributors. 003 * All rights reserved. 004 * 005 * Redistribution and use in source and binary forms, with or without modification, 006 * are permitted provided that the following conditions are met: 007 * 008 * * Redistributions of source code must retain the above copyright notice, 009 * this list of conditions and the following disclaimer. 010 * 011 * * Redistributions in binary form must reproduce the above copyright notice, 012 * this list of conditions and the following disclaimer in the documentation 013 * and/or other materials provided with the distribution. 014 * 015 * * Neither the name of the University of Southampton nor the names of its 016 * contributors may be used to endorse or promote products derived from this 017 * software without specific prior written permission. 018 * 019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 029 */ 030package org.openimaj.math.statistics.normalisation; 031 032/** 033 * z-score normalisation (standardisation). Upon training, the mean and variance 034 * of each dimension is computed; normalisation works by subtracting the mean 035 * and dividing by the standard deviation. 036 * <p> 037 * This implementation includes an optional regularisation parameter that is 038 * added to the variance before the division. 039 * 040 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk) 041 * 042 */ 043public class ZScore implements TrainableNormaliser, Denormaliser { 044 double[] mean; 045 double[] sigma; 046 double eps = 0; 047 048 /** 049 * Construct without regularisation. 050 */ 051 public ZScore() { 052 } 053 054 /** 055 * Construct with regularisation. 056 * 057 * @param eps 058 * the variance normalisation regulariser (each dimension is 059 * divided by sqrt(var + eps). 060 */ 061 public ZScore(double eps) { 062 } 063 064 @Override 065 public void train(double[][] data) { 066 mean = new double[data[0].length]; 067 sigma = new double[data[0].length]; 068 069 for (int r = 0; r < data.length; r++) 070 for (int c = 0; c < data[0].length; c++) 071 mean[c] += data[r][c]; 072 073 for (int c = 0; c < data[0].length; c++) 074 mean[c] /= data.length; 075 076 for (int r = 0; r < data.length; r++) { 077 for (int c = 0; c < data[0].length; c++) { 078 final double delta = (data[r][c] - mean[c]); 079 sigma[c] += delta * delta; 080 } 081 } 082 083 for (int c = 0; c < data[0].length; c++) 084 sigma[c] = Math.sqrt(eps + (sigma[c] / (data.length - 1))); 085 } 086 087 @Override 088 public double[] normalise(double[] vector) { 089 final double[] out = new double[vector.length]; 090 for (int c = 0; c < out.length; c++) 091 out[c] = (vector[c] - mean[c]) / sigma[c]; 092 return out; 093 } 094 095 @Override 096 public double[][] normalise(double[][] data) { 097 final double[][] out = new double[data.length][]; 098 for (int c = 0; c < out.length; c++) 099 out[c] = normalise(data[c]); 100 return out; 101 } 102 103 @Override 104 public double[] denormalise(double[] vector) { 105 final double[] out = new double[vector.length]; 106 for (int c = 0; c < out.length; c++) 107 out[c] = sigma[c] * vector[c] + mean[c]; 108 return out; 109 } 110 111 @Override 112 public double[][] denormalise(double[][] data) { 113 final double[][] out = new double[data.length][]; 114 for (int c = 0; c < out.length; c++) 115 out[c] = denormalise(data[c]); 116 return out; 117 } 118}