class TestDendrogram(TestCase):
    """Tests for ``tls.FigureFactory.create_dendrogram``."""

    def test_default_dendrogram(self):
        """Default (bottom-oriented) dendrogram of a small fixed matrix."""
        dendro = tls.FigureFactory.create_dendrogram(
            X=np.array([[1, 2, 3, 4],
                        [1, 1, 3, 4],
                        [1, 2, 1, 4],
                        [1, 2, 3, 1]]))
        expected_data = [{'marker': {'color': 'rgb(255,133,27)'},
                          'mode': 'lines',
                          'xaxis': 'x',
                          'yaxis': 'y',
                          'y': np.array([0., 1., 1., 0.]),
                          'x': np.array([25., 25., 35., 35.]),
                          'type': u'scatter'},
                         {'marker': {'color': 'rgb(255,133,27)'},
                          'mode': 'lines',
                          'xaxis': 'x',
                          'yaxis': 'y',
                          'y': np.array([0., 2.23606798,
                                         2.23606798, 1.]),
                          'x': np.array([15., 15., 30., 30.]),
                          'type': u'scatter'},
                         {'marker': {'color': 'blue'},
                          'mode': 'lines',
                          'xaxis': 'x',
                          'yaxis': 'y',
                          'y': np.array([0., 3.60555128,
                                         3.60555128, 2.23606798]),
                          'x': np.array([5., 5., 22.5, 22.5]),
                          'type': u'scatter'}]
        expected_layout = {'width': '100%',
                           'showlegend': False,
                           'autoscale': False,
                           'xaxis': {'showticklabels': True,
                                     'tickmode': 'array',
                                     'ticks': 'outside',
                                     'showgrid': False,
                                     'mirror': 'allticks',
                                     'zeroline': False,
                                     'showline': True,
                                     # Native string dtype so the comparison
                                     # holds on both Python 2 and Python 3.
                                     'ticktext': np.array(['3', '2',
                                                           '0', '1']),
                                     'rangemode': 'tozero',
                                     'type': 'linear',
                                     'tickvals': np.array([5.0, 15.0,
                                                           25.0, 35.0])},
                           'yaxis': {'showticklabels': True,
                                     'ticks': 'outside',
                                     'showgrid': False,
                                     'mirror': 'allticks',
                                     'zeroline': False,
                                     'showline': True,
                                     'rangemode': 'tozero',
                                     'type': 'linear'},
                           'hovermode': 'closest'}

        # Make sure data is as expected (every trace, including the first)
        self.assertEqual(len(dendro['data']), len(expected_data))
        for trace, expected in zip(dendro['data'], expected_data):
            self.assertTrue(np.allclose(trace['x'], expected['x']))
            self.assertTrue(np.allclose(trace['y'], expected['y']))

        # Make sure layout is as expected
        self.assertTrue(np.array_equal(dendro['layout']['xaxis']['ticktext'],
                                       expected_layout['xaxis']['ticktext']))
        self.assertTrue(np.array_equal(dendro['layout']['xaxis']['tickvals'],
                                       expected_layout['xaxis']['tickvals']))
        self.assertEqual(dendro['layout']['xaxis']['ticks'], 'outside')
        self.assertEqual(dendro['layout']['yaxis']['ticks'], 'outside')
        self.assertEqual(dendro['layout']['width'], expected_layout['width'])

    def test_dendrogram_random_matrix(self):
        """Labels follow their observations through the clustering."""
        # create a random uncorrelated matrix
        X = np.random.rand(5, 5)
        # variable 2 is correlated with all the other variables
        X[2, :] = sum(X, 0)

        names = ['Jack', 'Oxana', 'John', 'Chelsea', 'Mark']
        dendro = tls.FigureFactory.create_dendrogram(X, labels=names)

        # Check that 2 is in a separate cluster and it's labelled correctly
        self.assertEqual(dendro['layout']['xaxis']['ticktext'][0], 'John')

    def test_dendrogram_orientation(self):
        """Tick values land on the expected side for each orientation."""
        X = np.random.rand(5, 5)

        dendro_left = tls.FigureFactory.create_dendrogram(
            X, orientation='left')
        self.assertEqual(len(dendro_left['layout']['yaxis']['ticktext']), 5)
        tickvals_left = np.array(dendro_left['layout']['yaxis']['tickvals'])
        self.assertTrue((tickvals_left <= 0).all())

        dendro_right = tls.FigureFactory.create_dendrogram(
            X, orientation='right')
        tickvals_right = np.array(dendro_right['layout']['yaxis']['tickvals'])
        self.assertTrue((tickvals_right >= 0).all())

        dendro_bottom = tls.FigureFactory.create_dendrogram(
            X, orientation='bottom')
        self.assertEqual(len(dendro_bottom['layout']['xaxis']['ticktext']), 5)
        tickvals_bottom = np.array(
            dendro_bottom['layout']['xaxis']['tickvals'])
        self.assertTrue((tickvals_bottom >= 0).all())

        dendro_top = tls.FigureFactory.create_dendrogram(X, orientation='top')
        tickvals_top = np.array(dendro_top['layout']['xaxis']['tickvals'])
        self.assertTrue((tickvals_top <= 0).all())

    # Previously also named ``test_dendrogram_orientation``, which replaced
    # the orientation test above in the class namespace so it never ran.
    def test_dendrogram_colorscale(self):
        """A user colorscale replaces the default cluster colors."""
        X = np.array([[1, 2, 3, 4],
                      [1, 1, 3, 4],
                      [1, 2, 1, 4],
                      [1, 2, 3, 1]])
        greyscale = [
            'rgb(0,0,0)',        # black
            'rgb(105,105,105)',  # dim grey
            'rgb(128,128,128)',  # grey
            'rgb(169,169,169)',  # dark grey
            'rgb(192,192,192)',  # silver
            'rgb(211,211,211)',  # light grey
            'rgb(220,220,220)',  # gainsboro
            'rgb(245,245,245)']  # white smoke

        dendro = tls.FigureFactory.create_dendrogram(X, colorscale=greyscale)
        self.assertEqual(dendro["data"][0]['marker']['color'],
                         'rgb(128,128,128)')
        self.assertEqual(dendro["data"][1]['marker']['color'],
                         'rgb(128,128,128)')
        self.assertEqual(dendro["data"][2]['marker']['color'],
                         'rgb(0,0,0)')
@staticmethod
def create_dendrogram(X, orientation="bottom", labels=None,
                      colorscale=None):
    """
    BETA function that returns a dendrogram Plotly figure object.

    Static method of FigureFactory.

    :param (ndarray) X: Matrix of observations as array of arrays
    :param (str) orientation: 'top', 'right', 'bottom', or 'left'
    :param (list) labels: List of axis category labels(observation labels)
    :param (list) colorscale: Optional colorscale for dendrogram tree
                              clusters

    :rtype (dict): figure dictionary with 'layout' and 'data' keys
    :raises (ImportError): if scipy, scipy.spatial or
        scipy.cluster.hierarchy could not be imported
    :raises (PlotlyError): if X is not a 2-dimensional array

    Example 1: Simple bottom oriented dendrogram
    ```
    import numpy as np

    import plotly.plotly as py
    from plotly.tools import FigureFactory as FF

    X = np.random.rand(5,5)
    dendro = FF.create_dendrogram(X)
    py.iplot(dendro, validate=False, height=300, width=1000)

    ```

    Example 2: Dendrogram to put on the left of the heatmap
    ```
    X = np.random.rand(5,5)
    names = ['Jack', 'Oxana', 'John', 'Chelsea', 'Mark']
    dendro = FF.create_dendrogram(X, orientation='right', labels=names)

    py.iplot(dendro, validate=False, height=1000, width=300)
    ```
    """
    dependencies = (_scipy_imported and _scipy__spatial_imported and
                    _scipy__cluster__hierarchy_imported)

    if not dependencies:
        # Adjacent literals instead of a backslash continuation, which
        # embedded the source indentation in the message.
        raise ImportError("FigureFactory.create_dendrogram requires scipy, "
                          "scipy.spatial and scipy.cluster.hierarchy")

    if len(X.shape) != 2:
        # The exception was previously built but never raised, so invalid
        # input fell through to the clustering code.
        raise exceptions.PlotlyError("X should be 2-dimensional array.")

    dendrogram = _Dendrogram(X, orientation, labels, colorscale)

    return {'layout': dendrogram.layout,
            'data': dendrogram.data}
class _Dendrogram(FigureFactory):

    """
    Refer to FigureFactory.create_dendrogram() for docstring.
    """

    def __init__(self, X, orientation='bottom', labels=None, colorscale=None,
                 width="100%", height="100%", xaxis='xaxis', yaxis='yaxis'):
        """
        Clusters X and builds the dendrogram traces and layout

        :param (ndarray) X: Matrix of observations as array of arrays
        :param (str) orientation: 'top', 'right', 'bottom', or 'left'
        :param (list) labels: List of axis category labels
        :param (list) colorscale: Optional colorscale for dendrogram tree
                                  clusters
        :param width: figure width stored in the layout
        :param height: figure height stored in the layout
        :param (str) xaxis: layout key of the x axis ('xaxis', 'xaxis2', ...)
        :param (str) yaxis: layout key of the y axis ('yaxis', 'yaxis2', ...)
        """
        self.orientation = orientation
        self.labels = labels
        self.xaxis = xaxis
        self.yaxis = yaxis
        self.data = []
        self.leaves = []
        self.layout = {self.xaxis: {}, self.yaxis: {}}

        # Multipliers applied to the trace coordinates (and tick positions)
        # so that the tree is mirrored for the requested orientation.
        self.sign = {
            self.xaxis: 1 if self.orientation in ['left', 'bottom'] else -1,
            self.yaxis: 1 if self.orientation in ['right', 'bottom'] else -1,
        }

        (dd_traces, xvals, yvals,
            ordered_labels, leaves) = self.get_dendrogram_traces(X, colorscale)

        self.labels = ordered_labels
        self.leaves = leaves

        # Points at height 0 are the leaves; their (unique, sorted)
        # positions are where the tick labels go.
        self.zero_vals = sorted(set(
            xval for xval, yval in zip(xvals.flatten(), yvals.flatten())
            if yval == 0.0))

        self.layout = self.set_figure_layout(width, height)
        self.data = Data(dd_traces)

    @staticmethod
    def _trace_axis_ref(prefix, axis_key):
        """
        Builds a trace axis reference from a layout axis key
        :param (str) prefix: 'x' or 'y'
        :param (str) axis_key: "xaxis", "xaxis2", "yaxis", "yaxis3", etc.
        :rtype (str): prefix followed by the trailing digits of axis_key,
            e.g. 'x' for "xaxis" and 'y3' for "yaxis3"
        """
        digits = axis_key[len(axis_key.rstrip('0123456789')):]
        return prefix + digits

    def get_color_dict(self, colorscale):
        """
        Returns colorscale used for dendrogram tree clusters
        :param (list) colorscale: colors to use for the plot,
            in rgb format
        :rtype (dict): returns a dictionary of default colors mapped
            to the user colorscale
        """

        # These are the color codes returned for dendrograms
        # We're replacing them with nicer colors
        d = {'r': 'red',
             'g': 'green',
             'b': 'blue',
             'c': 'cyan',
             'm': 'magenta',
             'y': 'yellow',
             'k': 'black',
             'w': 'white'}
        default_colors = OrderedDict(sorted(d.items(), key=lambda t: t[0]))

        if colorscale is None:
            colorscale = [
                'rgb(0,116,217)',    # blue
                'rgb(35,205,205)',   # cyan
                'rgb(61,153,112)',   # green
                'rgb(40,35,35)',     # black
                'rgb(133,20,75)',    # magenta
                'rgb(255,65,54)',    # red
                'rgb(255,255,255)',  # white
                'rgb(255,220,0)']    # yellow

        # Pair color codes (alphabetical order) with the colorscale entries;
        # zip stops at the shorter one, so a short colorscale leaves the
        # remaining codes on their default names. Indexing ``keys()`` is
        # avoided because dict views are not subscriptable on Python 3.
        for code, color in zip(list(default_colors), colorscale):
            default_colors[code] = color

        return default_colors

    def set_axis_layout(self, axis_key):
        """
        Sets and returns default axis object for dendrogram figure
        :param (str) axis_key: "xaxis", "xaxis1", "yaxis", yaxis1", etc.
        :rtype (dict): returns an axis_key dictionary with set parameters
        """

        axis_defaults = {
            'type': 'linear',
            'ticks': 'outside',
            'mirror': 'allticks',
            'rangemode': 'tozero',
            'showticklabels': True,
            'zeroline': False,
            'showgrid': False,
            'showline': True,
        }

        if len(self.labels) != 0:
            # Leaf labels live on the y axis for horizontal trees
            axis_key_labels = self.xaxis
            if self.orientation in ['left', 'right']:
                axis_key_labels = self.yaxis
            if axis_key_labels not in self.layout:
                self.layout[axis_key_labels] = {}
            # Mirror the tick positions with the sign of the axis that
            # carries the labels, matching the mirrored trace coordinates.
            label_sign = self.sign[axis_key_labels]
            self.layout[axis_key_labels]['tickvals'] = [
                zv * label_sign for zv in self.zero_vals]
            self.layout[axis_key_labels]['ticktext'] = self.labels
            self.layout[axis_key_labels]['tickmode'] = 'array'

        self.layout[axis_key].update(axis_defaults)

        return self.layout[axis_key]

    def set_figure_layout(self, width, height):
        """
        Sets and returns default layout object for dendrogram figure
        :param width: figure width stored under 'width'
        :param height: figure height stored under 'height'
        :rtype (dict): returns the updated layout dictionary
        """

        self.layout.update({
            'showlegend': False,
            'autoscale': False,
            'hovermode': 'closest',
            'width': width,
            'height': height
        })

        self.set_axis_layout(self.xaxis)
        self.set_axis_layout(self.yaxis)

        return self.layout

    def get_dendrogram_traces(self, X, colorscale):
        """
        Calculates all the elements needed for plotting a dendrogram

        :param (ndarray) X: Matrix of observations as array of arrays
        :param (list) colorscale: Optional colorscale for dendrogram tree
                                  clusters

        :rtype (tuple): Contains all the traces in the following order
            (a) trace_list: List of Plotly trace objects for the dendrogram
                            tree
            (b) icoord: All X points of the dendrogram tree as array of arrays
                        with length 4
            (c) dcoord: All Y points of the dendrogram tree as array of arrays
                        with length 4
            (d) ordered_labels: leaf labels in the order they are going to
                                appear on the plot
            (e) P['leaves']: left-to-right traversal of the leaves
        """

        d = scs.distance.pdist(X)
        Z = sch.linkage(d, method='complete')
        P = sch.dendrogram(Z, orientation=self.orientation,
                           labels=self.labels, no_plot=True)

        icoord = np.array(P['icoord'])
        dcoord = np.array(P['dcoord'])
        ordered_labels = np.array(P['ivl'])
        colors = self.get_color_dict(colorscale)

        # Loop invariants: axis anchors, mirroring signs and orientation
        x_ref = self._trace_axis_ref('x', self.xaxis)
        y_ref = self._trace_axis_ref('y', self.yaxis)
        x_sign = self.sign[self.xaxis]
        y_sign = self.sign[self.yaxis]
        vertical = self.orientation in ['top', 'bottom']

        trace_list = []

        for leaf_pts, height_pts, color_key in zip(icoord, dcoord,
                                                   P['color_list']):
            # xs and ys are arrays of 4 points that make up the '∩' shapes
            # of the dendrogram tree
            if vertical:
                xs, ys = leaf_pts, height_pts
            else:
                xs, ys = height_pts, leaf_pts

            trace = Scatter(x=np.multiply(x_sign, xs),
                            y=np.multiply(y_sign, ys),
                            mode='lines',
                            marker=Marker(color=colors[color_key]))

            trace['xaxis'] = x_ref
            trace['yaxis'] = y_ref

            trace_list.append(trace)

        return trace_list, icoord, dcoord, ordered_labels, P['leaves']