diff --git a/notebooks/neural_networks/autoencoder.ipynb b/notebooks/neural_networks/autoencoder.ipynb
index 9b1d534..e90e386 100644
--- a/notebooks/neural_networks/autoencoder.ipynb
+++ b/notebooks/neural_networks/autoencoder.ipynb
@@ -31,9 +31,7 @@
   {
    "cell_type": "code",
    "execution_count": 1,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "name": "stdout",
@@ -91,9 +89,7 @@
   {
    "cell_type": "code",
    "execution_count": 2,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "name": "stdout",
@@ -170,9 +166,7 @@
   {
    "cell_type": "code",
    "execution_count": 4,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "name": "stdout",
@@ -214,9 +208,7 @@
   {
    "cell_type": "code",
    "execution_count": 5,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "name": "stdout",
@@ -255,9 +247,7 @@
   {
    "cell_type": "code",
    "execution_count": 6,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "name": "stdout",
@@ -293,9 +283,7 @@
   {
    "cell_type": "code",
    "execution_count": 7,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -345,9 +333,7 @@
   {
    "cell_type": "code",
    "execution_count": 8,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "name": "stdout",
@@ -395,9 +381,9 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython2",
-   "version": "2.7.13"
+   "version": "2.7.16"
   }
  },
  "nbformat": 4,
- "nbformat_minor": 0
+ "nbformat_minor": 1
 }
diff --git a/notebooks/neural_networks/linear_regression.ipynb b/notebooks/neural_networks/linear_regression.ipynb
index 19e9ce1..01ffa97 100644
--- a/notebooks/neural_networks/linear_regression.ipynb
+++ b/notebooks/neural_networks/linear_regression.ipynb
@@ -17,15 +17,14 @@
   {
    "cell_type": "code",
    "execution_count": 1,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Populating the interactive namespace from numpy and matplotlib\n"
+      "Populating the interactive namespace from numpy and matplotlib\n",
+      "Prepending /usr/local/lib/python2.7/dist-packages/bigdl/share/conf/spark-bigdl.conf to sys.path\n"
      ]
     }
    ],
@@ -67,9 +66,7 @@
   {
    "cell_type": "code",
    "execution_count": 2,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "FEATURES_DIM = 2\n",
@@ -100,9 +97,7 @@
   {
    "cell_type": "code",
    "execution_count": 3,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "name": "stdout",
@@ -149,9 +144,7 @@
   {
    "cell_type": "code",
    "execution_count": 4,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "name": "stdout",
@@ -161,7 +154,7 @@
       "creating: createDefault\n",
       "creating: createSGD\n",
       "creating: createMaxEpoch\n",
-      "creating: createOptimizer\n"
+      "creating: createDistriOptimizer\n"
      ]
     }
    ],
@@ -179,9 +172,7 @@
   {
    "cell_type": "code",
    "execution_count": 5,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "# Start to train\n",
@@ -198,9 +189,7 @@
   {
    "cell_type": "code",
    "execution_count": 6,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "name": "stdout",
@@ -208,15 +197,15 @@
      "text": [
       "predict predict: \n",
       "\n",
-      "[ 2.35372853]\n",
+      "[2.9912615]\n",
       "\n",
-      "[ 2.34923339]\n",
+      "[2.7333193]\n",
       "\n",
-      "[ 2.75478125]\n",
+      "[1.6295706]\n",
       "\n",
-      "[ 1.48513186]\n",
+      "[1.8427728]\n",
       "\n",
-      "[ 0.81543505]\n",
+      "[2.1632674]\n",
       "\n"
      ]
     }
@@ -248,15 +237,13 @@
   {
"cell_type": "code", "execution_count": 7, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "8.1123\n" + "0.0006544155222307509\n" ] } ], @@ -265,15 +252,13 @@ " np.random.seed(100)\n", " total_length = 10\n", " features = np.random.uniform(0, 1, (total_length, 2))\n", - " label = (features).sum() + 0.4\n", + " label = 2 * (features).sum(axis = 1) + 0.4\n", " predict_data = sc.parallelize(range(0, total_length)).map(\n", - " lambda i: Sample.from_ndarray(features[i], label))\n", + " lambda i: Sample.from_ndarray(features[i], label[i]))\n", " \n", " predict_result = trained_model.predict(predict_data)\n", " p = predict_result.take(6)\n", - " ground_label = np.array([[-0.47596836], [-0.37598032], [-0.00492062],\n", - " [-0.5906958], [-0.12307882], [-0.77907401]], dtype=\"float32\")\n", - " mse = ((p - ground_label) ** 2).mean()\n", + " mse = ((p - label[:6].reshape(6, 1)) ** 2).mean()\n", " print mse\n", " \n", "test_predict(trained_model)" @@ -296,7 +281,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", - "version": "2.7.13" + "version": "2.7.15rc1" } }, "nbformat": 4, diff --git a/notebooks/spark_basics/DataFrame.ipynb b/notebooks/spark_basics/DataFrame.ipynb index 470f87d..cc11797 100644 --- a/notebooks/spark_basics/DataFrame.ipynb +++ b/notebooks/spark_basics/DataFrame.ipynb @@ -16,10 +16,8 @@ }, { "cell_type": "code", - "execution_count": 7, - "metadata": { - "collapsed": false - }, + "execution_count": 1, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -28,14 +26,18 @@ "+--------------------+----+\n", "| address|name|\n", "+--------------------+----+\n", - "|[Los Angeles,Cali...| Bob|\n", + "|[Los Angeles, Cal...| Bob|\n", "+--------------------+----+\n", "\n" ] } ], "source": [ - "# Defines a Python list storing one JSON object.\n", + "from pyspark import SparkContext\n", + "from pyspark.sql import SparkSession\n", + "sc = SparkContext.getOrCreate()\n", + "spark = SparkSession(sc)\n", + "# Defines a Python list storingone JSON object.\n", "json_strings = ['{\"name\":\"Bob\",\"address\":{\"city\":\"Los Angeles\",\"state\":\"California\"}}', ]\n", "# Defines an RDD from the Python list.\n", "peopleRDD = sc.parallelize(json_strings)\n", @@ -61,9 +63,7 @@ { "cell_type": "code", "execution_count": 2, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -105,16 +105,16 @@ "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2.0 + "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", - "version": "2.7.13" + "version": "2.7.16" } }, "nbformat": 4, - "nbformat_minor": 0 -} \ No newline at end of file + "nbformat_minor": 1 +} diff --git a/notebooks/spark_basics/RDD.ipynb b/notebooks/spark_basics/RDD.ipynb index 0b8bf83..1995033 100644 --- a/notebooks/spark_basics/RDD.ipynb +++ b/notebooks/spark_basics/RDD.ipynb @@ -17,20 +17,22 @@ { "cell_type": "code", "execution_count": 1, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "('world', 1)\n", - "('hello', 2)\n" + "('hello', 2)\n", + "('world', 1)\n" ] } ], "source": [ + "from pyspark import SparkContext\n", + "\n", + "sc = SparkContext.getOrCreate()\n", + "\n", "text_file = sc.parallelize([\"hello\",\"hello world\"])\n", "counts = text_file.flatMap(lambda line: line.split(\" \")) \\\n", " .map(lambda 
word: (word, 1)) \\\n", @@ -45,15 +47,6 @@ "source": [ "The first line defines a base RDD by parallelizing an existing Python list. The second line defines *counts* as the result of a few transformations. In the third line and fourth line, the program print all elements from counts by calling *collect()*. *collect()* is used to retrieve the entire RDD if the data are expected to fit in memory. For more RDD APIs, you can refer to the website [RDD APIs](http://spark.apache.org/docs/latest/programming-guide.html#resilient-distributed-datasets-rdds)" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [] } ], "metadata": { @@ -72,7 +65,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", - "version": "2.7.12" + "version": "2.7.16" } }, "nbformat": 4, diff --git a/notebooks/spark_basics/spark_sql.ipynb b/notebooks/spark_basics/spark_sql.ipynb index 85f36eb..641a876 100644 --- a/notebooks/spark_basics/spark_sql.ipynb +++ b/notebooks/spark_basics/spark_sql.ipynb @@ -23,10 +23,8 @@ }, { "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": false - }, + "execution_count": 2, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -35,14 +33,19 @@ "+--------------------+----+\n", "| address|name|\n", "+--------------------+----+\n", - "|[Los Angeles,Cali...| Bob|\n", - "|[Seattle,Washington]|Adam|\n", + "|[Los Angeles, Cal...| Bob|\n", + "|[Seattle, Washing...|Adam|\n", "+--------------------+----+\n", "\n" ] } ], "source": [ + "from pyspark import SparkContext\n", + "from pyspark.sql import SparkSession\n", + "sc = SparkContext.getOrCreate()\n", + "spark = SparkSession(sc)\n", + "\n", "json_strings = ['{\"name\":\"Bob\",\"address\":{\"city\":\"Los Angeles\",\"state\":\"California\"}}', \n", " '{\"name\":\"Adam\",\"address\":{\"city\":\"Seattle\",\"state\":\"Washington\"}}']\n", "# Defines an RDD from the Python list.\n", @@ -61,10 +64,8 @@ }, { "cell_type": "code", - "execution_count": 2, - "metadata": { - "collapsed": false - }, + "execution_count": 3, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -73,7 +74,7 @@ "+--------------------+----+\n", "| address|name|\n", "+--------------------+----+\n", - "|[Seattle,Washington]|Adam|\n", + "|[Seattle, Washing...|Adam|\n", "+--------------------+----+\n", "\n" ] @@ -110,7 +111,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", - "version": "2.7.13" + "version": "2.7.16" } }, "nbformat": 4, diff --git a/notebooks/spark_basics/structured_streaming.ipynb b/notebooks/spark_basics/structured_streaming.ipynb index dfc5112..2ee2bf3 100644 --- a/notebooks/spark_basics/structured_streaming.ipynb +++ b/notebooks/spark_basics/structured_streaming.ipynb @@ -37,13 +37,13 @@ }, { "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": true - }, + "execution_count": 3, + "metadata": {}, "outputs": [], "source": [ - "from pyspark.streaming import StreamingContext" + "from pyspark.streaming import StreamingContext\n", + "from pyspark import SparkContext\n", + "sc = SparkContext.getOrCreate()" ] }, { @@ -55,10 +55,8 @@ }, { "cell_type": "code", - "execution_count": 2, - "metadata": { - "collapsed": true - }, + "execution_count": 4, + "metadata": {}, "outputs": [], "source": [ "# Create a local StreamingContext with two working thread and batch interval of 5 seconds\n", @@ -234,10 +232,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", - "version": 
"2.7.10" + "version": "2.7.16" } }, "nbformat": 4, "nbformat_minor": 2 } -