intel · GenBrg · Feb 26, 2019 · Mar 28, 2019 · Mar 28, 2019
diff --git a/notebooks/neural_networks/autoencoder.ipynb b/notebooks/neural_networks/autoencoder.ipynb
@@ -31,9 +31,7 @@
   {
    "cell_type": "code",
    "execution_count": 1,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "name": "stdout",
@@ -91,9 +89,7 @@
   {
    "cell_type": "code",
    "execution_count": 2,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "name": "stdout",
@@ -170,9 +166,7 @@
   {
    "cell_type": "code",
    "execution_count": 4,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "name": "stdout",
@@ -214,9 +208,7 @@
   {
    "cell_type": "code",
    "execution_count": 5,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "name": "stdout",
@@ -255,9 +247,7 @@
   {
    "cell_type": "code",
    "execution_count": 6,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "name": "stdout",
@@ -293,9 +283,7 @@
   {
    "cell_type": "code",
    "execution_count": 7,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -345,9 +333,7 @@
   {
    "cell_type": "code",
    "execution_count": 8,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "name": "stdout",
@@ -395,9 +381,9 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython2",
-   "version": "2.7.13"
+   "version": "2.7.16"
   }
  },
  "nbformat": 4,
- "nbformat_minor": 0
+ "nbformat_minor": 1
 }
diff --git a/notebooks/neural_networks/linear_regression.ipynb b/notebooks/neural_networks/linear_regression.ipynb
@@ -17,15 +17,14 @@
   {
    "cell_type": "code",
    "execution_count": 1,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Populating the interactive namespace from numpy and matplotlib\n"
+      "Populating the interactive namespace from numpy and matplotlib\n",
+      "Prepending /usr/local/lib/python2.7/dist-packages/bigdl/share/conf/spark-bigdl.conf to sys.path\n"
      ]
     }
    ],
@@ -67,9 +66,7 @@
   {
    "cell_type": "code",
    "execution_count": 2,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "FEATURES_DIM = 2\n",
@@ -100,9 +97,7 @@
   {
    "cell_type": "code",
    "execution_count": 3,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "name": "stdout",
@@ -149,9 +144,7 @@
   {
    "cell_type": "code",
    "execution_count": 4,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "name": "stdout",
@@ -161,7 +154,7 @@
       "creating: createDefault\n",
       "creating: createSGD\n",
       "creating: createMaxEpoch\n",
-      "creating: createOptimizer\n"
+      "creating: createDistriOptimizer\n"
      ]
     }
    ],
@@ -179,9 +172,7 @@
   {
    "cell_type": "code",
    "execution_count": 5,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "# Start to train\n",
@@ -198,25 +189,23 @@
   {
    "cell_type": "code",
    "execution_count": 6,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
       "predict predict: \n",
       "\n",
-      "[ 2.35372853]\n",
+      "[2.9912615]\n",
       "\n",
-      "[ 2.34923339]\n",
+      "[2.7333193]\n",
       "\n",
-      "[ 2.75478125]\n",
+      "[1.6295706]\n",
       "\n",
-      "[ 1.48513186]\n",
+      "[1.8427728]\n",
       "\n",
-      "[ 0.81543505]\n",
+      "[2.1632674]\n",
       "\n"
      ]
     }
@@ -248,15 +237,13 @@
   {
    "cell_type": "code",
    "execution_count": 7,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "8.1123\n"
+      "0.0006544155222307509\n"
      ]
     }
    ],
@@ -265,15 +252,13 @@
     "    np.random.seed(100)\n",
     "    total_length = 10\n",
     "    features = np.random.uniform(0, 1, (total_length, 2))\n",
-    "    label = (features).sum() + 0.4\n",
+    "    label = 2 * (features).sum(axis = 1) + 0.4\n",
     "    predict_data = sc.parallelize(range(0, total_length)).map(\n",
-    "        lambda i: Sample.from_ndarray(features[i], label))\n",
+    "        lambda i: Sample.from_ndarray(features[i], label[i]))\n",
     "    \n",
     "    predict_result = trained_model.predict(predict_data)\n",
     "    p = predict_result.take(6)\n",
-    "    ground_label = np.array([[-0.47596836], [-0.37598032], [-0.00492062],\n",
-    "                                 [-0.5906958], [-0.12307882], [-0.77907401]], dtype=\"float32\")\n",
-    "    mse = ((p - ground_label) ** 2).mean()\n",
+    "    mse = ((p - label[:6].reshape(6, 1)) ** 2).mean()\n",
     "    print mse\n",
     "    \n",
     "test_predict(trained_model)"
@@ -296,7 +281,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython2",
-   "version": "2.7.13"
+   "version": "2.7.15rc1"
   }
  },
  "nbformat": 4,

diff --git a/notebooks/spark_basics/DataFrame.ipynb b/notebooks/spark_basics/DataFrame.ipynb
@@ -16,10 +16,8 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {
-    "collapsed": false
-   },
+   "execution_count": 1,
+   "metadata": {},
    "outputs": [
     {
      "name": "stdout",
@@ -28,14 +26,18 @@
       "+--------------------+----+\n",
       "|             address|name|\n",
       "+--------------------+----+\n",
-      "|[Los Angeles,Cali...| Bob|\n",
+      "|[Los Angeles, Cal...| Bob|\n",
       "+--------------------+----+\n",
       "\n"
      ]
     }
    ],
    "source": [
-    "# Defines a Python list storing one JSON object.\n",
+    "from pyspark import SparkContext\n",
+    "from pyspark.sql import SparkSession\n",
+    "sc = SparkContext.getOrCreate()\n",
+    "spark = SparkSession(sc)\n",
+    "# Defines a Python list storing one JSON object.\n",
     "json_strings = ['{\"name\":\"Bob\",\"address\":{\"city\":\"Los Angeles\",\"state\":\"California\"}}', ]\n",
     "# Defines an RDD from the Python list.\n",
     "peopleRDD = sc.parallelize(json_strings)\n",
@@ -61,9 +63,7 @@
   {
    "cell_type": "code",
    "execution_count": 2,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -105,16 +105,16 @@
   "language_info": {
    "codemirror_mode": {
     "name": "ipython",
-    "version": 2.0
+    "version": 2
    },
    "file_extension": ".py",
    "mimetype": "text/x-python",
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython2",
-   "version": "2.7.13"
+   "version": "2.7.16"
   }
  },
  "nbformat": 4,
- "nbformat_minor": 0
-}
+ "nbformat_minor": 1
+}
diff --git a/notebooks/spark_basics/RDD.ipynb b/notebooks/spark_basics/RDD.ipynb
@@ -17,20 +17,22 @@
   {
    "cell_type": "code",
    "execution_count": 1,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "('world', 1)\n",
-      "('hello', 2)\n"
+      "('hello', 2)\n",
+      "('world', 1)\n"
      ]
     }
    ],
    "source": [
+    "from pyspark import SparkContext\n",
+    "\n",
+    "sc = SparkContext.getOrCreate()\n",
+    "\n",
     "text_file = sc.parallelize([\"hello\",\"hello world\"])\n",
     "counts = text_file.flatMap(lambda line: line.split(\" \")) \\\n",
     "             .map(lambda word: (word, 1)) \\\n",
@@ -45,15 +47,6 @@
    "source": [
     "The first line defines a base RDD by parallelizing an existing Python list. The second line defines *counts* as the result of a few transformations. In the third line and fourth line, the program print all elements from counts by calling *collect()*. *collect()* is used to retrieve the entire RDD if the data are expected to fit in memory. For more RDD APIs, you can refer to the website [RDD APIs](http://spark.apache.org/docs/latest/programming-guide.html#resilient-distributed-datasets-rdds)"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "collapsed": true
-   },
-   "outputs": [],
-   "source": []
   }
  ],
  "metadata": {
@@ -72,7 +65,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython2",
-   "version": "2.7.12"
+   "version": "2.7.16"
   }
  },
  "nbformat": 4,