From b0c13024f3e74b373774103102cb37dcf7891e12 Mon Sep 17 00:00:00 2001 From: Michael Chin Date: Wed, 14 Aug 2024 21:52:44 -0700 Subject: [PATCH 1/5] Update OC-RDF samples to load from regional S3 buckets --- .../01-Air-Routes.ipynb | 6 ++-- .../02-Air-Routes-GeoNames.ipynb | 34 ++++++++++++++++--- 2 files changed, 32 insertions(+), 8 deletions(-) diff --git a/src/graph_notebook/notebooks/02-Neptune-Analytics/04-OpenCypher-Over-RDF/01-Air-Routes.ipynb b/src/graph_notebook/notebooks/02-Neptune-Analytics/04-OpenCypher-Over-RDF/01-Air-Routes.ipynb index 18303363..21bde9ce 100644 --- a/src/graph_notebook/notebooks/02-Neptune-Analytics/04-OpenCypher-Over-RDF/01-Air-Routes.ipynb +++ b/src/graph_notebook/notebooks/02-Neptune-Analytics/04-OpenCypher-Over-RDF/01-Air-Routes.ipynb @@ -26,8 +26,7 @@ "\n", "### The Air Routes Ontology (Schema)\n", "\n", - "The Air Routes ontology and data used in this notebook are available in a single NTRIPLES file on S3:\n", - "[s3://aws-neptune-customer-samples/airroutes-rdf/airroutes.nt](s3://aws-neptune-customer-samples/airroutes-rdf/airroutes.nt)\n", + "The Air Routes ontology and data used in this notebook are available in a single NTRIPLES file on S3: `s3://aws-neptune-customer-samples-[AWS_REGION_HERE]/airroutes-rdf/airroutes.nt`\n", "\n", "In this notebook, we work with a subset of the ontology, shown here:" ] @@ -56,8 +55,7 @@ "\n", "To create the graph, you must have the Air Routes NTRIPLES file loaded into your Neptune Analytics graph.\n", "\n", - "If you do not already have the data loaded, refer to the Neptune Analytics documentation for creating a graph from existing sources using the Air Routes ontology and NTRIPLES file available in S3:\n", - "[s3://aws-neptune-customer-samples/airroutes-rdf/airroutes.nt](s3://aws-neptune-customer-samples/airroutes-rdf/airroutes.nt)" + "If you do not already have the data loaded, refer to the Neptune Analytics documentation for creating a graph from existing sources using the Air Routes ontology and NTRIPLES files available in S3: `s3://aws-neptune-customer-samples-[AWS_REGION_HERE]/airroutes-rdf/airroutes.nt`. Ensure that you substitute the AWS Region identifier corresponding to that of your graph." ] }, { diff --git a/src/graph_notebook/notebooks/02-Neptune-Analytics/04-OpenCypher-Over-RDF/02-Air-Routes-GeoNames.ipynb b/src/graph_notebook/notebooks/02-Neptune-Analytics/04-OpenCypher-Over-RDF/02-Air-Routes-GeoNames.ipynb index 262a6534..56e5d86d 100644 --- a/src/graph_notebook/notebooks/02-Neptune-Analytics/04-OpenCypher-Over-RDF/02-Air-Routes-GeoNames.ipynb +++ b/src/graph_notebook/notebooks/02-Neptune-Analytics/04-OpenCypher-Over-RDF/02-Air-Routes-GeoNames.ipynb @@ -48,6 +48,32 @@ "![Airroutes-GeoNames-111.png](attachment:Airroutes-GeoNames-111.png)" ] }, + { + "cell_type": "markdown", + "source": [ + "### Presets for data loading\n", + "\n", + "Before executing the GeoNames and Air Routes data loads that follow, run the following cell. This will set the load commands to use an S3 bucket in the same region as the graph - this is a requirement for batch load." + ], + "metadata": { + "collapsed": false + }, + "id": "8111ccacffd41586" + }, + { + "cell_type": "code", + "outputs": [], + "source": [ + "import graph_notebook as gn\n", + "\n", + "config = gn.configuration.get_config.get_config()\n", + "region = config.aws_region" + ], + "metadata": { + "collapsed": false + }, + "id": "98bf6d5e82a415f6" + }, { "cell_type": "markdown", "id": "c0950c69", @@ -69,8 +95,8 @@ "\n", "CALL neptune.load(\n", " {\n", - " source: \"s3://aws-neptune-customer-samples/geonames-rdf/geonames-cities-countries.nt\",\n", - " region: \"us-east-1\",\n", + " source: \"s3://aws-neptune-customer-samples-${region}/geonames-rdf/geonames-cities-countries.nt\",\n", + " region: \"${region}\",\n", " format: \"ntriples\",\n", " failOnError: true,\n", " blankNodeHandling: \"convertToIri\"\n", @@ -99,8 +125,8 @@ "\n", "CALL neptune.load(\n", " {\n", - " source: \"s3://aws-neptune-customer-samples/airroutes-rdf/airroutes.nt\",\n", - " region: \"us-east-1\",\n", + " source: \"s3://aws-neptune-customer-samples-${region}/airroutes-rdf/airroutes.nt\",\n", + " region: \"${region}\",\n", " format: \"ntriples\",\n", " failOnError: true,\n", " blankNodeHandling: \"convertToIri\"\n", From e2767c2918fbecd1d3c20a05323ec113332f28eb Mon Sep 17 00:00:00 2001 From: Michael Chin Date: Wed, 14 Aug 2024 22:21:16 -0700 Subject: [PATCH 2/5] Switch 01-Air-Routes to batch load --- .../01-Air-Routes.ipynb | 61 ++++++++++++++++--- .../02-Air-Routes-GeoNames.ipynb | 33 ++++++---- 2 files changed, 71 insertions(+), 23 deletions(-) diff --git a/src/graph_notebook/notebooks/02-Neptune-Analytics/04-OpenCypher-Over-RDF/01-Air-Routes.ipynb b/src/graph_notebook/notebooks/02-Neptune-Analytics/04-OpenCypher-Over-RDF/01-Air-Routes.ipynb index 21bde9ce..24d1d3d0 100644 --- a/src/graph_notebook/notebooks/02-Neptune-Analytics/04-OpenCypher-Over-RDF/01-Air-Routes.ipynb +++ b/src/graph_notebook/notebooks/02-Neptune-Analytics/04-OpenCypher-Over-RDF/01-Air-Routes.ipynb @@ -51,11 +51,52 @@ "source": [ "## Creating the Graph\n", "\n", - "This notebook assumes that you already have a Neptune Analytics graph with the Air Routes RDF data loaded.\n", + "Run the following two cells to load the Air Routes RDF graph, using [Neptune batch load](https://docs.aws.amazon.com/neptune-analytics/latest/userguide/batch-load.html). " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f9b16519669e1e39", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "tags": [] + }, + "outputs": [], + "source": [ + "import graph_notebook as gn\n", "\n", - "To create the graph, you must have the Air Routes NTRIPLES file loaded into your Neptune Analytics graph.\n", + "config = gn.configuration.get_config.get_config()\n", + "region = config.aws_region" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "115b0bc4a8524820", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "tags": [] + }, + "outputs": [], + "source": [ + "%%oc\n", "\n", - "If you do not already have the data loaded, refer to the Neptune Analytics documentation for creating a graph from existing sources using the Air Routes ontology and NTRIPLES files available in S3: `s3://aws-neptune-customer-samples-[AWS_REGION_HERE]/airroutes-rdf/airroutes.nt`. Ensure that you substitute the AWS Region identifier corresponding to that of your graph." + "CALL neptune.load(\n", + " {\n", + " source: \"s3://aws-neptune-customer-samples-${region}/airroutes-rdf/airroutes.nt\",\n", + " region: \"${region}\",\n", + " format: \"ntriples\",\n", + " failOnError: true,\n", + " blankNodeHandling: \"convertToIri\"\n", + " }\n", + ")" ] }, { @@ -82,7 +123,9 @@ "cell_type": "code", "execution_count": null, "id": "5a8776f5", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "%%oc\n", @@ -319,11 +362,9 @@ }, { "cell_type": "markdown", - "source": [], - "metadata": { - "collapsed": false - }, - "id": "2fae2627ca1adc97" + "id": "2fae2627ca1adc97", + "metadata": {}, + "source": [] }, { "cell_type": "code", @@ -544,7 +585,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.8" + "version": "3.10.13" } }, "nbformat": 4, diff --git a/src/graph_notebook/notebooks/02-Neptune-Analytics/04-OpenCypher-Over-RDF/02-Air-Routes-GeoNames.ipynb b/src/graph_notebook/notebooks/02-Neptune-Analytics/04-OpenCypher-Over-RDF/02-Air-Routes-GeoNames.ipynb index 56e5d86d..a19b675d 100644 --- a/src/graph_notebook/notebooks/02-Neptune-Analytics/04-OpenCypher-Over-RDF/02-Air-Routes-GeoNames.ipynb +++ b/src/graph_notebook/notebooks/02-Neptune-Analytics/04-OpenCypher-Over-RDF/02-Air-Routes-GeoNames.ipynb @@ -50,29 +50,32 @@ }, { "cell_type": "markdown", + "id": "8111ccacffd41586", + "metadata": {}, "source": [ "### Presets for data loading\n", "\n", "Before executing the GeoNames and Air Routes data loads that follow, run the following cell. This will set the load commands to use an S3 bucket in the same region as the graph - this is a requirement for batch load." - ], - "metadata": { - "collapsed": false - }, - "id": "8111ccacffd41586" + ] }, { "cell_type": "code", + "execution_count": null, + "id": "98bf6d5e82a415f6", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "tags": [] + }, "outputs": [], "source": [ "import graph_notebook as gn\n", "\n", "config = gn.configuration.get_config.get_config()\n", "region = config.aws_region" - ], - "metadata": { - "collapsed": false - }, - "id": "98bf6d5e82a415f6" + ] }, { "cell_type": "markdown", @@ -88,7 +91,9 @@ "cell_type": "code", "execution_count": null, "id": "11f981cb", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "%%oc\n", @@ -118,7 +123,9 @@ "cell_type": "code", "execution_count": null, "id": "ea98a771", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "%%oc\n", @@ -340,7 +347,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.8" + "version": "3.10.13" } }, "nbformat": 4, From 7a9ee738902c58e1d0e421fd1d7766bafd77fb5d Mon Sep 17 00:00:00 2001 From: Michael Chin Date: Wed, 14 Aug 2024 22:27:20 -0700 Subject: [PATCH 3/5] update changelog --- ChangeLog.md | 1 + 1 file changed, 1 insertion(+) diff --git a/ChangeLog.md b/ChangeLog.md index 8bfaf43d..9cc11a85 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -7,6 +7,7 @@ Starting with v1.31.6, this file will contain a record of major features and upd - New Neptune Analytics notebooks - openCypher over RDF ([Link to PR](https://github.com/aws/graph-notebook/pull/672)) - Path: 02-Neptune-Analytics > 04-OpenCypher-Over-RDF - Added regional S3 bucket mappings to Neptune CloudFormation template ([Link to PR](https://github.com/aws/graph-notebook/pull/664)) +- Fixed cross-region load restriction issue in OC-RDF sample notebooks ([Link to PR](https://github.com/aws/graph-notebook/pull/676)) - Enabled n-triples data for `%load` with Neptune Analytics ([PR #1](https://github.com/aws/graph-notebook/pull/671)) ( ([PR #2](https://github.com/aws/graph-notebook/pull/675))) - Removed unused options from `%load`([Link to PR](https://github.com/aws/graph-notebook/pull/662)) - Made EncryptionKey optional in Neptune CloudFormation template ([Link to PR](https://github.com/aws/graph-notebook/pull/663)) From 58559a8ad085556593f46e93a7489fbfb2c887f0 Mon Sep 17 00:00:00 2001 From: Michael Chin Date: Wed, 14 Aug 2024 23:19:43 -0700 Subject: [PATCH 4/5] Use %load magic instead of CALL query --- src/graph_notebook/magics/graph_magic.py | 6 ++--- .../01-Air-Routes.ipynb | 18 ++----------- .../02-Air-Routes-GeoNames.ipynb | 27 +++---------------- 3 files changed, 9 insertions(+), 42 deletions(-) diff --git a/src/graph_notebook/magics/graph_magic.py b/src/graph_notebook/magics/graph_magic.py index e39bd427..5b89ac97 100644 --- a/src/graph_notebook/magics/graph_magic.py +++ b/src/graph_notebook/magics/graph_magic.py @@ -2339,10 +2339,8 @@ def on_button_clicked(b): incremental_load_kwargs = { 'source': source.value, 'format': source_format.value, - 'concurrency': concurrency.value, + 'concurrency': concurrency.value } - if source.value == FORMAT_NTRIPLE: - incremental_load_kwargs['blankNodeHandling'] = 'convertToIri' kwargs.update(incremental_load_kwargs) else: bulk_load_kwargs = { @@ -2403,6 +2401,8 @@ def on_button_clicked(b): next_param = param + ': ' + value_substr load_oc_params += next_param if param == 'concurrency': + if source_format.value == FORMAT_NTRIPLE: + load_oc_params += ', blankNodeHandling: "convertToIri"' load_oc_params += '}' else: load_oc_params += ', ' diff --git a/src/graph_notebook/notebooks/02-Neptune-Analytics/04-OpenCypher-Over-RDF/01-Air-Routes.ipynb b/src/graph_notebook/notebooks/02-Neptune-Analytics/04-OpenCypher-Over-RDF/01-Air-Routes.ipynb index 24d1d3d0..3facc17d 100644 --- a/src/graph_notebook/notebooks/02-Neptune-Analytics/04-OpenCypher-Over-RDF/01-Air-Routes.ipynb +++ b/src/graph_notebook/notebooks/02-Neptune-Analytics/04-OpenCypher-Over-RDF/01-Air-Routes.ipynb @@ -76,27 +76,13 @@ { "cell_type": "code", "execution_count": null, - "id": "115b0bc4a8524820", + "id": "5d200238-f85e-4caf-907d-c438b9a4647c", "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - }, "tags": [] }, "outputs": [], "source": [ - "%%oc\n", - "\n", - "CALL neptune.load(\n", - " {\n", - " source: \"s3://aws-neptune-customer-samples-${region}/airroutes-rdf/airroutes.nt\",\n", - " region: \"${region}\",\n", - " format: \"ntriples\",\n", - " failOnError: true,\n", - " blankNodeHandling: \"convertToIri\"\n", - " }\n", - ")" + "%load -s s3://aws-neptune-customer-samples-{region}/airroutes-rdf/airroutes.nt -r {region} -f ntriples --run" ] }, { diff --git a/src/graph_notebook/notebooks/02-Neptune-Analytics/04-OpenCypher-Over-RDF/02-Air-Routes-GeoNames.ipynb b/src/graph_notebook/notebooks/02-Neptune-Analytics/04-OpenCypher-Over-RDF/02-Air-Routes-GeoNames.ipynb index a19b675d..3d08fe8a 100644 --- a/src/graph_notebook/notebooks/02-Neptune-Analytics/04-OpenCypher-Over-RDF/02-Air-Routes-GeoNames.ipynb +++ b/src/graph_notebook/notebooks/02-Neptune-Analytics/04-OpenCypher-Over-RDF/02-Air-Routes-GeoNames.ipynb @@ -74,7 +74,8 @@ "import graph_notebook as gn\n", "\n", "config = gn.configuration.get_config.get_config()\n", - "region = config.aws_region" + "region = config.aws_region\n", + "print(f\"Using region: {region}\")" ] }, { @@ -96,17 +97,7 @@ }, "outputs": [], "source": [ - "%%oc\n", - "\n", - "CALL neptune.load(\n", - " {\n", - " source: \"s3://aws-neptune-customer-samples-${region}/geonames-rdf/geonames-cities-countries.nt\",\n", - " region: \"${region}\",\n", - " format: \"ntriples\",\n", - " failOnError: true,\n", - " blankNodeHandling: \"convertToIri\"\n", - " }\n", - ")" + "%load -s s3://aws-neptune-customer-samples-{region}/geonames-rdf/geonames-cities-countries.nt -r {region} -f ntriples --run" ] }, { @@ -128,17 +119,7 @@ }, "outputs": [], "source": [ - "%%oc\n", - "\n", - "CALL neptune.load(\n", - " {\n", - " source: \"s3://aws-neptune-customer-samples-${region}/airroutes-rdf/airroutes.nt\",\n", - " region: \"${region}\",\n", - " format: \"ntriples\",\n", - " failOnError: true,\n", - " blankNodeHandling: \"convertToIri\"\n", - " }\n", - ")" + "%load -s s3://aws-neptune-customer-samples-{region}/airroutes-rdf/airroutes.nt -r {region} -f ntriples --run" ] }, { From dd590b510662533994ed16fd37ff8b9c6ae17384 Mon Sep 17 00:00:00 2001 From: Michael Chin Date: Wed, 14 Aug 2024 23:25:42 -0700 Subject: [PATCH 5/5] update changelog --- ChangeLog.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ChangeLog.md b/ChangeLog.md index 9cc11a85..7c58b8f2 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -6,8 +6,8 @@ Starting with v1.31.6, this file will contain a record of major features and upd - New Neptune Analytics notebooks - openCypher over RDF ([Link to PR](https://github.com/aws/graph-notebook/pull/672)) - Path: 02-Neptune-Analytics > 04-OpenCypher-Over-RDF +- Updated OC-RDF samples to use `%load` magic, and pull from regional S3 buckets ([Link to PR](https://github.com/aws/graph-notebook/pull/676)) - Added regional S3 bucket mappings to Neptune CloudFormation template ([Link to PR](https://github.com/aws/graph-notebook/pull/664)) -- Fixed cross-region load restriction issue in OC-RDF sample notebooks ([Link to PR](https://github.com/aws/graph-notebook/pull/676)) - Enabled n-triples data for `%load` with Neptune Analytics ([PR #1](https://github.com/aws/graph-notebook/pull/671)) ( ([PR #2](https://github.com/aws/graph-notebook/pull/675))) - Removed unused options from `%load`([Link to PR](https://github.com/aws/graph-notebook/pull/662)) - Made EncryptionKey optional in Neptune CloudFormation template ([Link to PR](https://github.com/aws/graph-notebook/pull/663))