diff --git a/ChangeLog.md b/ChangeLog.md index 8bfaf43d..7c58b8f2 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -6,6 +6,7 @@ Starting with v1.31.6, this file will contain a record of major features and upd - New Neptune Analytics notebooks - openCypher over RDF ([Link to PR](https://github.com/aws/graph-notebook/pull/672)) - Path: 02-Neptune-Analytics > 04-OpenCypher-Over-RDF +- Updated OC-RDF samples to use `%load` magic, and pull from regional S3 buckets ([Link to PR](https://github.com/aws/graph-notebook/pull/676)) - Added regional S3 bucket mappings to Neptune CloudFormation template ([Link to PR](https://github.com/aws/graph-notebook/pull/664)) - Enabled n-triples data for `%load` with Neptune Analytics ([PR #1](https://github.com/aws/graph-notebook/pull/671)) ( ([PR #2](https://github.com/aws/graph-notebook/pull/675))) - Removed unused options from `%load`([Link to PR](https://github.com/aws/graph-notebook/pull/662)) diff --git a/src/graph_notebook/magics/graph_magic.py b/src/graph_notebook/magics/graph_magic.py index e39bd427..5b89ac97 100644 --- a/src/graph_notebook/magics/graph_magic.py +++ b/src/graph_notebook/magics/graph_magic.py @@ -2339,10 +2339,8 @@ def on_button_clicked(b): incremental_load_kwargs = { 'source': source.value, 'format': source_format.value, - 'concurrency': concurrency.value, + 'concurrency': concurrency.value } - if source.value == FORMAT_NTRIPLE: - incremental_load_kwargs['blankNodeHandling'] = 'convertToIri' kwargs.update(incremental_load_kwargs) else: bulk_load_kwargs = { @@ -2403,6 +2401,8 @@ def on_button_clicked(b): next_param = param + ': ' + value_substr load_oc_params += next_param if param == 'concurrency': + if source_format.value == FORMAT_NTRIPLE: + load_oc_params += ', blankNodeHandling: "convertToIri"' load_oc_params += '}' else: load_oc_params += ', ' diff --git a/src/graph_notebook/notebooks/02-Neptune-Analytics/04-OpenCypher-Over-RDF/01-Air-Routes.ipynb b/src/graph_notebook/notebooks/02-Neptune-Analytics/04-OpenCypher-Over-RDF/01-Air-Routes.ipynb index 18303363..3facc17d 100644 --- a/src/graph_notebook/notebooks/02-Neptune-Analytics/04-OpenCypher-Over-RDF/01-Air-Routes.ipynb +++ b/src/graph_notebook/notebooks/02-Neptune-Analytics/04-OpenCypher-Over-RDF/01-Air-Routes.ipynb @@ -26,8 +26,7 @@ "\n", "### The Air Routes Ontology (Schema)\n", "\n", - "The Air Routes ontology and data used in this notebook are available in a single NTRIPLES file on S3:\n", - "[s3://aws-neptune-customer-samples/airroutes-rdf/airroutes.nt](s3://aws-neptune-customer-samples/airroutes-rdf/airroutes.nt)\n", + "The Air Routes ontology and data used in this notebook are available in a single NTRIPLES file on S3: `s3://aws-neptune-customer-samples-[AWS_REGION_HERE]/airroutes-rdf/airroutes.nt`\n", "\n", "In this notebook, we work with a subset of the ontology, shown here:" ] @@ -52,12 +51,38 @@ "source": [ "## Creating the Graph\n", "\n", - "This notebook assumes that you already have a Neptune Analytics graph with the Air Routes RDF data loaded.\n", - "\n", - "To create the graph, you must have the Air Routes NTRIPLES file loaded into your Neptune Analytics graph.\n", + "Run the following two cells to load the Air Routes RDF graph, using [Neptune batch load](https://docs.aws.amazon.com/neptune-analytics/latest/userguide/batch-load.html). " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f9b16519669e1e39", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "tags": [] + }, + "outputs": [], + "source": [ + "import graph_notebook as gn\n", "\n", - "If you do not already have the data loaded, refer to the Neptune Analytics documentation for creating a graph from existing sources using the Air Routes ontology and NTRIPLES file available in S3:\n", - "[s3://aws-neptune-customer-samples/airroutes-rdf/airroutes.nt](s3://aws-neptune-customer-samples/airroutes-rdf/airroutes.nt)" + "config = gn.configuration.get_config.get_config()\n", + "region = config.aws_region" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5d200238-f85e-4caf-907d-c438b9a4647c", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "%load -s s3://aws-neptune-customer-samples-{region}/airroutes-rdf/airroutes.nt -r {region} -f ntriples --run" ] }, { @@ -84,7 +109,9 @@ "cell_type": "code", "execution_count": null, "id": "5a8776f5", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "%%oc\n", @@ -321,11 +348,9 @@ }, { "cell_type": "markdown", - "source": [], - "metadata": { - "collapsed": false - }, - "id": "2fae2627ca1adc97" + "id": "2fae2627ca1adc97", + "metadata": {}, + "source": [] }, { "cell_type": "code", @@ -546,7 +571,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.8" + "version": "3.10.13" } }, "nbformat": 4, diff --git a/src/graph_notebook/notebooks/02-Neptune-Analytics/04-OpenCypher-Over-RDF/02-Air-Routes-GeoNames.ipynb b/src/graph_notebook/notebooks/02-Neptune-Analytics/04-OpenCypher-Over-RDF/02-Air-Routes-GeoNames.ipynb index 262a6534..3d08fe8a 100644 --- a/src/graph_notebook/notebooks/02-Neptune-Analytics/04-OpenCypher-Over-RDF/02-Air-Routes-GeoNames.ipynb +++ b/src/graph_notebook/notebooks/02-Neptune-Analytics/04-OpenCypher-Over-RDF/02-Air-Routes-GeoNames.ipynb @@ -48,6 +48,36 @@ "![Airroutes-GeoNames-111.png](attachment:Airroutes-GeoNames-111.png)" ] }, + { + "cell_type": "markdown", + "id": "8111ccacffd41586", + "metadata": {}, + "source": [ + "### Presets for data loading\n", + "\n", + "Before executing the GeoNames and Air Routes data loads that follow, run the following cell. This will set the load commands to use an S3 bucket in the same region as the graph - this is a requirement for batch load." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "98bf6d5e82a415f6", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "tags": [] + }, + "outputs": [], + "source": [ + "import graph_notebook as gn\n", + "\n", + "config = gn.configuration.get_config.get_config()\n", + "region = config.aws_region\n", + "print(f\"Using region: {region}\")" + ] + }, { "cell_type": "markdown", "id": "c0950c69", @@ -62,20 +92,12 @@ "cell_type": "code", "execution_count": null, "id": "11f981cb", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ - "%%oc\n", - "\n", - "CALL neptune.load(\n", - " {\n", - " source: \"s3://aws-neptune-customer-samples/geonames-rdf/geonames-cities-countries.nt\",\n", - " region: \"us-east-1\",\n", - " format: \"ntriples\",\n", - " failOnError: true,\n", - " blankNodeHandling: \"convertToIri\"\n", - " }\n", - ")" + "%load -s s3://aws-neptune-customer-samples-{region}/geonames-rdf/geonames-cities-countries.nt -r {region} -f ntriples --run" ] }, { @@ -92,20 +114,12 @@ "cell_type": "code", "execution_count": null, "id": "ea98a771", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ - "%%oc\n", - "\n", - "CALL neptune.load(\n", - " {\n", - " source: \"s3://aws-neptune-customer-samples/airroutes-rdf/airroutes.nt\",\n", - " region: \"us-east-1\",\n", - " format: \"ntriples\",\n", - " failOnError: true,\n", - " blankNodeHandling: \"convertToIri\"\n", - " }\n", - ")" + "%load -s s3://aws-neptune-customer-samples-{region}/airroutes-rdf/airroutes.nt -r {region} -f ntriples --run" ] }, { @@ -314,7 +328,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.8" + "version": "3.10.13" } }, "nbformat": 4,