diff --git a/ChangeLog.md b/ChangeLog.md index 7451b048..edcf5a4e 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -4,11 +4,12 @@ Starting with v1.31.6, this file will contain a record of major features and upd ## Upcoming +- Updated Gremlin config `message_serializer` to accept all TinkerPop serializers ([Link to PR](https://github.com/aws/graph-notebook/pull/685)) +- Added `%get_import_task` line magic ([Link to PR](https://github.com/aws/graph-notebook/pull/668)) +- Added `--export-to` JSON file option to `%%graph_notebook_config` ([Link to PR](https://github.com/aws/graph-notebook/pull/684)) - Deprecated Python 3.8 support ([Link to PR](https://github.com/aws/graph-notebook/pull/683)) - Upgraded Neo4j Bolt driver to v5.x ([Link to PR](https://github.com/aws/graph-notebook/pull/682)) - Upgraded nest_asyncio to 1.6.0 ([Link to PR](https://github.com/aws/graph-notebook/pull/698)) -- Added `%get_import_task` line magic ([Link to PR](https://github.com/aws/graph-notebook/pull/668)) -- Added `--export-to` JSON file option to `%%graph_notebook_config` ([Link to PR](https://github.com/aws/graph-notebook/pull/684)) - Improved iPython config directory retrieval logic ([Link to PR](https://github.com/aws/graph-notebook/pull/687)) - Fixed `%db_reset` output for token modes ([Link to PR](https://github.com/aws/graph-notebook/pull/691)) - Fixed `%%gremlin profile` serialization issue on Neptune DB v1.2 and older ([Link to PR](https://github.com/aws/graph-notebook/pull/694)) diff --git a/src/graph_notebook/configuration/generate_config.py b/src/graph_notebook/configuration/generate_config.py index c57e351e..83f58038 100644 --- a/src/graph_notebook/configuration/generate_config.py +++ b/src/graph_notebook/configuration/generate_config.py @@ -14,7 +14,7 @@ HTTP_PROTOCOL_FORMATS, WS_PROTOCOL_FORMATS, DEFAULT_NEO4J_USERNAME, DEFAULT_NEO4J_PASSWORD, DEFAULT_NEO4J_DATABASE, NEPTUNE_CONFIG_HOST_IDENTIFIERS, is_allowed_neptune_host, false_str_variants, - GRAPHSONV3_VARIANTS, 
GRAPHSONV2_VARIANTS, GRAPHBINARYV1_VARIANTS, + GRAPHBINARYV1, GREMLIN_SERIALIZERS_HTTP, NEPTUNE_DB_SERVICE_NAME, NEPTUNE_ANALYTICS_SERVICE_NAME, normalize_service_name) @@ -73,17 +73,29 @@ def __init__(self, traversal_source: str = '', username: str = '', password: str serializer_lower = message_serializer.lower() # TODO: Update with untyped serializers once supported in GremlinPython + # Accept TinkerPop serializer class name + # https://github.com/apache/tinkerpop/blob/fd040c94a66516e473811fe29eaeaf4081cf104c/docs/src/reference/gremlin-applications.asciidoc#graphson + # https://github.com/apache/tinkerpop/blob/fd040c94a66516e473811fe29eaeaf4081cf104c/docs/src/reference/gremlin-applications.asciidoc#graphbinary if serializer_lower == '': message_serializer = DEFAULT_GREMLIN_SERIALIZER - elif serializer_lower in GRAPHSONV3_VARIANTS: - message_serializer = 'graphsonv3' - elif serializer_lower in GRAPHSONV2_VARIANTS: - message_serializer = 'graphsonv2' - elif serializer_lower in GRAPHBINARYV1_VARIANTS: - message_serializer = 'graphbinaryv1' + elif 'graphson' in serializer_lower: + message_serializer = 'GraphSON' + if 'untyped' in serializer_lower: + message_serializer += 'Untyped' + if 'v1' in serializer_lower: + if 'untyped' in serializer_lower: + message_serializer += 'MessageSerializerV1' + else: + message_serializer += 'MessageSerializerGremlinV1' + elif 'v2' in serializer_lower: + message_serializer += 'MessageSerializerV2' + else: + message_serializer += 'MessageSerializerV3' + elif 'graphbinary' in serializer_lower: + message_serializer = GRAPHBINARYV1 else: print(f'Invalid Gremlin serializer specified, defaulting to graphsonv3. 
' - f'Valid serializers: [graphsonv3, graphsonv2, graphbinaryv1].') + f'Valid serializers: {GREMLIN_SERIALIZERS_HTTP}.') message_serializer = DEFAULT_GREMLIN_SERIALIZER self.traversal_source = traversal_source @@ -93,16 +105,21 @@ def __init__(self, traversal_source: str = '', username: str = '', password: str if include_protocol: protocol_lower = connection_protocol.lower() - if protocol_lower == '': - connection_protocol = DEFAULT_GREMLIN_PROTOCOL - elif protocol_lower in HTTP_PROTOCOL_FORMATS: + if message_serializer in GREMLIN_SERIALIZERS_HTTP: connection_protocol = DEFAULT_HTTP_PROTOCOL - elif protocol_lower in WS_PROTOCOL_FORMATS: - connection_protocol = DEFAULT_WS_PROTOCOL + if protocol_lower != '' and protocol_lower not in HTTP_PROTOCOL_FORMATS: + print(f"Enforcing HTTP protocol usage for serializer: {message_serializer}.") else: - print(f"Invalid connection protocol specified, defaulting to {DEFAULT_GREMLIN_PROTOCOL}. " - f"Valid protocols: [websockets, http].") - connection_protocol = DEFAULT_GREMLIN_PROTOCOL + if protocol_lower == '': + connection_protocol = DEFAULT_GREMLIN_PROTOCOL + elif protocol_lower in HTTP_PROTOCOL_FORMATS: + connection_protocol = DEFAULT_HTTP_PROTOCOL + elif protocol_lower in WS_PROTOCOL_FORMATS: + connection_protocol = DEFAULT_WS_PROTOCOL + else: + print(f"Invalid connection protocol specified, defaulting to {DEFAULT_GREMLIN_PROTOCOL}. 
" + f"Valid protocols: [websockets, http].") + connection_protocol = DEFAULT_GREMLIN_PROTOCOL self.connection_protocol = connection_protocol def to_dict(self): diff --git a/src/graph_notebook/magics/graph_magic.py b/src/graph_notebook/magics/graph_magic.py index 28598ae5..1ca6b78e 100644 --- a/src/graph_notebook/magics/graph_magic.py +++ b/src/graph_notebook/magics/graph_magic.py @@ -52,10 +52,10 @@ NEPTUNE_CONFIG_HOST_IDENTIFIERS, is_allowed_neptune_host, \ STATISTICS_LANGUAGE_INPUTS, STATISTICS_LANGUAGE_INPUTS_SPARQL, STATISTICS_MODES, SUMMARY_MODES, \ SPARQL_EXPLAIN_MODES, OPENCYPHER_EXPLAIN_MODES, GREMLIN_EXPLAIN_MODES, \ - OPENCYPHER_PLAN_CACHE_MODES, OPENCYPHER_DEFAULT_TIMEOUT, OPENCYPHER_STATUS_STATE_MODES, + OPENCYPHER_PLAN_CACHE_MODES, OPENCYPHER_DEFAULT_TIMEOUT, OPENCYPHER_STATUS_STATE_MODES, \ normalize_service_name, NEPTUNE_DB_SERVICE_NAME, NEPTUNE_ANALYTICS_SERVICE_NAME, GRAPH_PG_INFO_METRICS, \ - DEFAULT_GREMLIN_PROTOCOL, GREMLIN_PROTOCOL_FORMATS, DEFAULT_HTTP_PROTOCOL, normalize_protocol_name, - generate_snapshot_name) + DEFAULT_GREMLIN_PROTOCOL, GREMLIN_PROTOCOL_FORMATS, DEFAULT_HTTP_PROTOCOL, DEFAULT_WS_PROTOCOL, \ + GREMLIN_SERIALIZERS_WS, GREMLIN_SERIALIZERS_CLASS_TO_MIME_MAP, normalize_protocol_name, generate_snapshot_name) from graph_notebook.network import SPARQLNetwork from graph_notebook.network.gremlin.GremlinNetwork import parse_pattern_list_str, GremlinNetwork from graph_notebook.visualization.rows_and_columns import sparql_get_rows_and_columns, opencypher_get_rows_and_columns @@ -1249,14 +1249,23 @@ def gremlin(self, line, cell, local_ns: dict = None): using_http = False query_start = time.time() * 1000 # time.time() returns time in seconds w/high precision; x1000 to get in ms if self.client.is_neptune_domain(): - connection_protocol = normalize_protocol_name(args.connection_protocol) \ - if args.connection_protocol != '' \ - else self.graph_notebook_config.gremlin.connection_protocol + if args.connection_protocol != '': + 
connection_protocol = normalize_protocol_name(args.connection_protocol) + if connection_protocol == DEFAULT_WS_PROTOCOL and \ + self.graph_notebook_config.gremlin.message_serializer not in GREMLIN_SERIALIZERS_WS: + print("Unsupported serializer for GremlinPython client, " + f"compatible serializers are: {GREMLIN_SERIALIZERS_WS}") + print("Defaulting to HTTP protocol.") + connection_protocol = DEFAULT_HTTP_PROTOCOL + else: + connection_protocol = self.graph_notebook_config.gremlin.connection_protocol try: if connection_protocol == DEFAULT_HTTP_PROTOCOL: using_http = True + message_serializer = self.graph_notebook_config.gremlin.message_serializer + message_serializer_mime = GREMLIN_SERIALIZERS_CLASS_TO_MIME_MAP[message_serializer] query_res_http = self.client.gremlin_http_query(cell, headers={ - 'Accept': 'application/vnd.gremlin-v1.0+json;types=false'}) + 'Accept': message_serializer_mime}) query_res_http.raise_for_status() try: query_res_http_json = query_res_http.json() diff --git a/src/graph_notebook/neptune/client.py b/src/graph_notebook/neptune/client.py index 2d21a032..e113694f 100644 --- a/src/graph_notebook/neptune/client.py +++ b/src/graph_notebook/neptune/client.py @@ -32,7 +32,6 @@ # client >= 3.5.0 as the HashableDict is now part of that client driver. 
# import graph_notebook.neptune.gremlin.graphsonV3d0_MapType_objectify_patch # noqa F401 -DEFAULT_GREMLIN_SERIALIZER = 'graphsonv3' DEFAULT_GREMLIN_TRAVERSAL_SOURCE = 'g' DEFAULT_SPARQL_CONTENT_TYPE = 'application/x-www-form-urlencoded' DEFAULT_PORT = 8182 @@ -119,16 +118,35 @@ false_str_variants = [False, 'False', 'false', 'FALSE'] -GRAPHSONV3_VARIANTS = ['graphsonv3', 'graphsonv3d0', 'graphsonserializersv3d0', 'graphsonmessageserializerv3'] -GRAPHSONV2_VARIANTS = ['graphsonv2', 'graphsonv2d0', 'graphsonserializersv2d0', 'graphsonmessageserializerv2'] -GRAPHBINARYV1_VARIANTS = ['graphbinaryv1', 'graphbinary', 'graphbinaryserializersv1', 'graphbinarymessageserializerv1'] +GRAPHSONV1 = 'GraphSONMessageSerializerGremlinV1' +GRAPHSONV2 = 'GraphSONMessageSerializerV2' +GRAPHSONV3 = 'GraphSONMessageSerializerV3' +GRAPHSONV1_UNTYPED = 'GraphSONUntypedMessageSerializerV1' +GRAPHSONV2_UNTYPED = 'GraphSONUntypedMessageSerializerV2' +GRAPHSONV3_UNTYPED = 'GraphSONUntypedMessageSerializerV3' +GRAPHBINARYV1 = 'GraphBinaryMessageSerializerV1' + +GREMLIN_SERIALIZERS_CLASS_TO_MIME_MAP = { + GRAPHSONV1: 'application/vnd.gremlin-v1.0+json', + GRAPHSONV2: 'application/vnd.gremlin-v2.0+json', + GRAPHSONV3: 'application/vnd.gremlin-v3.0+json', + GRAPHSONV1_UNTYPED: 'application/vnd.gremlin-v1.0+json;types=false', + GRAPHSONV2_UNTYPED: 'application/vnd.gremlin-v2.0+json;types=false', + GRAPHSONV3_UNTYPED: 'application/vnd.gremlin-v3.0+json;types=false', + GRAPHBINARYV1: 'application/vnd.graphbinary-v1.0' +} + +GREMLIN_SERIALIZERS_WS = [GRAPHSONV2, GRAPHSONV3, GRAPHBINARYV1] +GREMLIN_SERIALIZERS_HTTP = [GRAPHSONV1, GRAPHSONV1_UNTYPED, GRAPHSONV2_UNTYPED, GRAPHSONV3_UNTYPED] +GREMLIN_SERIALIZERS_ALL = GREMLIN_SERIALIZERS_WS + GREMLIN_SERIALIZERS_HTTP +DEFAULT_GREMLIN_SERIALIZER = GRAPHSONV1_UNTYPED DEFAULT_WS_PROTOCOL = "websockets" DEFAULT_HTTP_PROTOCOL = "http" WS_PROTOCOL_FORMATS = ["ws", "websocket", DEFAULT_WS_PROTOCOL] HTTP_PROTOCOL_FORMATS = ["https", "rest", 
DEFAULT_HTTP_PROTOCOL] GREMLIN_PROTOCOL_FORMATS = WS_PROTOCOL_FORMATS + HTTP_PROTOCOL_FORMATS -DEFAULT_GREMLIN_PROTOCOL = DEFAULT_WS_PROTOCOL +DEFAULT_GREMLIN_PROTOCOL = DEFAULT_HTTP_PROTOCOL STATISTICS_MODES = ["", "status", "disableAutoCompute", "enableAutoCompute", "refresh", "delete"] SUMMARY_MODES = ["", "basic", "detailed"] @@ -153,16 +171,22 @@ def is_allowed_neptune_host(hostname: str, host_allowlist: list): return False -def get_gremlin_serializer(serializer_str: str): - serializer_lower = serializer_str.lower() - if serializer_lower == 'graphbinaryv1': +def get_gremlin_serializer_driver_class(serializer_str: str): + if serializer_str == GRAPHBINARYV1: return serializer.GraphBinarySerializersV1() - elif serializer_lower == 'graphsonv2': + elif serializer_str == GRAPHSONV2: return serializer.GraphSONSerializersV2d0() else: return serializer.GraphSONSerializersV3d0() +def get_gremlin_serializer_mime(serializer_str: str): + if serializer_str in GREMLIN_SERIALIZERS_CLASS_TO_MIME_MAP.keys(): + return GREMLIN_SERIALIZERS_CLASS_TO_MIME_MAP[serializer_str] + else: + return GREMLIN_SERIALIZERS_CLASS_TO_MIME_MAP[GRAPHSONV1_UNTYPED] + + def normalize_protocol_name(protocol: str): if protocol in WS_PROTOCOL_FORMATS: return DEFAULT_WS_PROTOCOL @@ -223,7 +247,7 @@ def __init__(self, host: str, port: int = DEFAULT_PORT, self.gremlin_traversal_source = gremlin_traversal_source self.gremlin_username = gremlin_username self.gremlin_password = gremlin_password - self.gremlin_serializer = get_gremlin_serializer(gremlin_serializer) + self.gremlin_serializer = gremlin_serializer self.neo4j_username = neo4j_username self.neo4j_password = neo4j_password self.neo4j_auth = neo4j_auth @@ -373,9 +397,10 @@ def get_gremlin_connection(self, transport_kwargs) -> client.Client: request = self._prepare_request('GET', ws_url) traversal_source = 'g' if self.is_neptune_domain() else self.gremlin_traversal_source + message_serializer = 
get_gremlin_serializer_driver_class(self.gremlin_serializer) return client.Client(ws_url, traversal_source, transport_factory=transport_factory_args, username=self.gremlin_username, password=self.gremlin_password, - message_serializer=self.gremlin_serializer, + message_serializer=message_serializer, headers=dict(request.headers), **transport_kwargs) def gremlin_query(self, query, transport_args=None, bindings=None): diff --git a/test/unit/configuration/test_configuration.py b/test/unit/configuration/test_configuration.py index b45265eb..cf95a37d 100644 --- a/test/unit/configuration/test_configuration.py +++ b/test/unit/configuration/test_configuration.py @@ -51,7 +51,7 @@ def test_generate_default_config(self): self.assertEqual('', config.gremlin.username) self.assertEqual('', config.gremlin.password) self.assertEqual(DEFAULT_GREMLIN_PROTOCOL, config.gremlin.connection_protocol) - self.assertEqual('graphsonv3', config.gremlin.message_serializer) + self.assertEqual('GraphSONUntypedMessageSerializerV1', config.gremlin.message_serializer) self.assertEqual('neo4j', config.neo4j.username) self.assertEqual('password', config.neo4j.password) self.assertEqual(True, config.neo4j.auth) @@ -170,7 +170,7 @@ def test_get_configuration_generic_required_input(self): 'traversal_source': 'g', 'username': '', 'password': '', - 'message_serializer': 'graphsonv3' + 'message_serializer': 'GraphSONUntypedMessageSerializerV1' }, 'neo4j': { 'username': 'neo4j', @@ -197,7 +197,7 @@ def test_get_configuration_generic_all_input(self): 'traversal_source': 'a', 'username': 'user', 'password': 'pass', - 'message_serializer': 'graphbinaryv1' + 'message_serializer': 'GraphBinaryMessageSerializerV1' }, 'neo4j': { 'username': 'neo_user', @@ -267,8 +267,8 @@ def test_get_configuration_neptune_required_input(self): 'traversal_source': 'g', 'username': '', 'password': '', - 'message_serializer': 'graphsonv3', - 'connection_protocol': 'websockets' + 'message_serializer': 
'GraphSONUntypedMessageSerializerV1', + 'connection_protocol': 'http' }, 'neo4j': { 'username': 'neo4j', @@ -300,7 +300,7 @@ def test_get_configuration_neptune_all_input(self): 'traversal_source': 'a', 'username': 'a_user', 'password': 'a_pass', - 'message_serializer': 'graphbinaryv1', + 'message_serializer': 'GraphSONUntypedMessageSerializerV3', 'connection_protocol': 'http' }, 'neo4j': { @@ -328,7 +328,7 @@ def test_get_configuration_neptune_all_input(self): 'traversal_source': 'g', 'username': '', 'password': '', - 'message_serializer': 'graphbinaryv1', + 'message_serializer': 'GraphSONUntypedMessageSerializerV3', 'connection_protocol': 'http' }, 'neo4j': { @@ -472,7 +472,7 @@ def test_configuration_gremlinsection_generic_default(self): self.assertEqual(config.gremlin.traversal_source, 'g') self.assertEqual(config.gremlin.username, '') self.assertEqual(config.gremlin.password, '') - self.assertEqual(config.gremlin.message_serializer, 'graphsonv3') + self.assertEqual(config.gremlin.message_serializer, 'GraphSONUntypedMessageSerializerV1') self.assertFalse(hasattr(config.gremlin, "connection_protocol")) def test_configuration_gremlinsection_generic_override(self): @@ -486,7 +486,7 @@ def test_configuration_gremlinsection_generic_override(self): self.assertEqual(config.gremlin.traversal_source, 't') self.assertEqual(config.gremlin.username, 'foo') self.assertEqual(config.gremlin.password, 'bar') - self.assertEqual(config.gremlin.message_serializer, 'graphbinaryv1') + self.assertEqual(config.gremlin.message_serializer, 'GraphBinaryMessageSerializerV1') self.assertFalse(hasattr(config.gremlin, "connection_protocol")) def test_configuration_gremlinsection_neptune_default(self): @@ -494,7 +494,7 @@ def test_configuration_gremlinsection_neptune_default(self): self.assertEqual(config.gremlin.traversal_source, 'g') self.assertEqual(config.gremlin.username, '') self.assertEqual(config.gremlin.password, '') - self.assertEqual(config.gremlin.message_serializer, 'graphsonv3') 
+ self.assertEqual(config.gremlin.message_serializer, 'GraphSONUntypedMessageSerializerV1') self.assertEqual(config.gremlin.connection_protocol, DEFAULT_GREMLIN_PROTOCOL) def test_configuration_gremlinsection_neptune_override(self): @@ -510,7 +510,7 @@ def test_configuration_gremlinsection_neptune_override(self): self.assertEqual(config.gremlin.traversal_source, 'g') self.assertEqual(config.gremlin.username, '') self.assertEqual(config.gremlin.password, '') - self.assertEqual(config.gremlin.message_serializer, 'graphbinaryv1') + self.assertEqual(config.gremlin.message_serializer, 'GraphBinaryMessageSerializerV1') self.assertEqual(config.gremlin.connection_protocol, DEFAULT_HTTP_PROTOCOL) def test_configuration_gremlinsection_protocol_neptune_default_with_proxy(self): diff --git a/test/unit/configuration/test_configuration_from_main.py b/test/unit/configuration/test_configuration_from_main.py index b76dc4af..0d625517 100644 --- a/test/unit/configuration/test_configuration_from_main.py +++ b/test/unit/configuration/test_configuration_from_main.py @@ -9,7 +9,7 @@ from graph_notebook.configuration.generate_config import AuthModeEnum, Configuration, GremlinSection from graph_notebook.configuration.get_config import get_config from graph_notebook.neptune.client import (NEPTUNE_DB_SERVICE_NAME, NEPTUNE_ANALYTICS_SERVICE_NAME, - DEFAULT_HTTP_PROTOCOL, DEFAULT_WS_PROTOCOL) + DEFAULT_HTTP_PROTOCOL) class TestGenerateConfigurationMain(unittest.TestCase): @@ -135,7 +135,7 @@ def test_generate_configuration_main_gremlin_protocol_no_service(self): self.assertEqual(0, result) config = get_config(self.test_file_path) config_dict = config.to_dict() - self.assertEqual(DEFAULT_WS_PROTOCOL, config_dict['gremlin']['connection_protocol']) + self.assertEqual(DEFAULT_HTTP_PROTOCOL, config_dict['gremlin']['connection_protocol']) def test_generate_configuration_main_gremlin_protocol_db(self): result = os.system(f'{self.python_cmd} -m graph_notebook.configuration.generate_config ' @@ -149,7 
+149,7 @@ def test_generate_configuration_main_gremlin_protocol_db(self): self.assertEqual(0, result) config = get_config(self.test_file_path) config_dict = config.to_dict() - self.assertEqual(DEFAULT_WS_PROTOCOL, config_dict['gremlin']['connection_protocol']) + self.assertEqual(DEFAULT_HTTP_PROTOCOL, config_dict['gremlin']['connection_protocol']) def test_generate_configuration_main_gremlin_protocol_analytics(self): result = os.system(f'{self.python_cmd} -m graph_notebook.configuration.generate_config '