Skip to content

Fix issue when isolating libraries when file not ready #854

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 15 commits into from
Jun 4, 2021
3 changes: 2 additions & 1 deletion azure_functions_worker/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

# Platform Environment Variables
AZURE_WEBJOBS_SCRIPT_ROOT = "AzureWebJobsScriptRoot"
CONTAINER_NAME = "CONTAINER_NAME"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we have this constant in dependency.py itself? This isn't being used globally.


# Python Specific Feature Flags and App Settings
PYTHON_ROLLBACK_CWD_PATH = "PYTHON_ROLLBACK_CWD_PATH"
Expand All @@ -37,7 +38,7 @@
PYTHON_THREADPOOL_THREAD_COUNT_MIN = 1
PYTHON_THREADPOOL_THREAD_COUNT_MAX = 32
PYTHON_ISOLATE_WORKER_DEPENDENCIES_DEFAULT = False
PYTHON_ISOLATE_WORKER_DEPENDENCIES_DEFAULT_39 = False
PYTHON_ISOLATE_WORKER_DEPENDENCIES_DEFAULT_39 = True
PYTHON_ENABLE_WORKER_EXTENSIONS_DEFAULT = False
PYTHON_ENABLE_WORKER_EXTENSIONS_DEFAULT_39 = True

Expand Down
9 changes: 6 additions & 3 deletions azure_functions_worker/dispatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,8 +271,11 @@ async def _handle__worker_init_request(self, req):
constants.SHARED_MEMORY_DATA_TRANSFER: _TRUE,
}

# Can detech worker packages
DependencyManager.prioritize_customer_dependencies()
# Can detech worker packages only when customer's code is present
# This only works in dedicated and premium sku.
# The consumption sku will switch on environment_reload request.
if not DependencyManager.is_in_linux_consumption():
DependencyManager.prioritize_customer_dependencies()

return protos.StreamingMessage(
request_id=self.request_id,
Expand Down Expand Up @@ -480,7 +483,7 @@ async def _handle__function_environment_reload_request(self, req):
)

# Reload azure google namespaces
DependencyManager.reload_azure_google_namespace(
DependencyManager.reload_customer_libraries(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Remove the # Reload azure google namespaces comment here. Not needed.

func_env_reload_request.function_app_directory
)

Expand Down
137 changes: 87 additions & 50 deletions azure_functions_worker/utils/dependency.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from azure_functions_worker.utils.common import is_true_like
from typing import List, Optional
from types import ModuleType
import importlib
import inspect
import os
Expand All @@ -9,6 +10,7 @@
from ..logging import logger
from ..constants import (
AZURE_WEBJOBS_SCRIPT_ROOT,
CONTAINER_NAME,
PYTHON_ISOLATE_WORKER_DEPENDENCIES,
PYTHON_ISOLATE_WORKER_DEPENDENCIES_DEFAULT,
PYTHON_ISOLATE_WORKER_DEPENDENCIES_DEFAULT_39
Expand Down Expand Up @@ -66,6 +68,10 @@ def initialize(cls):
cls.cx_working_dir = cls._get_cx_working_dir()
cls.worker_deps_path = cls._get_worker_deps_path()

@classmethod
def is_in_linux_consumption(cls):
return CONTAINER_NAME in os.environ

@classmethod
@enable_feature_by(
flag=PYTHON_ISOLATE_WORKER_DEPENDENCIES,
Expand All @@ -87,6 +93,11 @@ def use_worker_dependencies(cls):
# The following log line will not show up in core tools but should
# work in kusto since core tools only collects gRPC logs. This function
# is executed even before the gRPC logging channel is ready.
logger.info(f'Applying use_worker_dependencies:'
f' worker_dependencies: {cls.worker_deps_path},'
f' customer_dependencies: {cls.cx_deps_path},'
f' working_directory: {cls.cx_working_dir}')

cls._remove_from_sys_path(cls.cx_deps_path)
cls._remove_from_sys_path(cls.cx_working_dir)
cls._add_to_sys_path(cls.worker_deps_path, True)
Expand All @@ -101,7 +112,7 @@ def use_worker_dependencies(cls):
PYTHON_ISOLATE_WORKER_DEPENDENCIES_DEFAULT
)
)
def prioritize_customer_dependencies(cls):
def prioritize_customer_dependencies(cls, cx_working_dir=None):
"""Switch the sys.path and ensure the customer's code import are loaded
from CX's deppendencies.

Expand All @@ -112,24 +123,50 @@ def prioritize_customer_dependencies(cls):
As for Linux Consumption, this will only remove worker_deps_path,
but the customer's path will be loaded in function_environment_reload.

The search order of a module name in customer frame is:
The search order of a module name in customer's paths is:
1. cx_deps_path
2. cx_working_dir
2. worker_deps_path
3. cx_working_dir
"""
# Try to get the latest customer's working directory
# cx_working_dir => cls.cx_working_dir => AzureWebJobsScriptRoot
working_directory: str = ''
if cx_working_dir:
working_directory: str = os.path.abspath(cx_working_dir)
if not working_directory:
working_directory = cls.cx_working_dir
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can there ever be a case that cx_working_dir isn't a property in cls?

if not working_directory:
working_directory = os.getenv(AZURE_WEBJOBS_SCRIPT_ROOT, '')

# Try to get the latest customer's dependency path
cx_deps_path: str = cls._get_cx_deps_path()
if not cx_deps_path:
cx_deps_path = cls.cx_deps_path

logger.info('Applying prioritize_customer_dependencies:'
f' worker_dependencies: {cls.worker_deps_path},'
f' customer_dependencies: {cx_deps_path},'
f' working_directory: {working_directory}')

cls._remove_from_sys_path(cls.worker_deps_path)
cls._add_to_sys_path(cls.cx_deps_path, True)
cls._add_to_sys_path(cls.cx_working_dir, False)

# Deprioritize worker dependencies but don't completely remove it
# Otherwise, it will break some really old function apps, those
# don't have azure-functions module in .python_packages
# https://github.com/Azure/azure-functions-core-tools/pull/1498
cls._add_to_sys_path(cls.worker_deps_path, False)

logger.info(f'Start using customer dependencies {cls.cx_deps_path}')
# The modules defined in customer's working directory should have the
# least priority since we uses the new folder structure.
# Please check the "Message to customer" section in the following PR:
# https://github.com/Azure/azure-functions-python-worker/pull/726
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we make a pinned issue as well (or check if this is added to Python Dev Docs itself)?

cls._add_to_sys_path(working_directory, False)

logger.info('Finished prioritize_customer_dependencies')

@classmethod
def reload_azure_google_namespace(cls, cx_working_dir: str):
def reload_customer_libraries(cls, cx_working_dir: str):
"""Reload azure and google namespace, this including any modules in
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not only doing Azure and Google right? Comment should reflect this now.

this namespace, such as azure-functions, grpcio, grpcio-tools etc.

Expand All @@ -152,7 +189,7 @@ def reload_azure_google_namespace(cls, cx_working_dir: str):
use_new = is_true_like(use_new_env)

if use_new:
cls.reload_all_namespaces_from_customer_deps(cx_working_dir)
cls.prioritize_customer_dependencies(cx_working_dir)
else:
cls.reload_azure_google_namespace_from_worker_deps()

Expand Down Expand Up @@ -189,33 +226,6 @@ def reload_azure_google_namespace_from_worker_deps(cls):
logger.info('Unable to reload azure.functions. '
'Using default. Exception:\n{}'.format(ex))

@classmethod
def reload_all_namespaces_from_customer_deps(cls, cx_working_dir: str):
"""This is a new implementation of reloading azure and google
namespace from customer's .python_packages folder. Only intended to be
used in Linux Consumption scenario.

Parameters
----------
cx_working_dir: str
The path which contains customer's project file (e.g. wwwroot).
"""
# Specialized working directory needs to be added
working_directory: str = os.path.abspath(cx_working_dir)

# Switch to customer deps and clear out all module cache in worker deps
cls._remove_from_sys_path(cls.worker_deps_path)
cls._add_to_sys_path(cls.cx_deps_path, True)
cls._add_to_sys_path(working_directory, False)

# Deprioritize worker dependencies but don't completely remove it
# Otherwise, it will break some really old function apps, those
# don't have azure-functions module in .python_packages
# https://github.com/Azure/azure-functions-core-tools/pull/1498
cls._add_to_sys_path(cls.worker_deps_path, False)

logger.info('Reloaded all namespaces from customer dependencies')

@classmethod
def _add_to_sys_path(cls, path: str, add_to_first: bool):
"""This will ensure no duplicated path are added into sys.path and
Expand Down Expand Up @@ -325,7 +335,18 @@ def _get_worker_deps_path() -> str:
if worker_deps_paths:
return worker_deps_paths[0]

# 2. If it fails to find one, try to find one from the parent path
# 2. Try to find module spec of azure.functions without actually
# importing it (e.g. lib/site-packages/azure/functions/__init__.py)
try:
azf_spec = importlib.util.find_spec('azure.functions')
if azf_spec and azf_spec.origin:
return os.path.abspath(
os.path.join(os.path.dirname(azf_spec.origin), '..', '..')
)
except ModuleNotFoundError:
logger.warning('Cannot locate built-in azure.functions module')

# 3. If it fails to find one, try to find one from the parent path
# This is used for handling the CI/localdev environment
return os.path.abspath(
os.path.join(os.path.dirname(__file__), '..', '..')
Expand All @@ -341,18 +362,34 @@ def _remove_module_cache(path: str):
path: str
The module cache to be removed if it is imported from this path.
"""
all_modules = set(sys.modules.keys()) - set(sys.builtin_module_names)
for module_name in all_modules:
module = sys.modules[module_name]
# Module path can be actual file path or a pure namespace path
# For actual files: use __file__ attribute to retrieve module path
# For namespace: use __path__[0] to retrieve module path
module_path = ''
if getattr(module, '__file__', None):
module_path = os.path.dirname(module.__file__)
elif getattr(module, '__path__', None) and getattr(
module.__path__, '_path', None):
module_path = module.__path__._path[0]

if module_path.startswith(path):
sys.modules.pop(module_name)
if not path:
return

not_builtin = set(sys.modules.keys()) - set(sys.builtin_module_names)

# Don't reload azure_functions_worker
to_be_cleared_from_cache = set([
module_name for module_name in not_builtin
if not module_name.startswith('azure_functions_worker')
])

for module_name in to_be_cleared_from_cache:
module = sys.modules.get(module_name)
if not isinstance(module, ModuleType):
continue

# Module path can be actual file path or a pure namespace path.
# Both of these has the module path placed in __path__ property
# The property .__path__ can be None or does not exist in module
try:
module_paths = set(getattr(module, '__path__', None) or [])
if hasattr(module, '__file__') and module.__file__:
module_paths.add(module.__file__)

if any([p for p in module_paths if p.startswith(path)]):
sys.modules.pop(module_name)
except Exception as e:
logger.warning(
f'Attempt to remove module cache for {module_name} but'
f' failed with {e}. Using the original module cache.'
)
3 changes: 3 additions & 0 deletions pack/templates/nix_env_gen.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,10 @@ steps:
inputs:
disableAutoCwd: true
scriptPath: 'pack/scripts/nix_deps.sh'
# The following task is disabled since Python 3.9 to fix azure/ namespace
# https://github.com/Azure/azure-functions-python-worker/pull/854
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will we re-enable this particular task later?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So this is an actual fix for Python 3.9 and afterward.

- task: CopyFiles@2
condition: and(succeeded(), in(variables['pythonVersion'], '3.6', '3.7', '3.8'))
inputs:
contents: |
pack/utils/__init__.py
Expand Down
3 changes: 3 additions & 0 deletions pack/templates/win_env_gen.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,10 @@ steps:
- task: PowerShell@2
inputs:
filePath: 'pack\scripts\win_deps.ps1'
# The following task is disabled since Python 3.9 to fix azure/ namespace
# https://github.com/Azure/azure-functions-python-worker/pull/854
- task: CopyFiles@2
condition: and(succeeded(), in(variables['pythonVersion'], '3.6', '3.7', '3.8'))
inputs:
contents: |
pack\utils\__init__.py
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

# This is a dummy protobuf==3.9.0 package used for E2E
# testing in Azure Functions Python Worker

__version__ = '3.9.0'
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

# This is a dummy grpcio==1.35.0 package used for E2E
# testing in Azure Functions Python Worker.

__version__ = '1.35.0'
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import sys
import os
import json
import azure.functions as func
import google.protobuf as proto
import grpc

# Load dependency manager from customer' context
from azure_functions_worker.utils.dependency import DependencyManager as dm


def main(req: func.HttpRequest) -> func.HttpResponse:
"""This function is an HttpTrigger to check if the modules are loaded from
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice test! :)

customer's dependencies. We have mock a .python_packages/ folder in
this e2e test function app which contains the following stub package:

azure.functions==1.2.1
protobuf==3.9.0
grpc==1.35.0

If the version we check is the same as the one in local .python_packages/,
that means the isolate worker dependencies are working as expected.
"""
result = {
"sys.path": list(sys.path),
"dependency_manager": {
"cx_deps_path": dm._get_cx_deps_path(),
"cx_working_dir": dm._get_cx_working_dir(),
"worker_deps_path": dm._get_worker_deps_path(),
},
"libraries": {
"func.expected.version": "1.2.1",
"func.version": func.__version__,
"func.file": func.__file__,
"proto.expected.version": "3.9.0",
"proto.version": proto.__version__,
"proto.file": proto.__file__,
"grpc.expected.version": "1.35.0",
"grpc.version": grpc.__version__,
"grpc.file": grpc.__file__,
},
"environments": {
"PYTHON_ISOLATE_WORKER_DEPENDENCIES": (
os.getenv('PYTHON_ISOLATE_WORKER_DEPENDENCIES')
),
"AzureWebJobsScriptRoot": os.getenv('AzureWebJobsScriptRoot'),
"PYTHONPATH": os.getenv('PYTHONPATH'),
"HOST_VERSION": os.getenv('HOST_VERSION')
}
}
return func.HttpResponse(json.dumps(result))
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
"scriptFile": "__init__.py",
"bindings": [
{
"authLevel": "function",
"type": "httpTrigger",
"direction": "in",
"name": "req",
"methods": [
"get",
"post"
]
},
{
"type": "http",
"direction": "out",
"name": "$return"
}
]
}
Loading