Skip to content

feat: add support for cloning GitLab repositories #316

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 19 commits into from
Jun 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
2b49374
chore: modify git service schema in defaults.ini
nathanwn Jun 19, 2023
0ed51fd
chore: implement load config methods for git service classes
nathanwn Jun 19, 2023
eea81c7
chore: fix broken functions and tests due to new ini config schema
nathanwn Jun 19, 2023
185483e
chore: generalize git service detection logic to use git service domain
nathanwn Jun 19, 2023
e2a1c32
chore: implement logic to clone repositories
nathanwn Jun 19, 2023
fee0da3
chore: remove unnecessary repo availability check before clone
nathanwn Jun 19, 2023
2b5f2e1
chore: propagate GitLab access token env vars into the container
nathanwn Jun 19, 2023
750b054
chore: add instructing comments about access token in default ini file
nathanwn Jun 20, 2023
c96e6b7
chore: remove unnecessary str type cast when logging
nathanwn Jun 20, 2023
b42a9e3
chore: fix typo in docstring
nathanwn Jun 20, 2023
dbb2748
chore: git integration test for cloning a public GitLab repository
nathanwn Jun 20, 2023
f5a4bd0
chore: adjust error messages and logging
nathanwn Jun 21, 2023
4712b13
chore: revert and add log messages for cloning repos
nathanwn Jun 21, 2023
4e41752
chore: minor fix for BaseGitService docstring
nathanwn Jun 21, 2023
185f587
chore: construct GitLab clone URL with urllib utilities
nathanwn Jun 21, 2023
2e607da
chore: add tests for function to get allowed git service domains
nathanwn Jun 21, 2023
407eebb
chore: add error handling for urlparse call
nathanwn Jun 21, 2023
9425dfe
chore: modify git_url tests to interact directly with the global defa…
nathanwn Jun 22, 2023
965282f
chore: minor literal string styling fix
nathanwn Jun 22, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions scripts/dev_scripts/integration_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,15 @@ echo "micronaut-projects/micronaut-core: Analyzing the repo path and the branch
echo -e "----------------------------------------------------------------------------------\n"
$RUN_MACARON analyze -rp https://github.com/micronaut-projects/micronaut-core -b 3.5.x --skip-deps || log_fail

# Integration test: analyze a public GitLab repository and compare the JSON report
# against the stored expected result.
echo -e "\n----------------------------------------------------------------------------------"
echo "gitlab.com/tinyMediaManager/tinyMediaManager: Analyzing the repo path, the branch name and the commit digest when automatic dependency resolution is skipped."
echo -e "----------------------------------------------------------------------------------\n"
JSON_EXPECTED=$WORKSPACE/tests/e2e/expected_results/tinyMediaManager/tinyMediaManager.json
JSON_RESULT=$WORKSPACE/output/reports/gitlab_com/tinyMediaManager/tinyMediaManager/tinyMediaManager.json
# The commit is pinned with -d; presumably so that the report stays comparable with JSON_EXPECTED.
$RUN_MACARON analyze -rp https://gitlab.com/tinyMediaManager/tinyMediaManager -b main -d cca6b67a335074eca42136556f0a321f75dc4f48 --skip-deps || log_fail

python $COMPARE_JSON_OUT $JSON_RESULT $JSON_EXPECTED || log_fail

echo -e "\n----------------------------------------------------------------------------------"
echo "jenkinsci/plot-plugin: Analyzing the repo path, the branch name and the commit digest when automatic dependency resolution is skipped."
echo -e "----------------------------------------------------------------------------------\n"
Expand Down
2 changes: 2 additions & 0 deletions scripts/release_scripts/run_macaron.sh
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,8 @@ docker run \
-e "USER_UID=${USER_UID}" \
-e "USER_GID=${USER_GID}" \
-e "GITHUB_TOKEN=${GITHUB_TOKEN}" \
-e "MCN_PUBLIC_GITLAB_TOKEN=${MCN_PUBLIC_GITLAB_TOKEN}" \
-e "MCN_PRIVATE_GITLAB_TOKEN=${MCN_PRIVATE_GITLAB_TOKEN}" \
"${proxy_vars[@]}" \
"${prod_vars[@]}" \
"${mounts[@]}" \
Expand Down
12 changes: 12 additions & 0 deletions src/macaron/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,12 @@
from macaron.config.defaults import create_defaults, load_defaults
from macaron.config.global_config import global_config
from macaron.config.target_config import TARGET_CONFIG_SCHEMA
from macaron.errors import ConfigurationError
from macaron.output_reporter.reporter import HTMLReporter, JSONReporter, PolicyReporter
from macaron.parsers.yaml.loader import YamlLoader
from macaron.policy_engine.policy_engine import run_policy_engine, show_prelude
from macaron.slsa_analyzer.analyzer import Analyzer
from macaron.slsa_analyzer.git_service import GIT_SERVICES

logger: logging.Logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -134,6 +136,16 @@ def perform_action(action_args: argparse.Namespace) -> None:
logger.error("GitHub access token not set.")
sys.exit(os.EX_USAGE)
global_config.gh_token = gh_token

# TODO: Here we should try to statically analyze the config before
# actually running the analysis.
try:
for git_service in GIT_SERVICES:
git_service.load_defaults()
except ConfigurationError as error:
logger.error(error)
sys.exit(os.EX_USAGE)

analyze_slsa_levels_single(action_args)
case _:
logger.error("Macaron does not support command option %s.", action_args.action)
Expand Down
28 changes: 20 additions & 8 deletions src/macaron/config/defaults.ini
Original file line number Diff line number Diff line change
Expand Up @@ -58,14 +58,26 @@ parent_limit = 10
# E.g. com.oracle.coherence.ce:coherence
artifact_ignore_list =

[git]
# The list of allowed git hosts.
# Host names are separated by spaces and they can be defined in multiple lines.
# Duplicated host names are ignored.
allowed_hosts =
github.com
ol-bitbucket.us.oracle.com
gitlab.com
# Git services from which Macaron is allowed to clone repositories.
# For security purposes, Macaron will only clone repositories from the domains specified.

# Access to GitHub is required in most cases for Macaron to analyse not only the main
# repo but also its dependencies.
[git_service.github]
domain = github.com

# Access to public GitLab (gitlab.com).
# An optional access token can be provided through the `MCN_PUBLIC_GITLAB_TOKEN` environment variable.
# This access token is optional, only necessary when you need to clone private repositories.
# The `read_repository` permission is required for this token.
[git_service.gitlab.public]
domain = gitlab.com

# Access to a private GitLab instance (e.g. your organization's self-hosted GitLab instance).
# If this section is enabled, an access token must be provided through the `MCN_PRIVATE_GITLAB_TOKEN` environment variable.
# The `read_repository` permission is required for this token.
# [git_service.gitlab.private]
# domain = example.org

# This is the spec for trusted Maven build tools.
[builder.maven]
Expand Down
8 changes: 8 additions & 0 deletions src/macaron/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,11 @@ class CUEExpectationError(MacaronError):

class CUERuntimeError(MacaronError):
"""Happens when there are errors in CUE expectation validation."""


class ConfigurationError(MacaronError):
    """Happens when there is an error in the configuration (.ini) file.

    For example, a git service section that is present but has no ``domain`` value.
    """


class CloneError(MacaronError):
    """Happens when a git repository cannot be cloned."""
12 changes: 6 additions & 6 deletions src/macaron/slsa_analyzer/analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
NoneDependencyAnalyzer,
)
from macaron.dependency_analyzer.cyclonedx import get_deps_from_sbom
from macaron.errors import CloneError
from macaron.output_reporter.reporter import FileReporter
from macaron.output_reporter.results import Record, Report, SCMStatus
from macaron.slsa_analyzer import git_url
Expand Down Expand Up @@ -527,13 +528,13 @@ def _prepare_repo(
return None

git_service = self.get_git_service(resolved_remote_path)
if not git_service.can_clone_remote_repo(resolved_remote_path):
logger.error("Cannot clone the remote repo at %s", resolved_remote_path)
return None

repo_unique_path = git_url.get_repo_dir_name(resolved_remote_path)
resolved_local_path = os.path.join(target_dir, repo_unique_path)
git_url.clone_remote_repo(resolved_local_path, resolved_remote_path)
try:
git_service.clone_repo(resolved_local_path, resolved_remote_path)
except CloneError as error:
logger.error("Cannot clone %s: %s", resolved_remote_path, str(error))
return None
else:
logger.info("The path to repo %s is a local path.", repo_path)
resolved_local_path = self._resolve_local_path(self.local_repos_path, repo_path)
Expand Down Expand Up @@ -577,7 +578,6 @@ def get_git_service(remote_path: str) -> BaseGitService:
The git service derived from the remote path.
"""
for git_service in GIT_SERVICES:
git_service.load_defaults()
if git_service.is_detected(remote_path):
return git_service

Expand Down
6 changes: 3 additions & 3 deletions src/macaron/slsa_analyzer/git_service/__init__.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
# Copyright (c) 2022 - 2022, Oracle and/or its affiliates. All rights reserved.
# Copyright (c) 2022 - 2023, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

"""The git_service package contains the supported git services for Macaron."""

from .base_git_service import BaseGitService
from .bitbucket import BitBucket
from .github import GitHub
from .gitlab import GitLab
from .gitlab import PrivateGitLab, PublicGitLab

# The list of supported git services. The order of the list determines the order
# in which each git service is checked against the target repository.
GIT_SERVICES: list[BaseGitService] = [GitHub(), GitLab(), BitBucket()]
GIT_SERVICES: list[BaseGitService] = [GitHub(), PublicGitLab(), PrivateGitLab(), BitBucket()]
110 changes: 84 additions & 26 deletions src/macaron/slsa_analyzer/git_service/base_git_service.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
# Copyright (c) 2022 - 2022, Oracle and/or its affiliates. All rights reserved.
# Copyright (c) 2022 - 2023, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

"""This module contains the BaseGitService class to be inherited by a git service."""

from abc import abstractmethod

from macaron.config.defaults import defaults
from macaron.errors import CloneError, ConfigurationError
from macaron.slsa_analyzer import git_url


class BaseGitService:
"""This abstract class is used to implement git services."""
Expand All @@ -18,15 +22,56 @@ def __init__(self, name: str) -> None:
The name of the git service.
"""
self.name = name
self.domain: str | None = None

@abstractmethod
def load_defaults(self) -> None:
"""Load the default values from defaults.ini."""
"""Load the values for this git service from the ini configuration."""
raise NotImplementedError

@abstractmethod
def load_domain(self, section_name: str) -> str | None:
    """Read the ``domain`` value of a git service section in the ini configuration.

    A missing section is not an error: it means the user has not enabled the
    corresponding git service. A section that exists but has no usable ``domain``
    value is reported as a configuration error.

    Parameters
    ----------
    section_name : str
        The name of the git service section in the ini configuration file.

    Returns
    -------
    str | None
        The configured domain, or ``None`` when the section ``section_name`` does
        not exist in the configuration.

    Raises
    ------
    ConfigurationError
        If the section exists but its ``domain`` key is missing or empty.
    """
    if defaults.has_section(section_name):
        configured_domain = defaults[section_name].get("domain")
        if configured_domain:
            return configured_domain
        raise ConfigurationError(
            f'The "domain" key is missing in section [{section_name}] of the .ini configuration file.'
        )

    # An absent section is legitimate: not every supported git service has to
    # be declared in the ini config.
    return None

def is_detected(self, url: str) -> bool:
"""Return True if the remote repo is using this git service.
"""Check if the remote repo at the given ``url`` is hosted on this git service.

This check is done by checking the URL of the repo against the domain of this
git service.

Parameters
----------
Expand All @@ -36,25 +81,36 @@ def is_detected(self, url: str) -> bool:
Returns
-------
bool
True if this git service is detected else False.
True if the repo is indeed hosted on this git service.
"""
raise NotImplementedError
if self.domain is None:
return False
return (
git_url.parse_remote_url(
url,
allowed_git_service_domains=[self.domain],
)
is not None
)

@abstractmethod
def can_clone_remote_repo(self, url: str) -> bool:
"""Return True if the remote repository can be cloned.
def clone_repo(self, clone_dir: str, url: str) -> None:
"""Clone a repository.

Parameters
----------
clone_dir: str
The name of the directory to clone into.
This is equivalent to the <directory> argument of ``git clone``.
url : str
The remote url.
The url to the repository.

Returns
-------
bool
True if the repo can be cloned, else False.
Raises
------
CloneError
If there is an error cloning the repo.
"""
raise NotImplementedError
raise NotImplementedError()


class NoneGitService(BaseGitService):
Expand All @@ -65,7 +121,11 @@ def __init__(self) -> None:
super().__init__("")

def load_defaults(self) -> None:
"""Load the default values from defaults.ini."""
"""Load the values for this git service from the ini configuration.

In this particular case, since this class represents a ``None`` git service,
we do nothing.
"""

def is_detected(self, url: str) -> bool:
"""Return True if the remote repo is using this git service.
Expand All @@ -82,17 +142,15 @@ def is_detected(self, url: str) -> bool:
"""
return False

def can_clone_remote_repo(self, url: str) -> bool:
"""Return True if the remote repository can be cloned.
def clone_repo(self, _clone_dir: str, url: str) -> None:
"""Clone a repo.

Parameters
----------
url : str
The remote url.
In this particular case, since this class represents a ``None`` git service,
we do nothing but raise a ``CloneError``.

Returns
-------
bool
True if the repo can be cloned, else False.
Raises
------
CloneError
Always raise, since this method should not be used to clone any repository.
"""
return False
raise CloneError(f"Internal error encountered when cloning the repo '{url}'.")
43 changes: 7 additions & 36 deletions src/macaron/slsa_analyzer/git_service/bitbucket.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
# Copyright (c) 2022 - 2022, Oracle and/or its affiliates. All rights reserved.
# Copyright (c) 2022 - 2023, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

"""This module contains the spec for the BitBucket service."""

import logging

from macaron.slsa_analyzer import git_url
from macaron.slsa_analyzer.git_service.base_git_service import BaseGitService

logger: logging.Logger = logging.getLogger(__name__)
Expand All @@ -19,39 +18,11 @@ def __init__(self) -> None:
super().__init__("bitbucket")

def load_defaults(self) -> None:
"""Load the default values from defaults.ini."""
"""Load the values for this git service from the ini configuration."""
# TODO: implement this once support for BitBucket is added.
return None

def can_clone_remote_repo(self, url: str) -> bool:
"""Return True if the remote repository can be cloned.

Parameters
----------
url : str
The remote url.

Returns
-------
bool
True if the repo can be cloned, else False.
"""
def clone_repo(self, _clone_dir: str, _url: str) -> None:
"""Clone a BitBucket repo."""
# TODO: implement this once support for BitBucket is added.
logger.info("Cloning BitBucket repositories is not supported yet. Please clone the repository manually.")
return False

def is_detected(self, url: str) -> bool:
"""Return True if the remote repo is using this git service.

Parameters
----------
url : str
The url of the remote repo.

Returns
-------
bool
True if this git service is detected else False.
"""
parsed_url = git_url.parse_remote_url(url)
if not parsed_url or self.name not in parsed_url.netloc:
return False

return True
Loading