Commit 5b7989e8 authored by Sebastien Tourbier's avatar Sebastien Tourbier
Browse files

fix: make changes in Datalad-related code to make the tests pass

parent 32136cfc
Loading
Loading
Loading
Loading
+30 −6
Original line number Diff line number Diff line
@@ -82,7 +82,7 @@ def create_initial_participants_tsv(bids_dir):
        f.write("participant_id\tage\tsex\tgroup")


def create_empty_bids_dataset(bids_dir=None, dataset_desc=None):
def create_empty_bids_dataset(bids_dir=None, dataset_desc=None, project_dir=None):
    """Create an empty BIDS dataset.

    Parameters
@@ -92,13 +92,29 @@ def create_empty_bids_dataset(bids_dir=None, dataset_desc=None):

    dataset_desc : dict
        Dictionary with the content of the dataset_description.json file.

    project_dir : str
        Path to the project directory in which the BIDS dataset will be nested.
    """
    print("> Creating an empty BIDS dataset at: ", bids_dir, "...")
    bids_dirname = os.path.basename(bids_dir)
    # Create the BIDS dataset directory if it does not exist
    # NOTE(review): the guard tests the PARENT directory but creates bids_dir
    # itself, so bids_dir is never created when its parent already exists;
    # makedirs(..., exist_ok=True) needs no guard at all — confirm and drop the if.
    if not os.path.exists(os.path.dirname(bids_dir)):
        os.makedirs(bids_dir, exist_ok=True)
    # Initialize the BIDS dataset as a Datalad-managed dataset
    datalad.api.create(dataset=bids_dir, cfg_proc=["text2git", "bids"])
    # Initialize the BIDS dataset as a Datalad-managed dataset.
    create_params = {
        "cfg_proc": ["text2git", "bids"],
        "force": True,  # Enforce dataset creation in a non-empty directory
    }
    # If project_dir is specified, create the dataset as a subdataset
    # of the project dataset
    if project_dir:
        create_params["dataset"] = project_dir
        create_params["path"] = bids_dirname
    # Otherwise, create a standalone dataset
    else:
        create_params["dataset"] = bids_dir
    datalad.api.create(**create_params)
    # Create the dataset_description.json file
    with open(os.path.join(bids_dir, "dataset_description.json"), "w") as f:
        json.dump(dataset_desc, f, indent=4)
@@ -112,9 +128,17 @@ def create_empty_bids_dataset(bids_dir=None, dataset_desc=None):
    # Create an initial participants.tsv file
    create_initial_participants_tsv(bids_dir)
    # Save the state of the initial dataset
    datalad.api.save(
        dataset=bids_dir, message="Initial blank BIDS dataset", recursive=True
    )
    save_params = {
        "message": "Initial blank BIDS dataset of collaborative project",
        "recursive": False,
    }
    if project_dir:
        save_params["dataset"] = project_dir
        save_params["path"] = bids_dirname
    else:
        save_params["dataset"] = bids_dir
    datalad.api.save(**save_params)
    print(SUCCESS)


def create_bids_layout(bids_dir=None, **kwargs):
+17 −8
Original line number Diff line number Diff line
@@ -45,15 +45,23 @@ class DatasetHandler:
        if not os.path.isdir(ds_path):
            os.makedirs(ds_path)
        # Initialize the BIDS dataset as a Datalad-managed dataset
        datalad.api.create(dataset=ds_path, cfg_proc=["text2git", "bids"])
        create_params = {
            "dataset": ds_path,
            "cfg_proc": ["text2git", "bids"],
            "force": True,  # Enforce dataset creation in a non-empty directory
        }
        datalad.api.create(**create_params)
        datasetdesc_path = os.path.join(ds_path, "dataset_description.json")
        if not os.path.isfile(datasetdesc_path):
            # Write the dataset_description.json file
            datasetdesc_dict.write_file(jsonfilename=datasetdesc_path)
            # Save the state of the dataset with Datalad
            datalad.api.save(
                dataset=ds_path, message="Initial BIDS dataset state", recursive=True
            )
            save_params = {
                "dataset": ds_path,
                "message": "Initial BIDS dataset state",
                "recursive": True,
            }
            datalad.api.save(**save_params)
        # Load the created BIDS dataset in BIDS Manager (creates companion files)
        ds_obj = BidsDataset(ds_path)
        if ds_obj:
@@ -105,10 +113,11 @@ class DatasetHandler:
            req_dict.save_as_json(req_path)
            ds_obj.get_requirements()
            # Save state of dataset with Datalad
            datalad.api.save(
                dataset=BidsDataset.dirname,
                message="Overwrite the converters in the BIDS Manager requirements.json file",
            )
            save_params = {
                "dataset": BidsDataset.dirname,
                "message": "Overwrite the converters in the BIDS Manager requirements.json file",
            }
            datalad.api.save(**save_params)

    @staticmethod
    def get_run(root_dir: str, bids_entities: dict, bids_modality: str):
+18 −6
Original line number Diff line number Diff line
@@ -67,8 +67,12 @@ class ParticipantHandler:
        # to make BIDS Validator happy
        post_import_bids_refinement(ds_obj.dirname)
        # Save dataset state with Datalad
        save_msg = f'Add files for subject(s): {input_data["subjects"]}'
        datalad.api.save(dataset=ds_obj.dirname, message=save_msg, recursive=True)
        save_params = {
            "dataset": ds_obj.dirname,
            "message": f'Add files for subject(s): {input_data["subjects"]}',
            "recursive": True,
        }
        datalad.api.save(**save_params)
        print(SUCCESS)

    def sub_delete(self, input_data=None):
@@ -86,8 +90,12 @@ class ParticipantHandler:
        # Refresh
        ds_obj.parse_bids()
        # Save dataset state with Datalad
        save_msg = f'Remove files for subject {input_data["subject"]}'
        datalad.api.save(dataset=ds_obj.dirname, message=save_msg, recursive=True)
        save_params = {
            "dataset": ds_obj.dirname,
            "message": f'Remove files for subject {input_data["subject"]}',
            "recursive": True,
        }
        datalad.api.save(**save_params)
        print(SUCCESS)

    def sub_delete_file(self, input_data=None):
@@ -109,8 +117,12 @@ class ParticipantHandler:
                    if file["subject"] not in subjects:
                        subjects.append(file["subject"])
        # Save dataset state with Datalad
        save_msg = f"Remove files for subjects {subjects}"
        datalad.api.save(dataset=ds_obj.dirname, message=save_msg, recursive=True)
        save_params = {
            "dataset": ds_obj.dirname,
            "message": f"Remove files for subjects {subjects}",
            "recursive": True,
        }
        datalad.api.save(**save_params)
        print(SUCCESS)

    def sub_get(self, input_data=None, output_file=None):
+18 −13
Original line number Diff line number Diff line
@@ -51,7 +51,12 @@ def initialize_project_structure(
        os.makedirs(project_dir / "documents" / folder, exist_ok=True)

    # Initialize the project dataset as a Datalad-managed dataset
    datalad.api.create(dataset=str(project_dir.absolute()), cfg_proc=["text2git"])
    create_params = {
        "dataset": str(project_dir.absolute()),
        "cfg_proc": ["text2git"],
        "force": True,  # Enforce dataset creation in a non-empty directory
    }
    datalad.api.create(**create_params)

    # Create initial project README.md file
    with open(project_dir / "README.md", "w") as f:
@@ -108,11 +113,12 @@ def create_project(input_data: str, output_file: str):
    with open(output_file, "w") as f:
        json.dump(dataset_content, f, indent=4)
    # Save the state of the dataset with Datalad
    datalad.api.save(
        dataset=str(project_dir.absolute()),
        message="Initial dataset state of collaborative project",
        recursive=False,  # Do not save the nested Datalad-BIDS dataset
    )
    save_params = {
        "dataset": str(project_dir.absolute()),
        "message": "Initial dataset state of collaborative project",
        "recursive": True,  # Also save the nested Datalad-BIDS dataset
    }
    datalad.api.save(**save_params)
    print(SUCCESS)


@@ -246,11 +252,10 @@ def import_document(input_data: str):
    # Copy document from source to target
    shutil.copyfile(source_document_path, target_document_path)
    # Save dataset state with Datalad
    save_msg = (
        f'Add document {input_data["sourceDocumentAbsPath"]} '
        f"in project's documents/ folder"
    )
    datalad.api.save(
        dataset=input_data["targetDatasetPath"], message=save_msg, recursive=True
    )
    save_params = {
        "dataset": input_data["targetProjectAbsPath"],
        "message": f'Import document {input_data["sourceDocumentAbsPath"]} from HIP Center space',
        "recursive": True,
    }
    datalad.api.save(**save_params)
    print(SUCCESS)
+142 −40
Original line number Diff line number Diff line
@@ -3,74 +3,176 @@

"""Methods supporting versioning of BIDS and Collaborative project datasets."""

from datetime import datetime
import json
from sre_constants import SUCCESS  # FIXME(review): wrong module — sre_constants.SUCCESS is a regex-engine opcode; import the application's SUCCESS constant instead
import datalad
from datalad.support.gitrepo import GitRepo


def create_tag(dataset_dir, tag, message):
    """Create a version tag on a dataset managed by Git/Datalad.
TAG_EXCEPTIONS = ["master", "main", "HEAD"]


def validate_tag(tag, discard_exceptions=False):
    """Validate a tag.

    Parameters
    ----------
    dataset_dir : str
        Absolute path to the dataset directory.

    tag : str
        Tag in the format.
        Tag to validate.

    message : str
        Message to be associated with the tag.
    discard_exceptions : bool
        If True, the special names listed in `TAG_EXCEPTIONS`
        ("master", "main", "HEAD") are not accepted and False is
        returned for them; if False, those names validate as True.

    Returns
    -------
    res : datalad.utils.Result
        Result of the Datalad save operation.
    bool
        True if the tag is valid, False otherwise.
    """
    # Create a tag on the dataset
    res = datalad.api.save(
        dataset=dataset_dir,
        message=message,
        version_tag=tag,
    # Check that the tag is in the format X.Y.Z where X, Y, and Z are
    # integers in [0, 255]; parsed with int() + try/except rather than a regex, see
    # https://stackoverflow.com/questions/1265665/how-can-i-check-if-a-string-represents-an-int-without-using-try-except
    try:
        if not all(0 <= int(n) < 256 for n in tag.split(".")):
            return False
    except ValueError:
        if (not discard_exceptions) and (tag in TAG_EXCEPTIONS):
            return True
        else:
            return False
    return True


def create_tag(input_data):
    """Create a version tag on a dataset managed by Git/Datalad.

    **Note:** The tag is created on a Datalad dataset, not on a specific file.
    If the dataset is a BIDS dataset, the tag will be created only on this dataset.
    If the dataset is a Collaborative Project, the tag will be created recursively on
      the project dataset and on the nested BIDS dataset of the project.


    Parameters
    ----------
    input_data : dict
        Dictionary containing the input data for the command
        in the format::

            {
                "path": "/path/to/dataset",
                "tag": "1.0.0",
                "message": "Description of the changes related to the version tag"
            }
    """
    # Load input data
    with open(input_data, "r") as f:
        input_data = json.load(f)
    print(f"Create tag {input_data['tag']} for dataset {input_data['path']}...")

    # Check if the tag is valid
    if not validate_tag(input_data["tag"], discard_exceptions=True):
        raise ValueError(
            f"Impossible to create tag {input_data['tag']}. The format is not valid. "
            "Please use the format X.Y.Z, where X, Y, and Z are integers."
        )
    # Check if the tag already exists
    if input_data["tag"] in [
        tag_dict["name"] for tag_dict in GitRepo(input_data["path"]).get_tags()
    ]:
        raise ValueError(
            f"Impossible to create tag {input_data['tag']}. "
            f"Tag {input_data['tag']} already exists."
        )
    # Create a tag on the dataset
    save_params = {
        "dataset": input_data["path"],
        "message": input_data["message"],
        "version_tag": input_data["tag"],
        "recursive": True,
    }
    res = datalad.api.save(**save_params)
    print(f"Tag creation results: {res}")
    return res
    print(SUCCESS)


def get_tags(dataset_dir):
def get_tags(input_data, output_file):
    """Get the list of tags of a dataset managed by Git/Datalad via subprocess.

    Parameters
    ----------
    dataset_dir : str
        Absolute path to the dataset directory.

    Returns
    -------
    tags : list
        List of tags of the dataset.
    input_data : dict
        Dictionary containing the input data for the command
        in the format::

            {
                "path": "/path/to/dataset",
            }

    output_file : str
        Absolute path to the output JSON file containing the tags
        in the format::

            {
                "path": "/path/to/dataset",
                "tags": ["1.0.0", "1.0.1", "1.1.0"]
            }
    """
    tags = datalad.support.gitrepo.GitRepo(dataset_dir).get_tags()
    # Load input data
    with open(input_data, "r") as f:
        input_data = json.load(f)

    tags = [tag_dict["name"] for tag_dict in GitRepo(input_data["path"]).get_tags()]
    dict_tags = {
        "path": input_data["path"],
        "tags": tags,
    }

    print(f"Tags: {tags}")
    return tags
    # Save the tags to a JSON file
    with open(output_file, "w") as f:
        json.dump(dict_tags, f)
    print(SUCCESS)


def checkout_tag(dataset_dir, tag):
def checkout_tag(input_data):
    """Checkout a specific tag of a dataset managed by Git/Datalad via subprocess.

    **Note:** The master / main branch or the HEAD can also be checked out by
      specifying "master" / "main" / "HEAD" as the tag value.

    Parameters
    ----------
    dataset_dir : str
        Absolute path to the dataset directory.

    tag : str
        Tag to checkout.

    Returns
    -------
    res : datalad.utils.Result
        Result of the Datalad checkout operation.
    input_data : dict
        Dictionary containing the input data for the command
        in the format::

            {
                "path": "/path/to/dataset",
                "tag": "1.0.0",
            }
    """
    # Load input data
    with open(input_data, "r") as f:
        input_data = json.load(f)
    # Check if the tag format is valid
    if not validate_tag(input_data["tag"]):
        raise ValueError(
            f"Impossible to checkout tag {input_data['tag']}. The format is not valid. "
            "Please use the format X.Y.Z, where X, Y, and Z are integers, or "
            "specify 'master' / 'main' to checkout the master / main branch."
        )
    # Set the name and options for tag / branch checkout
    if input_data["tag"] not in TAG_EXCEPTIONS:
        name = f"tags/{input_data['tag']}"
        checkout_opts = ["-b", f"{input_data['tag']}", "--force"]
    else:
        name = input_data["tag"]
        checkout_opts = ["--force"]
    # Checkout a specific tag of the dataset to a new eponymous branch
    datalad.support.gitrepo.GitRepo(dataset_dir).checkout(
        name=f"tags/{tag}",
        options=["-b {tag}", "--force"],
    # or checkout the master / main branch / HEAD
    GitRepo(input_data["path"]).checkout(
        name=name,
        options=checkout_opts,
    )
    print(SUCCESS)
Loading