Commit 5b7989e8 authored by Sebastien Tourbier's avatar Sebastien Tourbier
Browse files

fix: make changes in Datalad-related code to make the tests pass

parent 32136cfc
Loading
Loading
Loading
Loading
+30 −6
Original line number Diff line number Diff line
@@ -82,7 +82,7 @@ def create_initial_participants_tsv(bids_dir):
        f.write("participant_id\tage\tsex\tgroup")


def create_empty_bids_dataset(bids_dir=None, dataset_desc=None):
def create_empty_bids_dataset(bids_dir=None, dataset_desc=None, project_dir=None):
    """Create an empty BIDS dataset.

    Parameters
@@ -92,13 +92,29 @@ def create_empty_bids_dataset(bids_dir=None, dataset_desc=None):

    dataset_desc : dict
        Dictionary with the content of the dataset_description.json file.

    project_dir : str
        Path to the project directory in which the BIDS dataset will be nested.
    """
    print("> Creating an empty BIDS dataset at: ", bids_dir, "...")
    bids_dirname = os.path.basename(bids_dir)
    # Create the BIDS dataset directory if it does not exist
    # NOTE(review): the guard tests the PARENT directory but creates bids_dir
    # itself, so bids_dir is never created when its parent already exists;
    # makedirs(..., exist_ok=True) needs no guard at all — confirm and drop the if.
    if not os.path.exists(os.path.dirname(bids_dir)):
        os.makedirs(bids_dir, exist_ok=True)
    # Initialize the BIDS dataset as a Datalad-managed dataset
    datalad.api.create(dataset=bids_dir, cfg_proc=["text2git", "bids"])
    # Initialize the BIDS dataset as a Datalad-managed dataset.
    create_params = {
        "cfg_proc": ["text2git", "bids"],
        "force": True,  # Enforce dataset creation in a non-empty directory
    }
    # If project_dir is specified, create the dataset as a subdataset
    # of the project dataset
    if project_dir:
        create_params["dataset"] = project_dir
        create_params["path"] = bids_dirname
    # Otherwise, create a standalone dataset
    else:
        create_params["dataset"] = bids_dir
    datalad.api.create(**create_params)
    # Create the dataset_description.json file
    with open(os.path.join(bids_dir, "dataset_description.json"), "w") as f:
        json.dump(dataset_desc, f, indent=4)
@@ -112,9 +128,17 @@ def create_empty_bids_dataset(bids_dir=None, dataset_desc=None):
    # Create an initial participants.tsv file
    create_initial_participants_tsv(bids_dir)
    # Save the state of the initial dataset
    datalad.api.save(
        dataset=bids_dir, message="Initial blank BIDS dataset", recursive=True
    )
    save_params = {
        "message": "Initial blank BIDS dataset of collaborative project",
        "recursive": False,
    }
    if project_dir:
        save_params["dataset"] = project_dir
        save_params["path"] = bids_dirname
    else:
        save_params["dataset"] = bids_dir
    datalad.api.save(**save_params)
    print(SUCCESS)


def create_bids_layout(bids_dir=None, **kwargs):
+17 −8
Original line number Diff line number Diff line
@@ -45,15 +45,23 @@ class DatasetHandler:
        if not os.path.isdir(ds_path):
            os.makedirs(ds_path)
        # Initialize the BIDS dataset as a Datalad-managed dataset
        datalad.api.create(dataset=ds_path, cfg_proc=["text2git", "bids"])
        create_params = {
            "dataset": ds_path,
            "cfg_proc": ["text2git", "bids"],
            "force": True,  # Enforce dataset creation in a non-empty directory
        }
        datalad.api.create(**create_params)
        datasetdesc_path = os.path.join(ds_path, "dataset_description.json")
        if not os.path.isfile(datasetdesc_path):
            # Write the dataset_description.json file
            datasetdesc_dict.write_file(jsonfilename=datasetdesc_path)
            # Save the state of the dataset with Datalad
            datalad.api.save(
                dataset=ds_path, message="Initial BIDS dataset state", recursive=True
            )
            save_params = {
                "dataset": ds_path,
                "message": "Initial BIDS dataset state",
                "recursive": True,
            }
            datalad.api.save(**save_params)
        # Load the created BIDS dataset in BIDS Manager (creates companion files)
        ds_obj = BidsDataset(ds_path)
        if ds_obj:
@@ -105,10 +113,11 @@ class DatasetHandler:
            req_dict.save_as_json(req_path)
            ds_obj.get_requirements()
            # Save state of dataset with Datalad
            datalad.api.save(
                dataset=BidsDataset.dirname,
                message="Overwrite the converters in the BIDS Manager requirements.json file",
            )
            save_params = {
                "dataset": BidsDataset.dirname,
                "message": "Overwrite the converters in the BIDS Manager requirements.json file",
            }
            datalad.api.save(**save_params)

    @staticmethod
    def get_run(root_dir: str, bids_entities: dict, bids_modality: str):
+18 −6
Original line number Diff line number Diff line
@@ -67,8 +67,12 @@ class ParticipantHandler:
        # to make BIDS Validator happy
        post_import_bids_refinement(ds_obj.dirname)
        # Save dataset state with Datalad
        save_msg = f'Add files for subject(s): {input_data["subjects"]}'
        datalad.api.save(dataset=ds_obj.dirname, message=save_msg, recursive=True)
        save_params = {
            "dataset": ds_obj.dirname,
            "message": f'Add files for subject(s): {input_data["subjects"]}',
            "recursive": True,
        }
        datalad.api.save(**save_params)
        print(SUCCESS)

    def sub_delete(self, input_data=None):
@@ -86,8 +90,12 @@ class ParticipantHandler:
        # Refresh
        ds_obj.parse_bids()
        # Save dataset state with Datalad
        save_msg = f'Remove files for subject {input_data["subject"]}'
        datalad.api.save(dataset=ds_obj.dirname, message=save_msg, recursive=True)
        save_params = {
            "dataset": ds_obj.dirname,
            "message": f'Remove files for subject {input_data["subject"]}',
            "recursive": True,
        }
        datalad.api.save(**save_params)
        print(SUCCESS)

    def sub_delete_file(self, input_data=None):
@@ -109,8 +117,12 @@ class ParticipantHandler:
                    if file["subject"] not in subjects:
                        subjects.append(file["subject"])
        # Save dataset state with Datalad
        save_msg = f"Remove files for subjects {subjects}"
        datalad.api.save(dataset=ds_obj.dirname, message=save_msg, recursive=True)
        save_params = {
            "dataset": ds_obj.dirname,
            "message": f"Remove files for subjects {subjects}",
            "recursive": True,
        }
        datalad.api.save(**save_params)
        print(SUCCESS)

    def sub_get(self, input_data=None, output_file=None):
+18 −13
Original line number Diff line number Diff line
@@ -51,7 +51,12 @@ def initialize_project_structure(
        os.makedirs(project_dir / "documents" / folder, exist_ok=True)

    # Initialize the project dataset as a Datalad-managed dataset
    datalad.api.create(dataset=str(project_dir.absolute()), cfg_proc=["text2git"])
    create_params = {
        "dataset": str(project_dir.absolute()),
        "cfg_proc": ["text2git"],
        "force": True,  # Enforce dataset creation in a non-empty directory
    }
    datalad.api.create(**create_params)

    # Create initial project README.md file
    with open(project_dir / "README.md", "w") as f:
@@ -108,11 +113,12 @@ def create_project(input_data: str, output_file: str):
    with open(output_file, "w") as f:
        json.dump(dataset_content, f, indent=4)
    # Save the state of the dataset with Datalad
    datalad.api.save(
        dataset=str(project_dir.absolute()),
        message="Initial dataset state of collaborative project",
        recursive=False,  # Do not save the nested Datalad-BIDS dataset
    )
    save_params = {
        "dataset": str(project_dir.absolute()),
        "message": "Initial dataset state of collaborative project",
        "recursive": True,  # Also save the nested Datalad-BIDS dataset
    }
    datalad.api.save(**save_params)
    print(SUCCESS)


@@ -246,11 +252,10 @@ def import_document(input_data: str):
    # Copy document from source to target
    shutil.copyfile(source_document_path, target_document_path)
    # Save dataset state with Datalad
    save_msg = (
        f'Add document {input_data["sourceDocumentAbsPath"]} '
        f"in project's documents/ folder"
    )
    datalad.api.save(
        dataset=input_data["targetDatasetPath"], message=save_msg, recursive=True
    )
    save_params = {
        "dataset": input_data["targetProjectAbsPath"],
        "message": f'Import document {input_data["sourceDocumentAbsPath"]} from HIP Center space',
        "recursive": True,
    }
    datalad.api.save(**save_params)
    print(SUCCESS)
+142 −40
Original line number Diff line number Diff line
@@ -3,74 +3,176 @@

"""Methods supporting versioning of BIDS and Collaborative project datasets."""

from datetime import datetime
import json
from sre_constants import SUCCESS  # FIXME(review): wrong module — sre_constants.SUCCESS is a regex-engine opcode; import the application's SUCCESS constant instead
import datalad
from datalad.support.gitrepo import GitRepo


def create_tag(dataset_dir, tag, message):
    """Create a version tag on a dataset managed by Git/Datalad.
TAG_EXCEPTIONS = ["master", "main", "HEAD"]


def validate_tag(tag, discard_exceptions=False):
    """Validate a tag.

    Parameters
    ----------
    dataset_dir : str
        Absolute path to the dataset directory.

    tag : str
        Tag in the format.
        Tag to validate.

    message : str
        Message to be associated with the tag.
    discard_exceptions : bool
        If True, the special names listed in `TAG_EXCEPTIONS`
        ("master", "main", "HEAD") are not accepted and False is
        returned for them; if False, those names validate as True.

    Returns
    -------
    res : datalad.utils.Result
        Result of the Datalad save operation.
    bool
        True if the tag is valid, False otherwise.
    """
    # Create a tag on the dataset
    res = datalad.api.save(
        dataset=dataset_dir,
        message=message,
        version_tag=tag,
    # Check that the tag is in the format X.Y.Z where X, Y, and Z are
    # integers in [0, 255]; parsed with int() + try/except rather than a regex, see
    # https://stackoverflow.com/questions/1265665/how-can-i-check-if-a-string-represents-an-int-without-using-try-except
    try:
        if not all(0 <= int(n) < 256 for n in tag.split(".")):
            return False
    except ValueError:
        if (not discard_exceptions) and (tag in TAG_EXCEPTIONS):
            return True
        else:
            return False
    return True


def create_tag(input_data):
    """Create a version tag on a dataset managed by Git/Datalad.

    **Note:** The tag is created on a Datalad dataset, not on a specific file.
    If the dataset is a BIDS dataset, the tag will be created only on this dataset.
    If the dataset is a Collaborative Project, the tag will be created recursively on
      the project dataset and on the nested BIDS dataset of the project.


    Parameters
    ----------
    input_data : dict
        Dictionary containing the input data for the command
        in the format::

            {
                "path": "/path/to/dataset",
                "tag": "1.0.0",
                "message": "Description of the changes related to the version tag"
            }
    """
    # Load input data
    with open(input_data, "r") as f:
        input_data = json.load(f)
    print(f"Create tag {input_data['tag']} for dataset {input_data['path']}...")

    # Check if the tag is valid
    if not validate_tag(input_data["tag"], discard_exceptions=True):
        raise ValueError(
            f"Impossible to create tag {input_data['tag']}. The format is not valid. "
            "Please use the format X.Y.Z, where X, Y, and Z are integers."
        )
    # Check if the tag already exists
    if input_data["tag"] in [
        tag_dict["name"] for tag_dict in GitRepo(input_data["path"]).get_tags()
    ]:
        raise ValueError(
            f"Impossible to create tag {input_data['tag']}. "
            f"Tag {input_data['tag']} already exists."
        )
    # Create a tag on the dataset
    save_params = {
        "dataset": input_data["path"],
        "message": input_data["message"],
        "version_tag": input_data["tag"],
        "recursive": True,
    }
    res = datalad.api.save(**save_params)
    print(f"Tag creation results: {res}")
    return res
    print(SUCCESS)


def get_tags(dataset_dir):
def get_tags(input_data, output_file):
    """Get the list of tags of a dataset managed by Git/Datalad via subprocess.

    Parameters
    ----------
    dataset_dir : str
        Absolute path to the dataset directory.

    Returns
    -------
    tags : list
        List of tags of the dataset.
    input_data : dict
        Dictionary containing the input data for the command
        in the format::

            {
                "path": "/path/to/dataset",
            }

    output_file : str
        Absolute path to the output JSON file containing the tags
        in the format::

            {
                "path": "/path/to/dataset",
                "tags": ["1.0.0", "1.0.1", "1.1.0"]
            }
    """
    tags = datalad.support.gitrepo.GitRepo(dataset_dir).get_tags()
    # Load input data
    with open(input_data, "r") as f:
        input_data = json.load(f)

    tags = [tag_dict["name"] for tag_dict in GitRepo(input_data["path"]).get_tags()]
    dict_tags = {
        "path": input_data["path"],
        "tags": tags,
    }

    print(f"Tags: {tags}")
    return tags
    # Save the tags to a JSON file
    with open(output_file, "w") as f:
        json.dump(dict_tags, f)
    print(SUCCESS)


def checkout_tag(dataset_dir, tag):
def checkout_tag(input_data):
    """Checkout a specific tag of a dataset managed by Git/Datalad via subprocess.

    **Note:** The master / main branch or the HEAD can also be checked out by
      specifying "master" / "main" / "HEAD" as the tag value.

    Parameters
    ----------
    dataset_dir : str
        Absolute path to the dataset directory.

    tag : str
        Tag to checkout.

    Returns
    -------
    res : datalad.utils.Result
        Result of the Datalad checkout operation.
    input_data : dict
        Dictionary containing the input data for the command
        in the format::

            {
                "path": "/path/to/dataset",
                "tag": "1.0.0",
            }
    """
    # Load input data
    with open(input_data, "r") as f:
        input_data = json.load(f)
    # Check if the tag format is valid
    if not validate_tag(input_data["tag"]):
        raise ValueError(
            f"Impossible to checkout tag {input_data['tag']}. The format is not valid. "
            "Please use the format X.Y.Z, where X, Y, and Z are integers, or "
            "specify 'master' / 'main' to checkout the master / main branch."
        )
    # Set the name and options for tag / branch checkout
    if input_data["tag"] not in TAG_EXCEPTIONS:
        name = f"tags/{input_data['tag']}"
        checkout_opts = ["-b", f"{input_data['tag']}", "--force"]
    else:
        name = input_data["tag"]
        checkout_opts = ["--force"]
    # Checkout a specific tag of the dataset to a new eponymous branch
    datalad.support.gitrepo.GitRepo(dataset_dir).checkout(
        name=f"tags/{tag}",
        options=["-b {tag}", "--force"],
    # or checkout the master / main branch / HEAD
    GitRepo(input_data["path"]).checkout(
        name=name,
        options=checkout_opts,
    )
    print(SUCCESS)
Loading