# --*- Makefile -*--

# Split one or more input SDF files into separate files, one structure
# per file, and manage the split files further using GNU Make.

# Directory layout: SDF inputs are looked up in INPUT_DIR, generated
# files are placed in subdirectories of OUTPUT_DIR.
INPUT_DIR  = inputs
OUTPUT_DIR = outputs
SPLIT_DIR  = $(OUTPUT_DIR)/split
MOLWT_DIR  = $(OUTPUT_DIR)/mass

# All SDF files present in the input directory. Assigned with ':=' so
# that the directory is globbed once, when the makefile is read, rather
# than again on every expansion of the variable.
INPUT_SDF_FILES := $(wildcard ${INPUT_DIR}/*.sdf)

# We use shell to generate output file names, in the same way 'babel'
# generates them. This requires, unfortunately, some "insider
# knowledge": we assume that 'babel' will number files from 1 to 999
# if there are 999 structures, and will not add any other characters
# to the names. The split structures from each input file are placed
# into a separate subdirectory; the name of this subdirectory is the
# basename of the input file without the .sdf extension.
#
# Structures are counted as the lines matching the V2000 counts-line
# pattern below. A single awk pass prints one numbered name per matching
# line, so an input with no matching lines contributes no names at all
# (a 'seq 1 0' based loop would count down to 0 with BSD 'seq').

OUTPUT_SPLIT_FILES := $(shell for sdf in ${INPUT_SDF_FILES}; do \
	awk -v dir="${SPLIT_DIR}/$$(basename "$$sdf" .sdf)" \
		'/^ *[0-9].* V2000$$/ { n++; print dir "/" n ".xyz" }' "$$sdf"; \
done )

# Partition the generated names into two lists: FIRST_SPLIT_FILES gets
# the '1.xyz' entry of every subdirectory, REST_OF_SPLIT_FILES gets all
# remaining split files.

FIRST_SPLIT_FILES   = $(filter %/1.xyz,$(OUTPUT_SPLIT_FILES))
REST_OF_SPLIT_FILES = $(filter-out %/1.xyz,$(OUTPUT_SPLIT_FILES))

# We can now use the generated split file names to generate the
# intermediate computed files; molecular weight files are used as an
# example, but usually it is worth storing such intermediate files
# only after more complex calculations:

# Command used to run the structure converter.
BABEL := babel

# Name of the generated makefile fragment that holds the dependencies
# between the split files.
SPLIT_DEPEND := .split.d

.PHONY: all
.PHONY: clean distclean

# Default goal: produce every expected split file.
all: $(OUTPUT_SPLIT_FILES)

# Read the generated dependencies between split files; a rule in this
# makefile describes how the included file itself is produced.
include $(SPLIT_DEPEND)

# Generate the dependency file: a time stamp comment followed by one
# line per split file other than '1.xyz', making that file depend on
# the '1.xyz' file of the same subdirectory. The file is regenerated
# whenever an input SDF file changes.
#
# Empty lines and the '1.xyz' entries are filtered inside awk. Ending
# the pipeline with 'grep -v' instead would make the recipe fail
# whenever nothing is left to print (no inputs, or only single-structure
# inputs), because grep exits with status 1 when it selects no lines.
${SPLIT_DEPEND}: ${INPUT_SDF_FILES}
	date "+# %F %T %Z" > $@
	echo ${OUTPUT_SPLIT_FILES} \
	| tr " " "\n" \
	| awk 'NF { depend = $$1; \
		sub("/[0-9]+\\.xyz$$", "/1.xyz", depend); \
		if ($$1 != depend) print $$1 ": " depend }' \
	>> $@

# Split one input SDF file: create the per-input subdirectory and run
# babel with -m on the input, giving it the '.xyz' name inside that
# subdirectory as the output. The rule is keyed on the '1.xyz' file.
$(SPLIT_DIR)/%/1.xyz: $(INPUT_DIR)/%.sdf
	@mkdir -p $(@D)
	$(BABEL) $< $(SPLIT_DIR)/$*/.xyz -m

#------------------------------------------------------------------------------

.PHONY: clean-split-sdf

# The general 'clean' target also runs the cleanup for the split step.
clean: clean-split-sdf

# Remove the expected split files and the generated dependency file.
clean-split-sdf:
	rm -f $(OUTPUT_SPLIT_FILES)
	rm -f $(SPLIT_DEPEND)