src/wrappers/PhyxWrapper.js

const fs = require('fs');
const path = require('path');

/** Helper methods from lodash. */
const { has, cloneDeep, uniq } = require('lodash');

/** For NQuads export. */
const JSONLD = require('jsonld');

const owlterms = require('../utils/owlterms');

const { PhylorefWrapper } = require('./PhylorefWrapper');
const { PhylogenyWrapper } = require('./PhylogenyWrapper');
const { CitationWrapper } = require('./CitationWrapper');

/**
 * The PhyxWrapper wraps an entire Phyx document.
 */

class PhyxWrapper {
  /**
   * Wraps an entire PHYX document.
   * @param {Object} phyx - The Phyx structure to wrap.
   * @param {function(newick: string): {name: string, children: Object[]}}
   *    [newickParser=PhylogenyWrapper.getParsedNewick] - A method
   *    that accepts a Newick string and returns a list of nodes. Each node should have a
   *    'children' key with its children and optionally a 'name' key with its label. This
   *    code previously depended on phylotree.js, whose newick_parser() function works exactly
   *    like this. This option allows you to drop in Phylotree's newick_parser() or -- if you
   *    prefer -- any other option.
   */
  constructor(phyx, newickParser = PhylogenyWrapper.getParsedNewick) {
    //
    this.phyx = phyx;
    this.newickParser = newickParser;
  }

  // Determine a 'default nomenclatural code' for this Phyx file. There are
  // two ways to do this:
  //  1. If the Phyx file has a 'defaultNomenclaturalCodeIRI' property, we use that.
  //  2. Otherwise, we check to see if every phyloref in this file has the same
  //     nomenclatural code. If so, we can use that code. If not, i.e. if any of
  //     the phylorefs are missing a nomenclatural code or include a specifier,
  //     we default to owlterms.UNKNOWN_CODE.
  get defaultNomenCode() {
    if (has(this.phyx, 'defaultNomenclaturalCodeIRI')) return this.phyx.defaultNomenclaturalCodeIRI;
    const nomenCodes = (this.phyx.phylorefs || [])
      .map(phyloref => new PhylorefWrapper(phyloref).defaultNomenCode);
    const uniqNomenCodes = uniq(nomenCodes);
    if (uniqNomenCodes.length === 1) return uniqNomenCodes[0];
    return owlterms.UNKNOWN_CODE;
  }

  /**
   * Return a provided Phyx document as a normalized JSON document. We ignore most keys -- including
   * keys we don't know -- but any key that can be wrapped by one of the other Wrappers in this
   * package will be wrapped and normalized before being returned.
   *
   * Normalization is mostly needed for TaxonomicUnitWrappers and its subclasses
   * (TaxonConceptWrapper, TaxonNameWrapper), since these can be represented in several essentially
   * identical ways. But if we implement it at every level, we can implement comparison code in
   * Klados easily.
   *
   * Two Phyx documents should -- upon being normalized -- be comparable with each other with
   * lodash.deepEqual().
   */
  static normalize(phyxDocument) {
    const normalizedDocument = cloneDeep(phyxDocument);

    normalizedDocument.phylorefs = (phyxDocument.phylorefs || []).map(PhylorefWrapper.normalize);
    normalizedDocument.phylogenies = (phyxDocument.phylogenies || [])
      .map(PhylogenyWrapper.normalize);
    if ('source' in phyxDocument) {
      normalizedDocument.source = CitationWrapper.normalize(phyxDocument.source);
    }

    return normalizedDocument;
  }

  /**
   * Generate an executable ontology from this Phyx document. The document is mostly in JSON-LD
   * already, except for three important things:
   *    1. We have to convert all phylogenies into a series of statements relating to the nodes
   *       inside these phylogenies.
   *    2. We have to convert phylogenies into OWL restrictions.
   *    3. Insert all matches between taxonomic units in this file.
   *
   * @param {string} [baseIRI=""] - The base IRI to use when generating this Phyx document.
   *    This should include a trailing '#' or '/'. Use '' to indicate that relative IDs
   *    should be generated in the produced ontology (e.g. '#phylogeny1'). Note that if a
   *    baseIRI is provided, then relative IDs already in the Phyx file (identified by an
   *    initial '#') will be turned into absolute IDs by removing the initial `#` and
   *    prepending them with the baseIRI.
   * @return {Object} This Phyx document as an OWL ontology as a JSON-LD object.
   */
  asJSONLD(baseIRI = '') {
    const jsonld = cloneDeep(this.phyx);

    // Some helper methods for generating base IRIs for phylorefs and phylogenies.
    function getBaseIRIForPhyloref(index) {
      if (baseIRI) return `${baseIRI}phyloref${index}`;
      return `#phyloref${index}`;
    }

    function getBaseIRIForPhylogeny(index) {
      if (baseIRI) return `${baseIRI}phylogeny${index}`;
      return `#phylogeny${index}`;
    }

    // Given a relative ID (e.g. '#phylo1') make it absolute (`${baseIRI}phylo1`).
    function makeIDAbsolute(phylogenyId) {
      if (baseIRI && phylogenyId.startsWith('#')) return `${baseIRI}${phylogenyId.substring(1)}`; // Remove the initial '#'.
      return phylogenyId;
    }

    // Given an absolute ID (`${baseIRI}phylo1`) make it relative (e.g. '#phylo1').
    function makeIDRelative(phylogenyId) {
      if (phylogenyId.startsWith(baseIRI)) return `#${phylogenyId.substring(baseIRI.length)}`;
      return phylogenyId;
    }

    if (has(jsonld, 'phylorefs')) {
      // We might have phyloref IDs set to relative IRIs (e.g. "#phyloref0").
      // If the baseIRI is set to '', that's fine. But if not, we'll add it
      // to the relative IRI to make it absolute. This seems to avoid problems
      // with some JSON-LD parsers.
      if (baseIRI) {
        jsonld.phylorefs = jsonld.phylorefs.map((phyloref) => {
          if ((phyloref['@id'] || '').startsWith('#')) {
            const modifiedPhyloref = cloneDeep(phyloref);
            modifiedPhyloref['@id'] = makeIDAbsolute(phyloref['@id']);
            return modifiedPhyloref;
          }
          return phyloref;
        });
      }

      // Convert phyloreferences into an OWL class restriction
      jsonld.phylorefs = jsonld.phylorefs.map(
        (phyloref, countPhyloref) => new PhylorefWrapper(phyloref, this.defaultNomenCode)
          .asJSONLD(getBaseIRIForPhyloref(countPhyloref))
      );
    }

    if (has(jsonld, 'phylogenies')) {
      // We might have phyloref IDs set to relative IRIs (e.g. "#phyloref0").
      // If the baseIRI is set to '', that's fine. But if not, we'll add it
      // to the relative IRI to make it absolute. This seems to avoid problems
      // with some JSON-LD parsers.
      if (baseIRI) {
        jsonld.phylogenies = jsonld.phylogenies.map((phylogeny) => {
          if ((phylogeny['@id'] || '').startsWith('#')) {
            const modifiedPhylogeny = cloneDeep(phylogeny);
            modifiedPhylogeny['@id'] = makeIDAbsolute(phylogeny['@id']);
            return modifiedPhylogeny;
          }
          return phylogeny;
        });
      }

      // Add descriptions for individual nodes in each phylogeny.
      jsonld.phylogenies = jsonld.phylogenies.map(
        (phylogeny, countPhylogeny) => new PhylogenyWrapper(phylogeny, this.defaultNomenCode)
          .asJSONLD(getBaseIRIForPhylogeny(countPhylogeny), this.newickParser)
      );

      // Go through all the nodes and add information on expected resolution.
      jsonld.phylogenies.forEach((phylogeny) => {
        const phylogenyId = phylogeny['@id'];
        (phylogeny.nodes || []).forEach((node) => {
          // We can't set expected resolution information on unlabeled nodes.
          if (!node.labels) return;

          jsonld.phylorefs.forEach((phyloref) => {
            const phylorefId = phyloref['@id'];

            // There are two ways in which we determine that a phyloreference
            // is expected to resolve to a node:
            //  (1) If nodeLabel is set, then that must be one of the node's labels.
            //  (2) If nodeLabel is not set, then one of the node's label should be
            //      identical to the phyloreference's label.
            //
            // We set flagNodeExpectsPhyloref in all cases where we should note
            // that this node expects to resolve to this phyloreference.
            let flagNodeExpectsPhyloref = false;

            // console.log(`Testing expected resolution of '${phylorefId}' on `
            // + `'${phylogenyId}' (${makeIDRelative(phylogenyId)}).`);

            if (
              has(phyloref, 'expectedResolution')
              && (
                // The user might have used the absolute phylogeny ID here.
                has(phyloref.expectedResolution, phylogenyId)

                // Or they might have used a relative phylogeny ID.
                || has(phyloref.expectedResolution, makeIDRelative(phylogenyId))
              )
            ) {
              // Expected resolution information set! The node label mentioned in that
              // information must be identical to one of the labels of this phylogeny node.

              // Figure out which phylogenyId was matched here.
              const nodeLabel = has(phyloref.expectedResolution, phylogenyId)
                ? phyloref.expectedResolution[phylogenyId].nodeLabel
                : phyloref.expectedResolution[makeIDRelative(phylogenyId)].nodeLabel;

              if (nodeLabel && (node.labels || []).includes(nodeLabel)) {
                flagNodeExpectsPhyloref = true;
              }
            } else if ((node.labels || []).includes(phyloref.label)) {
              // No expected resolution information, so we just check whether
              // the phyloref label is one of the labels on this phylogeny node.
              flagNodeExpectsPhyloref = true;
            }

            if (flagNodeExpectsPhyloref) {
              // Modify this phylogeny node's type to include that it is a type
              // of:
              //  obi:is_specified_output_of some (
              //    obi:Prediction and obi:has_specified_output some #phyloref_id
              //  )
              node[owlterms.RDF_TYPE].push({
                '@type': owlterms.OWL_RESTRICTION,
                onProperty: owlterms.OBI_IS_SPECIFIED_OUTPUT_OF,
                someValuesFrom: {
                  '@type': owlterms.OWL_CLASS,
                  intersectionOf: [
                    { '@id': owlterms.OBI_PREDICTION },
                    {
                      '@type': owlterms.OWL_RESTRICTION,
                      onProperty: owlterms.OBI_HAS_SPECIFIED_INPUT,
                      someValuesFrom: {
                        '@id': phylorefId,
                      },
                    },
                  ],
                },
              });
            }
          });
        });
      });
    }

    // Earlier, we used to generate a list of taxonomic matches here (stored in
    // jsonld.hasTaxonomicUnitMatches) that logically expressed how taxonomic
    // units in phyloref specifiers were related to taxonomic units in phylogeny
    // nodes. This is no longer necessary, since phyloref specifiers are now logical
    // expressions that directly evaluate to phylogeny nodes. However, if in the
    // future we decide that we need to perform more advanced TU matching, this
    // would be the place to implement that.

    // If there is a top-level source, generate a bibliographicCitation for it.
    if (has(jsonld, 'source')) {
      jsonld.source.bibliographicCitation = new CitationWrapper(jsonld.source).toString();
    }

    // Set up the top-level object '@type'. If one is present, we add our terms to that.
    if (!has(jsonld, '@type')) jsonld['@type'] = [];
    if (!Array.isArray(jsonld['@type'])) jsonld['@type'] = [jsonld['@type']];
    jsonld['@type'].push('owl:Ontology');

    // Set up the ontology imports. If one is present, we add our imports to that.
    if (!has(jsonld, 'owl:imports')) jsonld['owl:imports'] = [];
    if (!Array.isArray(jsonld['owl:imports'])) jsonld['owl:imports'] = [jsonld['owl:imports']];
    jsonld['owl:imports'].push('http://ontology.phyloref.org/2018-12-14/phyloref.owl');
    jsonld['owl:imports'].push('http://ontology.phyloref.org/2018-12-14/tcan.owl');

    // If the '@context' is missing, add it here.
    if (!has(jsonld, '@context')) {
      jsonld['@context'] = owlterms.PHYX_CONTEXT_JSON;
    }

    return jsonld;
  }

  /**
   * Generate an executable ontology from this Phyx document as N-Quads. Under the
   * hood, we generate an OWL/JSON-LD representation of this Phyx document, and then
   * convert it into N-Quads so that OWLAPI-supporting tools can directly consume it.
   *
   * @param {string} [baseIRI=""] - The base IRI to use when generating this Phyx document.
   *    This should include a trailing '#' or '/'. Use '' to indicate that relative IDs
   *    should be generated in the produced ontology (e.g. '#phylogeny1'). Note that if a
   *    baseIRI is provided, then relative IDs already in the Phyx file (identified by an
   *    initial '#') will be turned into absolute IDs by removing the initial `#` and
   *    prepending them with the baseIRI.
   * @param {string} [filePath=undefined] - The path of the Phyx file being converted.
   *    Used only if the `@context` of the file is a relative path.
   * @return {Promise[string]} A Promise to return this Phyx document as a string that can
   *    be written to an N-Quads file.
   */
  toRDF(baseIRI = '', filePath = undefined) {
    const owlJSONLD = this.asJSONLD(baseIRI);

    // For the purposes of testing, we are sometimes given a relative path to `@context`,
    // but the JSONLD package does not support this. Instead, we'll import the contents
    // of the relative path on the fly.
    if (filePath && has(owlJSONLD, '@context') && owlJSONLD['@context'].startsWith('.')) {
      owlJSONLD['@context'] = JSON.parse(fs.readFileSync(
        path.resolve(filePath, owlJSONLD['@context'])
      ));
    }

    return JSONLD.toRDF(owlJSONLD, { format: 'application/n-quads' });
  }
}

module.exports = {
  PhyxWrapper,
};