Source code for eutils._internal.client

# -*- coding: utf-8 -*-

from __future__ import absolute_import, division, print_function, unicode_literals

import logging
import os

import lxml.etree as le

from .exceptions import EutilsError
from .queryservice import QueryService
from .xmlfacades.dbsnp import ExchangeSet
from .xmlfacades.einforesult import EInfoResult
from .xmlfacades.entrezgeneset import EntrezgeneSet
from .xmlfacades.esearchresult import ESearchResult
from .xmlfacades.gbset import GBSet
from .xmlfacades.pubmedarticleset import PubmedArticleSet
from .xmlfacades.pubmedcentralarticleset import PubmedCentralArticleSet


logger = logging.getLogger(__name__)


[docs]class Client(object):
    """class-based access to NCBI E-Utilities, returning Python classes
    with rich data accessors

    """

    def __init__(self, cache=False, api_key=None):
        """
        :param str cache: passed to QueryService, which see for explanation
        :param str api_key: API key from NCBI
        :raises EutilsError: if cache file couldn't be created

        """

        self._qs = QueryService(cache=cache, api_key=api_key)


    @property
    def databases(self):
        """
        list of databases available from eutils (per einfo query)
        """
        try:
            return self._databases
        except AttributeError:
            self._databases = self.einfo().databases
            return self._databases

[docs]    def einfo(self, db=None):
        """query the einfo endpoint

        :param db: string (optional)
        :rtype: EInfo or EInfoDB object

        If db is None, the reply is a list of databases, which is returned
        in an EInfo object (which has a databases() method).

        If db is not None, the reply is information about the specified
        database, which is returned in an EInfoDB object.  (Version 2.0
        data is automatically requested.)
        """

        if db is None:
            return EInfoResult(self._qs.einfo()).dblist
        return EInfoResult(self._qs.einfo({"db": db, "version": "2.0"})).dbinfo

[docs]    def esearch(self, db, term):
        """query the esearch endpoint
        """
        esr = ESearchResult(self._qs.esearch({"db": db, "term": term}))
        if esr.count > esr.retmax:
            logger.warning("NCBI found {esr.count} results, but we truncated the reply at {esr.retmax}"
                        " results; see https://github.com/biocommons/eutils/issues/124/".format(esr=esr))
        return esr

[docs]    def efetch(self, db, id):
        """query the efetch endpoint
        """
        db = db.lower()
        xml = self._qs.efetch({"db": db, "id": str(id)})
        doc = le.XML(xml)
        if db in ["gene"]:
            return EntrezgeneSet(doc)
        if db in ["nuccore", "nucest", "protein"]:
            # TODO: GBSet is misnamed; it should be GBSeq and get the GBSeq XML node as root (see gbset.py)
            return GBSet(doc)
        if db in ["pubmed"]:
            return PubmedArticleSet(doc)
        if db in ["snp"]:
            return ExchangeSet(xml)
        if db in ["pmc"]:
            return PubmedCentralArticleSet(doc)
        raise EutilsError("database {db} is not currently supported by eutils".format(db=db))


# <LICENSE>
# Copyright 2015 eutils Committers
# 
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# 
# http://www.apache.org/licenses/LICENSE-2.0
# 
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied. See the License for the specific language governing
# permissions and limitations under the License.
# </LICENSE>