List of authors from DOI

List of authors from DOI

The following code snippet retrieves the list of authors and their affiliations for any paper published in an AGU journal, given its DOI. Before running it, install the lxml, pandas, and async_retriever libraries.

from lxml import html
import pandas as pd
import async_retriever as ar

# Root of the full-text article pages on the AGU (Wiley) site; each DOI is
# appended to it to form the article URL.
base_url = "https://agupubs.onlinelibrary.wiley.com/doi/full"

# DOIs of the papers whose author lists should be extracted.
dois = [
    "10.1029/2018MS001603",
    "10.1029/2019MS001766",
    "10.1029/2018MS001583",
    "10.1029/2019MS001870",
]

# One article URL per DOI, in the same order as ``dois``.
urls = [f"{base_url}/{d}" for d in dois]

# Download every page as text. The processing loop further down pairs
# ``dois`` with ``r`` positionally, so the responses are assumed to come
# back in the same order as ``urls`` -- TODO confirm against async_retriever.
r = ar.retrieve(urls, "text")


def auth_xpath(n):
    """Return the XPath selecting the name text of the ``n``-th author.

    The query targets the ``span`` children of the element whose id is
    ``a{n}_Ctrl`` and returns their text nodes.
    """
    query = f'//*[@id="a{n}_Ctrl"]/span/text()'
    return query


def affi_xpath(n):
    """Return the XPath selecting the affiliation text of the ``n``-th author.

    The query targets the ``p`` children of the element whose id is ``a{n}``
    and returns their text nodes.
    """
    query = f'//*[@id="a{n}"]/p/text()'
    return query


# Upper bound on the author ids (a0, a1, ...) probed on each page. Ids that do
# not exist simply produce empty XPath results and are skipped.
MAX_AUTHORS = 300

# For every DOI, parse the downloaded page, collect (author, affiliation)
# pairs, and write them to ``authors_<doi>.csv`` (no header, no index).
for d, t in zip(dois, r):
    tree = html.fromstring(t)

    auth = []
    for n in range(MAX_AUTHORS):
        names = tree.xpath(auth_xpath(n))
        # Only ids that resolve to exactly one name text are treated as authors.
        if len(names) != 1:
            continue

        affis = tree.xpath(affi_xpath(n))
        if not affis:
            # No affiliation text on the page for this author: record an empty
            # affiliation instead of failing with IndexError and aborting the
            # remaining DOIs.
            affiliation = ""
        elif len(affis) == 1:
            affiliation = affis[0]
        else:
            # With several text nodes the second one is used; presumably the
            # first is not the affiliation itself -- TODO confirm against the
            # page markup.
            affiliation = affis[1]

        auth.append((names[0], affiliation))

    auth_aff = pd.DataFrame(auth)
    # "/" in the DOI is replaced so the DOI can be used as a file name.
    auth_aff.to_csv(f"authors_{d.replace('/', '_')}.csv", index=False, header=False)

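For each DOI, this snippet saves the list of authors and their affiliations to a separate CSV file named authors_<DOI>.csv, where the "/" in the DOI is replaced by "_" (for example, authors_10.1029_2018MS001603.csv). Each row contains an author name followed by the affiliation; the files have no header row.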