FREYA WP2 User Story 4 | As a funder I want to see how many of the research outputs funded by me have an open license enabling reuse, so that I am sure I properly support Open Science. | |
---|---|---|
Funders that support open research are interested in monitoring the extent of open access given to the outputs of grants they award - while the grant is active as well as retrospectively.
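This notebook uses the DataCite GraphQL API to retrieve and report the license types of the outputs, to date, of three funders: DFG, ANR and SNF.

**Goal:** By the end of this notebook you should be able to retrieve the license types of the outputs of these funders and display them as an interactive stacked bar plot.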
%%capture
# Install required Python packages
!pip install gql requests numpy plotnine
# Prepare the GraphQL client
import requests
from IPython.display import display, Markdown
from gql import gql, Client
from gql.transport.requests import RequestsHTTPTransport
# HTTP transport pointing at the DataCite GraphQL endpoint; requests are sent as JSON.
_transport = RequestsHTTPTransport(url='https://api.datacite.org/graphql', use_json=True)

# Client used by every query in this notebook; it is asked to fetch the API
# schema through the transport defined above.
client = Client(transport=_transport, fetch_schema_from_transport=True)
Define the GraphQL query to find all outputs and associated licenses for three different funders: DFG (Deutsche Forschungsgemeinschaft, Germany), ANR (Agence Nationale de la Recherche, France) and SNF (Schweizerischer Nationalfonds zur Förderung der Wissenschaftlichen Forschung, Switzerland).
# Variables for the GraphQL query: the ids of the three funders whose outputs
# and associated licenses (where available) are retrieved. The keys match the
# $funder1, $funder2 and $funder3 variables declared by the query.
query_params = {
    "funder1": "https://doi.org/10.13039/501100001659",
    "funder2": "https://doi.org/10.13039/501100001665",
    "funder3": "https://doi.org/10.13039/501100001711",
}

# Acronym used to label each funder, keyed by funder id. The acronyms are
# listed in the same order as the ids in query_params.
funderId2Acronym = dict(zip(query_params.values(), ("DFG", "ANR", "SNF")))
# GraphQL document sent to the DataCite API. It declares three required funder
# id variables ($funder1, $funder2, $funder3) and requests nine aliased
# `funder` selections, each returning the funder's name and id, the total
# number of its works, and per-license entries (id, title, count):
#   - funder1 / funder2 / funder3: all works of the funder;
#   - funderNDataset: works restricted with resourceTypeId "Dataset";
#   - funderNText: works restricted with resourceTypeId "Text".
# The aliases become the top-level keys of the result.
# NOTE(review): the aggregation code directly after the query execution reads
# only funder1..funder3; the Dataset/Text aliases are presumably consumed
# further down the notebook - confirm.
query = gql("""query getGrantOutputsForFundersById(
$funder1: ID!,
$funder2: ID!,
$funder3: ID!
)
{
funder1: funder(id: $funder1) {
name
id
works {
totalCount
licenses {
id
title
count
}
}
},
funder2: funder(id: $funder2) {
name
id
works {
totalCount
licenses {
id
title
count
}
}
},
funder3: funder(id: $funder3) {
name
id
works {
totalCount
licenses {
id
title
count
}
}
},
funder1Dataset: funder(id: $funder1) {
name
id
works(resourceTypeId: "Dataset") {
totalCount
licenses {
id
title
count
}
}
},
funder1Text: funder(id: $funder1) {
name
id
works(resourceTypeId: "Text") {
totalCount
licenses {
id
title
count
}
}
},
funder2Dataset: funder(id: $funder2) {
name
id
works(resourceTypeId: "Dataset") {
totalCount
licenses {
id
title
count
}
}
},
funder2Text: funder(id: $funder2) {
name
id
works(resourceTypeId: "Text") {
totalCount
licenses {
id
title
count
}
}
},
funder3Dataset: funder(id: $funder3) {
name
id
works(resourceTypeId: "Dataset") {
totalCount
licenses {
id
title
count
}
}
},
funder3Text: funder(id: $funder3) {
name
id
works(resourceTypeId: "Text") {
totalCount
licenses {
id
title
count
}
}
}
}
""")
Run the above query via the GraphQL client
import json

# Execute the query with the funder ids as GraphQL variables.
# variable_values is given the variables mapping itself: json.dumps() would
# turn it into a str, so the request would carry a JSON-encoded string in
# place of the variables object.
data = client.execute(query, variable_values=query_params)
Plot an interactive stacked bar plot showing the number of outputs issued under each license type, for each funder.
import plotly.io as pio
import plotly.express as px
from IPython.display import IFrame
import pandas as pd
from operator import itemgetter
import re
# Adapted from: https://stackoverflow.com/questions/58766305/is-there-any-way-to-implement-stacked-or-grouped-bar-charts-in-plotly-express
def px_stacked_bar(df, color_name='License Type', y_name='Metrics', **pxargs):
    """Return a Plotly Express stacked bar figure built from a wide dataframe.

    The dataframe is melted to long form: its index supplies the x axis, each
    column becomes one colour group named by ``color_name``, and the cell
    values are plotted on the y axis under ``y_name``.

    Args:
        df: Wide dataframe. Its index name is used as the x-axis column, so
            the index is expected to be named.
        color_name: Column name given to the melted column labels.
        y_name: Column name given to the melted values.
        **pxargs: Extra keyword arguments forwarded to ``px.bar``. Passing
            ``color_discrete_sequence`` overrides the default palette.

    Returns:
        The figure produced by ``px.bar``.
    """
    idx_col = df.index.name
    m = pd.melt(df.reset_index(), id_vars=idx_col, var_name=color_name, value_name=y_name)
    # For Plotly colour sequences see: https://plotly.com/python/discrete-color/
    # Use Pastel1 only as a default: passing the palette as an explicit keyword
    # next to **pxargs raised TypeError when the caller supplied their own.
    pxargs.setdefault('color_discrete_sequence', px.colors.qualitative.Pastel1)
    return px.bar(m, x=idx_col, y=y_name, color=color_name, **pxargs)
def get_grouped_license_type(licenseId):
    """Map a license id onto the coarse license group used in the plot.

    Args:
        licenseId: License identifier string, or None when no id is available.

    Returns:
        "cc-by" if the id contains "cc-by-", "cc0" if it contains "cc0-",
        "other" for any other string, and None when ``licenseId`` is None.
    """
    # Check for None first: re.search() raises TypeError on None, which made
    # the None case unreachable when it was tested after the pattern matches.
    if licenseId is None:
        return None
    if re.search('cc-by-', licenseId) is not None:
        return "cc-by"
    if re.search('cc0-', licenseId) is not None:
        return "cc0"
    return "other"
# Result aliases of the funders to plot; a funder's position in this list is
# the position of its bar in the plot.
queries = ['funder1', 'funder2', 'funder3']

# Map each license type to a dict that in turn maps the position of the output's bar in plot
# to the count of outputs corresponding to that license type.
licenseType2Pos2Count = {}

# Under the assumption of one license per work, for each funder licenseType2Pos2Count["No license"] is instantiated
# with the totalCount of works for that funder. Any work counts for a license found in funder['works']['licenses']
# will be subtracted from licenseType2Pos2Count["No license"] for that funder, in the end leaving the number of
# works with no license.
# The loop variable is named `alias` rather than `query` so that the compiled
# GraphQL document held in `query` is not overwritten and the query cell can be re-run.
licenseType2Pos2Count["No license"] = {
    pos: data[alias]['works']['totalCount']
    for pos, alias in enumerate(queries)
    if alias in data
}

# Populate license type counts per funder
# labels contains funder labels in bar plot - each bar corresponds to a single funder
labels = {}
# Positions come from enumerate() so that they always match the positions used
# for the "No license" counts above, even if a funder is absent from the result.
for pos, alias in enumerate(queries):
    if alias not in data:
        continue
    funder = data[alias]
    labels[pos] = funderId2Acronym[funder['id']]
    for licenseInfo in funder['works']['licenses']:
        outputCount = licenseInfo['count']
        licenseId = get_grouped_license_type(licenseInfo['id'])
        if licenseId not in licenseType2Pos2Count:
            # First time this license type is seen: start every funder at zero
            licenseType2Pos2Count[licenseId] = dict.fromkeys(range(len(queries)), 0)
        licenseType2Pos2Count[licenseId][pos] += outputCount
        licenseType2Pos2Count["No license"][pos] -= outputCount
# Assemble the plot input: one "Funders" column holding the bar labels plus
# one column of output counts per license type, all keyed by bar position.
x_name = "Funders"
dfDict = {x_name: labels}
dfDict.update(licenseType2Pos2Count)
df = pd.DataFrame(dfDict)

# Stacked bar plot with one bar per funder
fig = px_stacked_bar(df.set_index(x_name), y_name="Output Counts")

# Make both the plotting area and the surrounding paper transparent
fig.update_layout(
    plot_bgcolor='rgba(0, 0, 0, 0)',
    paper_bgcolor='rgba(0, 0, 0, 0)',
)

# The figure is rendered inline. To embed a saved copy instead, write it out
# with pio.write_html(fig, file='out.html') and display it with
# IFrame(src="./out.html", width=500, height=500).
display(Markdown("<br />License types of all funder's outputs to date, shown as a stacked bar plot - one bar per funder:"))
fig.show()
License types of all funder's outputs to date, shown as a stacked bar plot - one bar per funder: