%%capture
# Install required Python packages
!pip install gql requests numpy plotnine


# Prepare the GraphQL client
import requests
from IPython.display import display, Markdown
from gql import gql, Client
from gql.transport.requests import RequestsHTTPTransport

# HTTP transport pointing at the DataCite GraphQL endpoint.
_transport = RequestsHTTPTransport(url="https://api.datacite.org/graphql", use_json=True)

# Client used to run every query in this notebook; it is asked to fetch the
# schema through the transport configured above.
client = Client(transport=_transport, fetch_schema_from_transport=True)


# Variables for the GraphQL query below: the query looks up all outputs, and their
# associated licenses (where available), for three funders. Each funder is identified
# by its DOI and bound to the query variables $funder1, $funder2 and $funder3.
query_params = dict(
    funder1="https://doi.org/10.13039/501100001659",
    funder2="https://doi.org/10.13039/501100001665",
    funder3="https://doi.org/10.13039/501100001711",
)

# Short display label for each funder id, used wherever a funder is shown in a plot.
funderId2Acronym = {
    query_params["funder1"]: "DFG",
    query_params["funder2"]: "ANR",
    query_params["funder3"]: "SNF",
}

# GraphQL document with nine aliased `funder` lookups driven by the three ID variables
# $funder1, $funder2 and $funder3:
#   - funder1 / funder2 / funder3: all works of the funder,
#   - funder<N>Dataset: works restricted with resourceTypeId "Dataset",
#   - funder<N>Text: works restricted with resourceTypeId "Text".
# Every alias selects the funder's name and id, the works' totalCount, and for each
# entry under `licenses` its id, title and count. The plotting code further down reads
# the response by these alias names, so they must stay in sync with it.
query = gql("""query getGrantOutputsForFundersById(
    $funder1: ID!,
    $funder2: ID!,
    $funder3: ID!
    )
{
funder1: funder(id: $funder1) {
  name
  id
  works {
      totalCount
      licenses {
        id
        title
        count
      }        
    }
  },
funder2: funder(id: $funder2) {
  name
  id
  works {
      totalCount
      licenses {
        id
        title
        count
      }        
    }
  },
funder3: funder(id: $funder3) {
  name
  id
  works {
      totalCount
      licenses {
        id
        title
        count
      }        
    }
  },
funder1Dataset: funder(id: $funder1) {
  name
  id
  works(resourceTypeId: "Dataset") {
      totalCount
      licenses {
        id
        title
        count
      }        
    }
  },
funder1Text: funder(id: $funder1) {
  name
  id
  works(resourceTypeId: "Text") {
      totalCount
      licenses {
        id
        title
        count
      }        
    }
  },
funder2Dataset: funder(id: $funder2) {
  name
  id
  works(resourceTypeId: "Dataset") {
      totalCount
      licenses {
        id
        title
        count
      }       
    }
  },
funder2Text: funder(id: $funder2) {
  name
  id
  works(resourceTypeId: "Text") {
      totalCount
      licenses {
        id
        title
        count
      }        
    }
  },
funder3Dataset: funder(id: $funder3) {
  name
  id
  works(resourceTypeId: "Dataset") {
      totalCount
      licenses {
        id
        title
        count
      }       
    }
  },
funder3Text: funder(id: $funder3) {
  name
  id
  works(resourceTypeId: "Text") {
      totalCount
      licenses {
        id
        title
        count
      }        
    }
  } 
}
""")


import json
# Run the query. The variables are passed as a plain dict: gql serialises the request
# payload itself, so pre-encoding them with json.dumps would send a JSON *string* in
# place of the variables object and only work if the server tolerates that.
data = client.execute(query, variable_values=query_params)


import plotly.io as pio
import plotly.express as px
from IPython.display import IFrame
import pandas as pd
from operator import itemgetter
import re

# Adapted from: https://stackoverflow.com/questions/58766305/is-there-any-way-to-implement-stacked-or-grouped-bar-charts-in-plotly-express
def px_stacked_bar(df, color_name='License Type', y_name='Metrics', **pxargs):
    """Build a stacked bar chart from a wide DataFrame.

    The frame is melted into long form: the index becomes the x axis (one bar per
    index value), every column becomes one colour segment of the bars, and the cell
    values become the segment heights.

    Args:
        df: Wide DataFrame. The name of its index is used as the x-axis column, so
            the index is expected to be named (e.g. via ``set_index``).
        color_name: Name given to the melted "variable" column; used for colouring.
        y_name: Name given to the melted "value" column; used for the y axis.
        **pxargs: Extra keyword arguments forwarded to ``px.bar``. Passing
            ``color_discrete_sequence`` overrides the default Pastel1 palette.

    Returns:
        The figure returned by ``px.bar``.
    """
    idx_col = df.index.name
    long_df = pd.melt(df.reset_index(), id_vars=idx_col, var_name=color_name, value_name=y_name)
    # For Plotly colour sequences see: https://plotly.com/python/discrete-color/
    # The palette is only a default, so a caller-supplied sequence does not collide
    # with it as a duplicate keyword argument.
    pxargs.setdefault('color_discrete_sequence', px.colors.qualitative.Pastel1)
    return px.bar(long_df, x=idx_col, y=y_name, color=color_name, **pxargs)
 
def get_grouped_license_type(licenseId):
    """Collapse a license id into one of the coarse groups used in the plots.

    Args:
        licenseId: License identifier string, or None when no id is available.

    Returns:
        "cc-by" if the id contains "cc-by-", "cc0" if it contains "cc0-",
        "other" for any other id, and None when licenseId is None.
    """
    # None has to be handled before any substring matching: matching against None
    # raises a TypeError instead of producing the intended None result.
    if licenseId is None:
        return None
    if 'cc-by-' in licenseId:
        return "cc-by"
    if 'cc0-' in licenseId:
        return "cc0"
    return "other"
            
# Aliases of the unfiltered funder lookups in the GraphQL response; the position of an
# alias in this list is the position of that funder's bar in the plot.
queries = ['funder1', 'funder2', 'funder3']

# Map each license type to a dict that in turn maps the position of the output's bar in plot
# to the count of outputs corresponding to that license type.
licenseType2Pos2Count = {}

# Under the assumption of one license per work, for each funder licenseType2Pos2Count["No license"] is instantiated
# with the totalCount of works for that funder. Any work counts for a license found in funder['works']['licenses']
# will be subtracted from licenseType2Pos2Count["No license"] for that funder, in the end leaving the number of
# works with no license.
# NOTE: the loop variable is deliberately not called `query`, so the gql document bound to
# `query` is not overwritten and the request can still be re-run after this code.
licenseType2Pos2Count["No license"] = {
    pos: data[alias]['works']['totalCount']
    for pos, alias in enumerate(queries)
    if alias in data
}

# Populate license type counts per funder
# labels contains funder labels in bar plot - each bar corresponds to a single funder
labels = {}
for pos, alias in enumerate(queries):
    if alias not in data:
        continue
    funder = data[alias]
    labels[pos] = funderId2Acronym[funder['id']]

    for licenseInfo in funder['works']['licenses']:
        outputCount = licenseInfo['count']
        licenseType = get_grouped_license_type(licenseInfo['id'])
        if licenseType not in licenseType2Pos2Count:
            # First time this license type is seen: start every funder's count at zero
            licenseType2Pos2Count[licenseType] = dict.fromkeys(range(len(queries)), 0)

        licenseType2Pos2Count[licenseType][pos] += outputCount
        licenseType2Pos2Count["No license"][pos] -= outputCount

# Create stacked bar plot: one row per funder, one column per license type
x_name = "Funders"
dfDict = {x_name: labels, **licenseType2Pos2Count}

df = pd.DataFrame(dfDict)
fig = px_stacked_bar(df.set_index(x_name), y_name="Output Counts")

# Set plot background to transparent
fig.update_layout({
    'plot_bgcolor': 'rgba(0, 0, 0, 0)',
    'paper_bgcolor': 'rgba(0, 0, 0, 0)',
})

# NOTE: the figure is rendered inline. To export it as a standalone interactive page
# instead, use pio.write_html(fig, file='out.html') and embed that file in an IFrame.
display(Markdown("<br />License types of all funder's outputs to date, shown as a stacked bar plot - one bar per funder:"))
fig.show()


import plotly.express as px
import re

def xstr(s):
    """Return 'General' for None, otherwise the string form of s."""
    return 'General' if s is None else str(s)


# Populate license type counts per funder
funderQueryLabels = ['funder1', 'funder2', 'funder3']
outputTypeLabels = ["Dataset", "Text"]

funder2resType2licenceType2outputCount = {}
# funderAcronym2Name is needed for the plot legend - as funder names are too long to be shown in the plot itself
funderAcronym2Name = {}

# Collect license type counts data into funder2resType2licenceType2outputCount
for funderQueryLabel in funderQueryLabels:
    for outputType in outputTypeLabels:
        # Alias of the filtered lookup in the GraphQL response, e.g. "funder1Dataset".
        # It is deliberately not stored in `query`, so the gql document bound to that
        # name is not overwritten here.
        alias = funderQueryLabel + outputType
        if alias not in data:
            continue
        funder = data[alias]
        funderAcronym = funderId2Acronym[funder['id']]
        funderAcronym2Name[funderAcronym] = funder['name']
        licenceType2outputCount = (
            funder2resType2licenceType2outputCount
            .setdefault(funderAcronym, {})
            .setdefault(outputType, {})
        )

        # Under the assumption of one license per work, "No license" is instantiated with the
        # totalCount of works for that funder and outputType. Any work counts for a license found
        # in funder['works']['licenses'] are subtracted from it, in the end leaving the number of
        # works with no license.
        licenceType2outputCount.setdefault("No license", funder['works']['totalCount'])

        for licenseInfo in funder['works']['licenses']:
            outputCount = licenseInfo['count']
            licenseType = get_grouped_license_type(licenseInfo['id'])
            licenceType2outputCount[licenseType] = licenceType2outputCount.get(licenseType, 0) + outputCount
            licenceType2outputCount["No license"] -= outputCount

# Populate data structures for faceted stacked bar plot: one row per
# (funder, output type, license type) combination, keyed by row position.
funders, outputTypes, licenseTypes, outputCounts = ({}, {}, {}, {})
pos = 0
for funderAcronym, resType2licenceType2outputCount in funder2resType2licenceType2outputCount.items():
    for outputType, licenceType2outputCount in resType2licenceType2outputCount.items():
        for licenseType, outputCount in licenceType2outputCount.items():
            funders[pos] = funderAcronym
            outputTypes[pos] = outputType
            licenseTypes[pos] = licenseType
            outputCounts[pos] = outputCount
            pos += 1
dfDict = {"Funder": funders, "Output Type": outputTypes, "License": licenseTypes, "Output Count": outputCounts}
df1 = pd.DataFrame(dfDict)

# Create funders legend: one markdown table row per funder
tableBody = "".join(
    "%s | %s\n" % (funderAcronym, funderName)
    for funderAcronym, funderName in funderAcronym2Name.items()
)

fig2 = px.bar(df1, x="Output Type", y="Output Count", color="License", barmode="stack",
              facet_row="Funder")
fig2.update_layout(uniformtext_minsize=8, uniformtext_mode='hide')

# Write interactive plot out to html file
pio.write_html(fig2, file='out2.html')

# Show the explanatory text and the funder legend, then render the figure inline
markDownContent = (
    "<br />For each funder, the plot below shows counts of all outputs to date of type %s, corresponding to a given license type."
    "<br />Full information is shown when you mouse-over a bar."
    "<br />"
)
display(Markdown(markDownContent % " or ".join(outputTypeLabels)))
display(Markdown("| Acronym | Funder Name|\n|---|---|\n%s" % tableBody))

fig2.show()

# Install libraries and prepare GraphQL client

# Define and run GraphQL query

# Display bar plot of number of outputs per license type and funder

# Plot output counts per license type, funder and output type