%%capture
# Install required Python packages
# (the %%capture cell magic on the first line swallows this cell's output, so pip's install log is not shown)
# NOTE(review): pandas is imported further down but is not installed here —
# presumably it is already present in the target environment; confirm.
!pip install gql requests pyvis jsonpickle


# Prepare the GraphQL client
import requests
from IPython.display import display, Markdown
from gql import gql, Client
from gql.transport.requests import RequestsHTTPTransport

# HTTP transport pointed at the DataCite GraphQL endpoint, sending JSON request bodies
_transport = RequestsHTTPTransport(url='https://api.datacite.org/graphql', use_json=True)

# Client used by every query below; it is asked to fetch the schema through the transport
client = Client(transport=_transport, fetch_schema_from_transport=True)


# Build the GraphQL query: for the datasets whose DOIs are listed in "ids", fetch each
# dataset's id, titles and citation count, plus the id, publisher, titles and citation
# count of every node under its `citations`.
# The DOIs are supplied to the query through the $ids variable.
query_params = {
    "ids" : ["10.5061/dryad.234","10.15468/n6ftyd","10.1594/pangaea.314690"]
}

query = gql("""query getDatasetCitations($ids: [String!]) {
  datasets(ids: $ids) {
    nodes {
      id
      titles {
        title
      }
      citationCount
      citations {
        nodes {
          id
          publisher
          titles {
            title
          }
          citationCount
        }
      }
    }
  }
}


""")


import json

# Run the query against the DataCite API.
# `variable_values` is handed the plain dict: gql serialises the variables itself, so
# pre-encoding them with json.dumps() would send them to the server as a JSON *string*
# instead of a variables object.
data = client.execute(query, variable_values=query_params)


# Show the total citation count of each dataset returned by the query as a Markdown table:
# one row per dataset, with the title(s) linking to the dataset id and the count linking
# to the dataset's page under https://search.datacite.org/works
datasets = data['datasets']
table_rows = []
for dataset in datasets['nodes']:
    # Deliberately not called `id`: at notebook top level that would shadow the
    # builtin id() for every later cell
    dataset_id = dataset['id']
    # Drop the first three '/'-separated parts of the id to keep only the DOI
    # (presumably the id is a URL such as https://doi.org/<doi> — confirm)
    doi = "/".join(dataset_id.split("/")[3:])
    titles = [title['title'] for title in dataset['titles']]
    citationCount = dataset['citationCount']
    table_rows.append(
        "[%s](%s) | [**%s**](%s/%s)\n"
        % (', '.join(titles), dataset_id, citationCount, "https://search.datacite.org/works", doi)
    )
tableBody = "".join(table_rows)
# Nothing is displayed when the query returned no datasets
if tableBody:
    display(Markdown("| Dataset | Citation Count|\n|---|---|\n%s" % tableBody))


from pyvis.network import Network
import pandas as pd
from IPython.display import IFrame
import math

# Colours that tell the two kinds of network node apart
dataset_node_colour, citation_node_colour = "#FB8072", "#80B1D3"

# Canvas for the interactive graph, created in notebook mode
got_net = Network(
    height="750px",
    width="100%",
    bgcolor="#ffffff",
    font_color="black",
    notebook=True,
)
# Switch off colour inheritance for edges
got_net.options.edges.inherit_colors(False)

# Lay the network out with the Barnes-Hut physics model
got_net.barnes_hut()

# ------------------------------
# Intermediate structure: (src, trg) -> citation count of the target, where
# src is a dataset DOI and trg is the DOI of one of its citations
srcTrg2Count = {}
# Intermediate structure: DOI -> set of DOIs it is linked to (each link is stored in both directions).
# Note that the number of linked DOIs determines the *size* of each dataset node in the plot
src2OtherTrgs = {}
# Every DOI encountered, datasets and citations alike
allNodes = set()


def _doi_from_id(node_id):
    """Return *node_id* with its first three '/'-separated parts removed.

    Presumably ids are URLs such as ``https://doi.org/<doi>``, in which case
    the result is the bare DOI — confirm against the API response.
    """
    return "/".join(node_id.split("/")[3:])


datasets = data['datasets']

# Populate srcTrg2Count, src2OtherTrgs and allNodes
for dataset_node in datasets['nodes']:
    datasetDOI = _doi_from_id(dataset_node['id'])
    # A dataset is recorded in allNodes even when it has no citations
    allNodes.add(datasetDOI)
    for citation in dataset_node['citations']['nodes']:
        citationDOI = _doi_from_id(citation['id'])
        allNodes.add(citationDOI)
        # Register the link under both endpoints
        src2OtherTrgs.setdefault(datasetDOI, set()).add(citationDOI)
        src2OtherTrgs.setdefault(citationDOI, set()).add(datasetDOI)
        srcTrg2Count[(datasetDOI, citationDOI)] = citation['citationCount']

# Flatten srcTrg2Count into the parallel lists the graph is built from.
# The (source, target) key is unpacked in the loop header instead of being bound to a
# variable called `tuple`, which would shadow the builtin for the rest of the notebook.
sources, targets, weights = [], [], []
for (source_doi, target_doi), count in srcTrg2Count.items():
    # Only non-negative counts are kept, so targets with 0 citations are included
    if count >= 0:
        sources.append(source_doi)
        targets.append(target_doi)
        weights.append(count)

# One (source, target, weight) triple per edge; zip() is lazy, so this can be iterated only once
edge_data = zip(sources, targets, weights)

# Add both endpoints of every edge to the network, then the edge itself
for src, dst, w in edge_data:
    # Dataset node: size grows with the log of the number of DOIs linked to it
    dataset_size = 5 * math.log2(len(src2OtherTrgs[src]) * 5000)
    got_net.add_node(
        src,
        src,
        title="Dataset: %s;" % src,
        color=dataset_node_colour,
        size=dataset_size,
    )
    # Citation node: 1 is added to the citation count so that a target with
    # 0 citations still gets a size (log2 of 0 is undefined) and shows up in the graph
    citation_size = 10 * math.log2((w + 1) * 10)
    got_net.add_node(
        dst,
        dst,
        title="Citation: %s; Number of citations: %d;" % (dst, w),
        color=citation_node_colour,
        size=citation_size,
    )
    got_net.add_edge(src, dst, value=1)
    
neighbor_map = got_net.get_adj_list()
# Append each node's neighbours to its hover text. The neighbours are sorted so the
# tooltip reads the same on every run (pyvis is understood to return them as
# unordered sets — see its get_adj_list documentation)
for node in got_net.nodes:
    node["title"] += " Neighbours:<br>" + "<br>".join(sorted(neighbor_map[node["id"]]))

# Write the interactive graph to out.html, print usage hints, then embed the page;
# the IFrame must remain the last expression so the notebook renders it
got_net.show("out.html")
display(Markdown("N.B. Click on the plot, then use down/up mouse scroll to zoom in/out respectively.<br>When zoomed in, you will notice the DOI label against each node.<br>Click on any node to see the list of 'neighbour' citations, and on the citation node to also see the number of its citations."))
IFrame(src="./out.html", width=1000, height=800)

Install libraries and prepare GraphQL client

Define and run GraphQL query

Display total number of citations per dataset

Plot a force-directed graph connecting datasets to their publications and citations of those publications