{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 151,
   "id": "e89513d7-8a77-43d6-a085-8f2fd66f9ae7",
   "metadata": {},
   "outputs": [],
   "source": [
    "from pycoingecko import CoinGeckoAPI\n",
    "import pandas as pd\n",
    "from rdflib import Graph, Literal, RDF, URIRef, Namespace #basic RDF handling\n",
    "from rdflib.namespace import XSD #most common namespaces\n",
    "import pandas as pd\n",
    "#import rdflib\n",
    "#from rdflib.namespace import NamespaceManager"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 152,
   "id": "94e2ad9a-0ec5-4763-afe0-c596bbde7f11",
   "metadata": {},
   "outputs": [],
   "source": [
    "cg = CoinGeckoAPI()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 153,
   "id": "cf370d3b-0232-4850-b43b-2d5af9df5c3a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Full coin listing from the API; only the ids are used for the selection below.\n",
    "# Kept under its own name so that `coins` always refers to the metadata DataFrame.\n",
    "coin_catalog = cg.get_coins_list(include_platform=False)\n",
    "coin_list = [entry['id'] for entry in coin_catalog]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 154,
   "id": "10cc1b68-e466-4092-ba2b-334b833d0980",
   "metadata": {},
   "outputs": [],
   "source": [
    "whitepapers = ['aave','adex','aditus','bitcoin','ethereum','0x','aichain','ark','polkadot']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 155,
   "id": "22991b05-7e37-411f-a4ee-2956190ad4c4",
   "metadata": {},
   "outputs": [],
   "source": [
    "coin_list = [x for x in coin_list if x in whitepapers]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 156,
   "id": "820c21d9-76e8-4743-9baf-727e0278af29",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0x\n",
      "aave\n",
      "adex\n",
      "aditus\n",
      "aichain\n",
      "ark\n",
      "bitcoin\n",
      "ethereum\n",
      "polkadot\n"
     ]
    }
   ],
   "source": [
    "# Fetch the detailed record of every selected coin and stack them into `coins`.\n",
    "# The frames are collected in a list and concatenated once: DataFrame.append was\n",
    "# removed in pandas 2.0 and re-copies the growing frame on every call.\n",
    "coin_frames = []\n",
    "for coin in coin_list:\n",
    "    print(coin)\n",
    "    coin_info = cg.get_coin_by_id(id=coin.lower(), localization=False, tickers=False, market_data=True,\n",
    "                                  community_data=True, developer_data=True, sparkline=False)\n",
    "    # Transpose so that the keys of the response become the columns.\n",
    "    coin_frames.append(pd.DataFrame.from_dict(coin_info, orient='index').T)\n",
    "\n",
    "# pd.concat raises ValueError on an empty list, so fall back to an empty frame.\n",
    "coins = pd.concat(coin_frames) if coin_frames else pd.DataFrame()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 157,
   "id":
"784e0ebb-6540-416c-b865-221f77d7a1b7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Normalise every text column of `coins`: drop spaces, backslashes and hyphens,\n",
    "# then lower-case. regex=False keeps the replacements literal whatever the\n",
    "# default of the installed pandas version is.\n",
    "# NOTE(review): in columns that mix strings with other objects the .str methods\n",
    "# presumably turn the non-string entries into NaN - confirm this is acceptable.\n",
    "for column in coins.columns:\n",
    "    try:\n",
    "        cleaned = (\n",
    "            coins[column]\n",
    "            .str.replace(' ', '', regex=False)\n",
    "            .str.replace('\\\\', '', regex=False)\n",
    "            .str.replace('-', '', regex=False)\n",
    "            .str.lower()\n",
    "        )\n",
    "    except AttributeError:\n",
    "        # The .str accessor refuses columns without string values (e.g. numeric\n",
    "        # ones); those columns are left untouched.\n",
    "        continue\n",
    "    coins[column] = cleaned"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 158,
   "id": "918a369b-d2dc-4511-97ad-875b6bd2a4d8",
   "metadata": {},
   "outputs": [],
   "source": [
    "g = Graph()\n",
    "schema = Namespace('http://schema.org/')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 159,
   "id": "cedd34f1-ab0d-48e9-9c79-d480877afd92",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Add the coin metadata to the graph, one node per coin symbol.\n",
    "# These terms do not change between rows, so they are built once.\n",
    "symbol_class = URIRef(schema + 'symbol')\n",
    "# Used both as the class of a platform node and as the predicate linking a coin to it.\n",
    "platform_term = URIRef(schema + 'asset_platform_id')\n",
    "\n",
    "for _, row in coins.iterrows():\n",
    "    symbol = row['symbol']\n",
    "    coin_node = URIRef(symbol)\n",
    "    # Make the symbol column a class\n",
    "    g.add((coin_node, RDF.type, symbol_class))\n",
    "    # Connect symbol to the id column and define id as a string\n",
    "    g.add((coin_node, URIRef(schema + 'id'), Literal(row['id'], datatype=XSD.string)))\n",
    "\n",
    "    # For coins that have a value for 'asset_platform_id', define asset_platform_id as a class\n",
    "    # and connect it to the symbol. pd.notna() rejects NaN as well as None (a plain `!= None`\n",
    "    # test lets NaN through), and row.get() tolerates a missing column.\n",
    "    platform_id = row.get('asset_platform_id')\n",
    "    if pd.notna(platform_id):\n",
    "        g.add((URIRef(platform_id), RDF.type, platform_term))\n",
    "        g.add((coin_node, platform_term, Literal(platform_id, datatype=XSD.string)))\n",
    "\n",
    "    # For coins that have a value for 'coingecko_rank' / 'liquidity_score', connect this\n",
    "    # value to the symbol as a typed literal.\n",
    "    for field, datatype in (('coingecko_rank', XSD.integer), ('liquidity_score', XSD.float)):\n",
    "        value = row.get(field)\n",
    "        if pd.isna(value):\n",
    "            continue\n",
    "        try:\n",
    "            g.add((coin_node, URIRef(schema + field), Literal(value, datatype=datatype)))\n",
    "        except Exception as exc:\n",
    "            # Still best effort, but the failure is reported instead of hidden.\n",
    "            print(f'Skipping {field} for {symbol}: {exc!r}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 160,
   "id": "e46c5cc3-b447-4673-b240-a26fa05c4484",
   "metadata": {},
   "outputs": [],
   "source": [
    "#print(g.serialize(format='turtle'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 161,
   "id": "44461b32-aa85-43cf-9aae-3b9fab9a5a7b",
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_csv(\"allMentions.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 162,
   "id": "f620d2b9-e806-4606-be24-38edb08f5486",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Unnamed: 0mentionstoken
00BitHopeadx
11CryptoCrowdadx
22RSKadx
33poadx
40httpswwwark
\n", "
"
      ],
      "text/plain": [
       " Unnamed: 0 mentions token\n",
       "0 0 BitHope adx\n",
       "1 1 CryptoCrowd adx\n",
       "2 2 RSK adx\n",
       "3 3 po adx\n",
       "4 0 httpswww ark"
      ]
     },
     "execution_count": 162,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 163,
   "id": "d8796605-28d5-4bd8-a5d3-3cf6f461346e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Normalise every text column of `df`: drop spaces, backslashes and hyphens,\n",
    "# then lower-case. regex=False keeps the replacements literal whatever the\n",
    "# default of the installed pandas version is.\n",
    "for column in df.columns:\n",
    "    try:\n",
    "        cleaned = (\n",
    "            df[column]\n",
    "            .str.replace(' ', '', regex=False)\n",
    "            .str.replace('\\\\', '', regex=False)\n",
    "            .str.replace('-', '', regex=False)\n",
    "            .str.lower()\n",
    "        )\n",
    "    except AttributeError:\n",
    "        # The .str accessor refuses columns without string values (e.g. numeric\n",
    "        # ones); those columns are left untouched.\n",
    "        continue\n",
    "    df[column] = cleaned"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b987710c-e1e5-4fc1-a81f-d82fbdf73bd7",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 164,
   "id": "ee9efb0f-7d0b-446d-933c-2eb142826af3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Replace the literal text 'bitcoin' with 'btc' in both columns.\n",
    "# NOTE(review): presumably this aligns the token with the 'btc' symbol used for the\n",
    "# coin metadata - confirm. It is a substring replacement, so longer names that\n",
    "# contain 'bitcoin' are rewritten as well.\n",
    "df['mentions'] = df['mentions'].str.replace('bitcoin', 'btc', regex=False)\n",
    "df['token'] = df['token'].str.replace('bitcoin', 'btc', regex=False)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 165,
   "id": "49fa032a-91d2-4329-9c3d-f94796b8a1af",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Unnamed: 0mentionstoken
00bithopeadx
11cryptocrowdadx
22rskadx
33poadx
40httpswwwark
............
893217gamematrixait
894218governmentbusinessait
895219hainait
896220huoyanait
897221yearsait
\n", "

898 rows × 3 columns

\n", "
"
      ],
      "text/plain": [
       " Unnamed: 0 mentions token\n",
       "0 0 bithope adx\n",
       "1 1 cryptocrowd adx\n",
       "2 2 rsk adx\n",
       "3 3 po adx\n",
       "4 0 httpswww ark\n",
       ".. ... ... ...\n",
       "893 217 gamematrix ait\n",
       "894 218 governmentbusiness ait\n",
       "895 219 hain ait\n",
       "896 220 huoyan ait\n",
       "897 221 years ait\n",
       "\n",
       "[898 rows x 3 columns]"
      ]
     },
     "execution_count": 165,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 166,
   "id": "58545efb-b27c-40b6-bbf5-c2e7e809d8bd",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Link every token to the terms recorded for it in the mentions table.\n",
    "# These terms do not change between rows, so they are built once.\n",
    "symbol_class = URIRef(schema + 'symbol')\n",
    "mentions_predicate = URIRef(schema + 'mentions')\n",
    "\n",
    "# A row with a missing token or mention cannot form a triple (schema + NaN raises\n",
    "# TypeError), and a repeated pair adds nothing because a graph stores each triple once.\n",
    "mention_pairs = df[['token', 'mentions']].dropna().drop_duplicates()\n",
    "\n",
    "for token, mention in zip(mention_pairs['token'], mention_pairs['mentions']):\n",
    "    token_node = URIRef(token)\n",
    "    # Make symbol a class - the same as symbol from the metadata so that they are connected in the RDF\n",
    "    g.add((token_node, RDF.type, symbol_class))\n",
    "    # Add connection from token to mentions\n",
    "    g.add((token_node, mentions_predicate, URIRef(schema + mention)))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 167,
   "id": "4b805da3-af84-4bc8-8042-0de1dd41e682",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       ")>"
      ]
     },
     "execution_count": 167,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "g.serialize('KG.ttl',format='turtle')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "123d6548-de5b-43a4-8212-c9300dce1838",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}