This notebook compares the results of, and the discrepancies between, some information found in the API and on the dev.to website.
import json
import pandas as pd
import plotly.graph_objects as go
import plotly.io as pio
# Session-wide plotly defaults: every figure uses the 'notebook' renderer
# and the 'plotly_white' template unless it overrides them itself.
pio.renderers.default = 'notebook'
pio.templates.default = 'plotly_white'
The `top_articles_by_tag.json` file has the 100 tags shown on the page https://dev.to/tags, and the two "Number of posts published" values: the one from the tags page (`tag.num_articles`), and the one from the individual tag page https://dev.to/tags/`<tag>` (`total`).
The `top_articles_by_tag_api.json` file has the 100 top tags returned by the API, and only the number of posts from the individual tag page https://dev.to/tags/`<tag>` (`total`).
Both have the top 100 articles for each tag.
# Load the two scrape results into memory.
# JSON text is UTF-8, so the encoding is passed explicitly instead of being
# left to the platform's locale default (which is not UTF-8 everywhere).
with open('../top_articles_by_tag.json', encoding='utf-8') as f:
    data = json.load(f)  # entries for the tags shown on the dev.to tags page
with open('../top_articles_by_tag_api.json', encoding='utf-8') as f:
    data_api = json.load(f)  # entries for the tags returned by the API
Let's see the difference in "Number of posts published" shown in the tags page vs the individual tag page:
# One row per scraped tag: its name plus the two "posts published" counts.
# NOTE(review): the notebook text describes `tag.num_articles` as the
# tags-page figure and `total` as the individual-tag-page figure, yet here
# `total` lands in the 'tags_page' column and `num_articles` in 'tag_page'.
# Confirm which side is right before relying on the column labels.
count_diff = pd.DataFrame(
    [
        [item['tag']['name'], item['total'], item['tag']['num_articles']]
        for item in data
    ],
    columns=['tag', 'tags_page', 'tag_page'],
)
# Largest 'tags_page' count first.
count_diff = count_diff.sort_values(by='tags_page', ascending=False)
# `tags` is a second name for the very same frame, not a copy.
tags = count_diff
# Draw both post counts per tag as two overlaid line+marker series.
df = count_diff
x = df['tag']
fig = go.Figure(
    data=[
        go.Scatter(x=x, y=df['tags_page'], mode='lines+markers', name='On tags page'),
        go.Scatter(x=x, y=df['tag_page'], mode='lines+markers', name='On individual tag page'),
    ]
)
fig.update_layout(
    # Linear tick mode: presumably so that every tag gets its own axis label.
    xaxis={'title': 'tag', 'tickmode': 'linear'},
    # Horizontal legend.
    legend={'orientation': 'h', 'yanchor': 'auto', 'y': 1.0, 'xanchor': 'auto', 'x': .5},
)
fig.show()
# Work on a new frame (count_diff itself stays untouched) that carries an
# extra 'diff' column: 'tag_page' count minus 'tags_page' count.
df = count_diff.assign(diff=count_diff['tag_page'] - count_diff['tags_page'])
# Show the table without its index and shade the 'diff' column by value.
# This must remain the last expression so the notebook displays it.
(
    df.style
    .hide(axis='index')
    .background_gradient(subset=['diff'])
)
tag | tags_page | tag_page | diff |
---|---|---|---|
javascript | 66866 | 109798 | 42932 |
webdev | 55014 | 93972 | 38958 |
beginners | 43082 | 68086 | 25004 |
tutorial | 28891 | 44794 | 15903 |
react | 25741 | 42109 | 16368 |
programming | 24030 | 50604 | 26574 |
python | 17440 | 30755 | 13315 |
discuss | 14723 | 21063 | 6340 |
productivity | 14487 | 23503 | 9016 |
css | 13578 | 22926 | 9348 |
career | 12457 | 19626 | 7169 |
node | 11865 | 18932 | 7067 |
devops | 11730 | 20101 | 8371 |
codenewbie | 10719 | 16574 | 5855 |
html | 9642 | 17422 | 7780 |
opensource | 9637 | 16559 | 6922 |
typescript | 9595 | 14785 | 5190 |
aws | 8963 | 14879 | 5916 |
showdev | 7714 | 10923 | 3209 |
github | 7503 | 12064 | 4561 |
java | 6911 | 12554 | 5643 |
testing | 6361 | 10531 | 4170 |
docker | 5843 | 8968 | 3125 |
php | 5568 | 10138 | 4570 |
security | 5414 | 9595 | 4181 |
linux | 5270 | 9108 | 3838 |
vue | 5204 | 9027 | 3823 |
ruby | 5065 | 7495 | 2430 |
git | 5002 | 7749 | 2747 |
angular | 4997 | 8889 | 3892 |
go | 4853 | 7719 | 2866 |
database | 4566 | 8044 | 3478 |
dotnet | 4406 | 6763 | 2357 |
csharp | 4248 | 6367 | 2119 |
serverless | 4167 | 6189 | 2022 |
machinelearning | 4005 | 7883 | 3878 |
kubernetes | 3938 | 6477 | 2539 |
rails | 3922 | 5897 | 1975 |
computerscience | 3907 | 6489 | 2582 |
cloud | 3777 | 7353 | 3576 |
android | 3687 | 23905 | 20218 |
design | 3645 | 6987 | 3342 |
laravel | 3622 | 7252 | 3630 |
azure | 3578 | 6102 | 2524 |
api | 3517 | 6255 | 2738 |
algorithms | 3455 | 5476 | 2021 |
architecture | 3209 | 5361 | 2152 |
help | 3197 | 5343 | 2146 |
learning | 3006 | 4588 | 1582 |
datascience | 2969 | 6518 | 3549 |
vscode | 2809 | 4321 | 1512 |
reactnative | 2800 | 5344 | 2544 |
graphql | 2744 | 4035 | 1291 |
frontend | 2647 | 4204 | 1557 |
nextjs | 2640 | 4392 | 1752 |
flutter | 2637 | 5203 | 2566 |
watercooler | 2546 | 4231 | 1685 |
django | 2505 | 4129 | 1624 |
ios | 2325 | 4869 | 2544 |
codepen | 2294 | 5408 | 3114 |
sql | 2270 | 3901 | 1631 |
rust | 2249 | 3415 | 1166 |
todayilearned | 2220 | 3117 | 897 |
blockchain | 2083 | 8072 | 5989 |
performance | 2011 | 3268 | 1257 |
hacktoberfest | 1982 | 3478 | 1496 |
startup | 1980 | 5146 | 3166 |
kotlin | 1974 | 3328 | 1354 |
motivation | 1972 | 3083 | 1111 |
news | 1916 | 27667 | 25751 |
coding | 1913 | 4638 | 2725 |
challenge | 1906 | 2934 | 1028 |
mongodb | 1834 | 2997 | 1163 |
development | 1778 | 4837 | 3059 |
microservices | 1773 | 2905 | 1132 |
tailwindcss | 1732 | 2611 | 879 |
postgres | 1730 | 2661 | 931 |
cpp | 1710 | 3706 | 1996 |
npm | 1637 | 2521 | 884 |
ux | 1624 | 3044 | 1420 |
gamedev | 1620 | 3380 | 1760 |
wordpress | 1564 | 4146 | 2582 |
writing | 1530 | 3014 | 1484 |
devjournal | 1422 | 2825 | 1403 |
mobile | 1415 | 7548 | 6133 |
dart | 1406 | 2229 | 823 |
leetcode | 1361 | 1861 | 500 |
ai | 1327 | 3250 | 1923 |
agile | 1225 | 2449 | 1224 |
firebase | 1201 | 2120 | 919 |
management | 1147 | 2499 | 1352 |
tooling | 1017 | 1802 | 785 |
meta | 1011 | 1592 | 581 |
braziliandevs | 1009 | 1543 | 534 |
mysql | 1002 | 2020 | 1018 |
web3 | 999 | 2708 | 1709 |
community | 795 | 2367 | 1572 |
cybersecurity | 672 | 1874 | 1202 |
actionshackathon21 | 244 | 311 | 67 |
archlinux | 151 | 34632 | 34481 |
# Pull the tag names out of each data set, keeping the stored order.
tags_devto = [item['tag']['name'] for item in data]
tags_api = [item['tag']['name'] for item in data_api]
# Display how many tags each source provided.
(len(tags_devto), len(tags_api))
(100, 96)
# Set versions of both name lists, for the set algebra that follows.
tags_devto_set, tags_api_set = set(tags_devto), set(tags_api)
def pp(s):
    """Format a collection of tag names for printing.

    Args:
        s: Sized iterable of strings (the callers in this notebook pass sets).

    Returns:
        The number of names in parentheses followed by a colon, then a
        newline and a space, then the names joined with ', '.
    """
    # Sort before joining: iterating a set of strings has no stable order,
    # so the unsorted text would differ from one run to the next.
    return f'({len(s)}):\n ' + ', '.join(sorted(s))
# Report the overlap between the two tag sets and what each side lacks.
# The three sections are separated by one blank line.
print(
    f"In both {pp(tags_devto_set.intersection(tags_api_set))}",
    f"Not in tags API {pp(tags_devto_set.difference(tags_api_set))}",
    f"Not in tags dev.to {pp(tags_api_set.difference(tags_devto_set))}",
    sep='\n\n',
)
In both (81): kubernetes, motivation, typescript, datascience, frontend, laravel, mobile, algorithms, docker, javascript, aws, node, vue, ux, productivity, machinelearning, github, programming, css, azure, computerscience, graphql, sql, reactnative, wordpress, php, nextjs, git, ruby, serverless, html, rust, performance, cpp, help, gamedev, mongodb, devops, python, opensource, go, vscode, ios, news, rails, archlinux, csharp, career, beginners, cloud, showdev, ai, hacktoberfest, api, dotnet, discuss, blockchain, writing, learning, java, design, startup, tutorial, testing, codepen, coding, kotlin, todayilearned, architecture, django, database, webdev, react, development, angular, watercooler, android, security, flutter, linux, codenewbie Not in tags API (19): postgres, leetcode, mysql, tooling, firebase, web3, braziliandevs, challenge, actionshackathon21, devjournal, tailwindcss, microservices, meta, npm, community, management, dart, agile, cybersecurity Not in tags dev.to (15): softwaredevelopment, uncategorized, functional, blog, software, web, interview, business, google, newsbrief, 100daysofcode, cryptocurrency, bitcoin, ubuntu, technology