This notebook compares the results of, and the discrepancies between, some information found in the API and on the dev.to website.
import json
import pandas as pd
import plotly.graph_objects as go
import plotly.io as pio
# Use the 'plotly_white' template for every figure created in this notebook.
pio.templates.default = 'plotly_white'
# Render figures with plotly's 'notebook' renderer by default.
pio.renderers.default = 'notebook'
The top_articles_by_tag.json file has the 100 tags shown on the page https://dev.to/tags, and the two "Number of posts published" values: the one from the tags page (tag.num_articles), and the one from the individual tag page under https://dev.to/tags/ (total).
The top_articles_by_tag_api.json file has the 100 top tags returned by the API, and only the number of posts from the individual tag page under https://dev.to/tags/ (total).
Both have the top 100 articles for each tag.
# Load both datasets. The encoding is passed explicitly because JSON files are
# UTF-8, while open() would otherwise fall back to the platform's locale
# encoding and could mis-decode non-ASCII text on some systems.
with open('../top_articles_by_tag.json', encoding='utf-8') as f:
    data = json.load(f)  # entries for the tags shown on the dev.to tags page
with open('../top_articles_by_tag_api.json', encoding='utf-8') as f:
    data_api = json.load(f)  # entries for the tags returned by the API
Let's see the difference in "Number of posts published" shown in the tags page vs the individual tag page:
# One row per entry in `data`: the tag name followed by the two post counts
# stored for it, ordered by the 'tags_page' column, largest first.
# NOTE(review): the notebook text says tag.num_articles comes from the tags
# page and total from the individual tag page, yet here `total` is labelled
# 'tags_page' and `num_articles` is labelled 'tag_page' -- confirm which
# labelling is intended before relying on the column names.
rows = [
    (entry['tag']['name'], entry['total'], entry['tag']['num_articles'])
    for entry in data
]
tags = pd.DataFrame(rows, columns=['tag', 'tags_page', 'tag_page'])
tags = tags.sort_values('tags_page', ascending=False)
# Both names refer to the same DataFrame object.
count_diff = tags
# Line chart with one trace per count column, sharing the tag names as x axis.
df = count_diff
x = df['tag']
fig = go.Figure(data=[
    go.Scatter(x=x, y=df[column], mode='lines+markers', name=label)
    for column, label in (
        ('tags_page', 'On tags page'),
        ('tag_page', 'On individual tag page'),
    )
])
fig.update_layout(
    # tickmode='linear' places ticks at a fixed step instead of auto-thinning.
    xaxis={'title': 'tag', 'tickmode': 'linear'},
    # Horizontal legend positioned at the top of the plot area.
    legend={'orientation': 'h', 'yanchor': 'auto', 'y': 1.0, 'xanchor': 'auto', 'x': .5},
)
fig.show()
# assign() returns a new frame, so count_diff itself never gains the column.
df = count_diff.assign(diff=count_diff['tag_page'] - count_diff['tags_page'])
# Trailing expression: the styled table (index hidden, 'diff' shaded by
# magnitude) is what the notebook displays as the cell output.
(
    df.style
    .hide(axis='index')
    .background_gradient(subset=['diff'])
)
| tag | tags_page | tag_page | diff |
|---|---|---|---|
| javascript | 66866 | 109798 | 42932 |
| webdev | 55014 | 93972 | 38958 |
| beginners | 43082 | 68086 | 25004 |
| tutorial | 28891 | 44794 | 15903 |
| react | 25741 | 42109 | 16368 |
| programming | 24030 | 50604 | 26574 |
| python | 17440 | 30755 | 13315 |
| discuss | 14723 | 21063 | 6340 |
| productivity | 14487 | 23503 | 9016 |
| css | 13578 | 22926 | 9348 |
| career | 12457 | 19626 | 7169 |
| node | 11865 | 18932 | 7067 |
| devops | 11730 | 20101 | 8371 |
| codenewbie | 10719 | 16574 | 5855 |
| html | 9642 | 17422 | 7780 |
| opensource | 9637 | 16559 | 6922 |
| typescript | 9595 | 14785 | 5190 |
| aws | 8963 | 14879 | 5916 |
| showdev | 7714 | 10923 | 3209 |
| github | 7503 | 12064 | 4561 |
| java | 6911 | 12554 | 5643 |
| testing | 6361 | 10531 | 4170 |
| docker | 5843 | 8968 | 3125 |
| php | 5568 | 10138 | 4570 |
| security | 5414 | 9595 | 4181 |
| linux | 5270 | 9108 | 3838 |
| vue | 5204 | 9027 | 3823 |
| ruby | 5065 | 7495 | 2430 |
| git | 5002 | 7749 | 2747 |
| angular | 4997 | 8889 | 3892 |
| go | 4853 | 7719 | 2866 |
| database | 4566 | 8044 | 3478 |
| dotnet | 4406 | 6763 | 2357 |
| csharp | 4248 | 6367 | 2119 |
| serverless | 4167 | 6189 | 2022 |
| machinelearning | 4005 | 7883 | 3878 |
| kubernetes | 3938 | 6477 | 2539 |
| rails | 3922 | 5897 | 1975 |
| computerscience | 3907 | 6489 | 2582 |
| cloud | 3777 | 7353 | 3576 |
| android | 3687 | 23905 | 20218 |
| design | 3645 | 6987 | 3342 |
| laravel | 3622 | 7252 | 3630 |
| azure | 3578 | 6102 | 2524 |
| api | 3517 | 6255 | 2738 |
| algorithms | 3455 | 5476 | 2021 |
| architecture | 3209 | 5361 | 2152 |
| help | 3197 | 5343 | 2146 |
| learning | 3006 | 4588 | 1582 |
| datascience | 2969 | 6518 | 3549 |
| vscode | 2809 | 4321 | 1512 |
| reactnative | 2800 | 5344 | 2544 |
| graphql | 2744 | 4035 | 1291 |
| frontend | 2647 | 4204 | 1557 |
| nextjs | 2640 | 4392 | 1752 |
| flutter | 2637 | 5203 | 2566 |
| watercooler | 2546 | 4231 | 1685 |
| django | 2505 | 4129 | 1624 |
| ios | 2325 | 4869 | 2544 |
| codepen | 2294 | 5408 | 3114 |
| sql | 2270 | 3901 | 1631 |
| rust | 2249 | 3415 | 1166 |
| todayilearned | 2220 | 3117 | 897 |
| blockchain | 2083 | 8072 | 5989 |
| performance | 2011 | 3268 | 1257 |
| hacktoberfest | 1982 | 3478 | 1496 |
| startup | 1980 | 5146 | 3166 |
| kotlin | 1974 | 3328 | 1354 |
| motivation | 1972 | 3083 | 1111 |
| news | 1916 | 27667 | 25751 |
| coding | 1913 | 4638 | 2725 |
| challenge | 1906 | 2934 | 1028 |
| mongodb | 1834 | 2997 | 1163 |
| development | 1778 | 4837 | 3059 |
| microservices | 1773 | 2905 | 1132 |
| tailwindcss | 1732 | 2611 | 879 |
| postgres | 1730 | 2661 | 931 |
| cpp | 1710 | 3706 | 1996 |
| npm | 1637 | 2521 | 884 |
| ux | 1624 | 3044 | 1420 |
| gamedev | 1620 | 3380 | 1760 |
| wordpress | 1564 | 4146 | 2582 |
| writing | 1530 | 3014 | 1484 |
| devjournal | 1422 | 2825 | 1403 |
| mobile | 1415 | 7548 | 6133 |
| dart | 1406 | 2229 | 823 |
| leetcode | 1361 | 1861 | 500 |
| ai | 1327 | 3250 | 1923 |
| agile | 1225 | 2449 | 1224 |
| firebase | 1201 | 2120 | 919 |
| management | 1147 | 2499 | 1352 |
| tooling | 1017 | 1802 | 785 |
| meta | 1011 | 1592 | 581 |
| braziliandevs | 1009 | 1543 | 534 |
| mysql | 1002 | 2020 | 1018 |
| web3 | 999 | 2708 | 1709 |
| community | 795 | 2367 | 1572 |
| cybersecurity | 672 | 1874 | 1202 |
| actionshackathon21 | 244 | 311 | 67 |
| archlinux | 151 | 34632 | 34481 |
# Tag names from each dataset, in the order the entries appear in the files.
tags_devto, tags_api = (
    [entry['tag']['name'] for entry in source]
    for source in (data, data_api)
)
# Trailing expression: show how many tags each dataset contains.
len(tags_devto), len(tags_api)
(100, 96)
# Set versions of both name lists, so they can be compared with set operators.
tags_devto_set, tags_api_set = set(tags_devto), set(tags_api)
def pp(s):
    """Format a collection of strings as its size followed by its items.

    The result is ``(<count>):``, a newline and a space, then the items
    joined with ``', '``.

    Sets have no defined iteration order, and string hashing is randomized
    per interpreter run, so set items are sorted to keep the printed output
    identical between runs. Ordered inputs such as lists keep their order.
    """
    items = sorted(s) if isinstance(s, (set, frozenset)) else s
    return f'({len(s)}):\n ' + ', '.join(items)
# Overlap and one-sided differences between the two tag sets, each paired with
# the label it is printed under.
comparisons = (
    ('In both', tags_devto_set & tags_api_set),
    ('Not in tags API', tags_devto_set - tags_api_set),
    ('Not in tags dev.to', tags_api_set - tags_devto_set),
)
# sep='\n\n' leaves one blank line between consecutive sections.
print(*(f'{label} {pp(names)}' for label, names in comparisons), sep='\n\n')
In both (81): kubernetes, motivation, typescript, datascience, frontend, laravel, mobile, algorithms, docker, javascript, aws, node, vue, ux, productivity, machinelearning, github, programming, css, azure, computerscience, graphql, sql, reactnative, wordpress, php, nextjs, git, ruby, serverless, html, rust, performance, cpp, help, gamedev, mongodb, devops, python, opensource, go, vscode, ios, news, rails, archlinux, csharp, career, beginners, cloud, showdev, ai, hacktoberfest, api, dotnet, discuss, blockchain, writing, learning, java, design, startup, tutorial, testing, codepen, coding, kotlin, todayilearned, architecture, django, database, webdev, react, development, angular, watercooler, android, security, flutter, linux, codenewbie Not in tags API (19): postgres, leetcode, mysql, tooling, firebase, web3, braziliandevs, challenge, actionshackathon21, devjournal, tailwindcss, microservices, meta, npm, community, management, dart, agile, cybersecurity Not in tags dev.to (15): softwaredevelopment, uncategorized, functional, blog, software, web, interview, business, google, newsbrief, 100daysofcode, cryptocurrency, bitcoin, ubuntu, technology