/* Threads/posts per domain/subdomain */ SELECT substring_index(substring_index(threads.threadurl,'/',3),'/',-1) AS domain, count(DISTINCT posts.threadid) AS thread_count, count(1) AS post_count FROM posts INNER JOIN threads ON posts.threadid=threads.threadid GROUP BY substring_index(substring_index(threads.threadurl,'/',3),'/',-1) ORDER BY count(1) DESC; /* Output: +------------------+--------------+------------+ | domain | thread_count | post_count | +------------------+--------------+------------+ | img.4chan.org | 6098010 | 86055158 | | zip.4chan.org | 3174527 | 60523685 | | cgi.4chan.org | 1068770 | 8555311 | | orz.4chan.org | 338996 | 5428969 | | bin.4chan.org | 89335 | 1619595 | | dat.4chan.org | 24520 | 277371 | | yum.bbwchan.org | 4896 | 71436 | | foone.org | 4681 | 56608 | | may.not4chan.org | 4210 | 49937 | | bbwchan.org | 3752 | 44796 | | orly.yi.org | 1862 | 20883 | | foone.yi.org | 5 | 231 | +------------------+--------------+------------+ */ /* Threads/posts for 4Chan vs. non-4Chan */ SELECT (CASE WHEN domain LIKE '%\.4chan\.org' THEN 'yes' ELSE 'no' END) AS is_4chan, sum(thread_count) AS total_thread_count, sum(post_count) AS total_post_count FROM ( SELECT substring_index(substring_index(threads.threadurl,'/',3),'/',-1) AS domain, count(DISTINCT posts.threadid) AS thread_count, count(1) AS post_count FROM posts INNER JOIN threads ON posts.threadid=threads.threadid GROUP BY substring_index(substring_index(threads.threadurl,'/',3),'/',-1)) AS t GROUP BY (CASE WHEN domain LIKE '%\.4chan\.org' THEN 'yes' ELSE 'no' END) ORDER BY is_4chan DESC; /* Output: +----------+--------------------+------------------+ | is_4chan | total_thread_count | total_post_count | +----------+--------------------+------------------+ | yes | 10794158 | 162460089 | | no | 19406 | 243891 | +----------+--------------------+------------------+ */ /* Using results from previous */ SELECT count(*) AS all_posts, 243891 AS non_4chan_posts, CONCAT(243891/count(*)*100,'%') AS percent_non_4chan_posts FROM posts; /* Output: +-----------+-----------------+-------------------------+ | all_posts | non_4chan_posts | percent_non_4chan_posts | +-----------+-----------------+-------------------------+ | 162703985 | 243891 | 0.1499% | +-----------+-----------------+-------------------------+ */