2018年5月21日月曜日

開発環境

Pythonからはじめる数学入門 (Amit Saha (著)、黒川 利明 (翻訳)、オライリージャパン)の3章(データを統計量で記述する)、3.9(プログラミングチャレンジ)、問題3-3(他のCSVデータでの実験)に取り組んでみる。

コード(Emacs)

Python 3

#!/usr/bin/env python3

from collections import Counter
import csv
import matplotlib.pyplot as plt


def read_csv(filename):
    """Read a two-column CSV file and return its columns in reversed row order.

    The first row is treated as a header and skipped.  For every following
    non-blank row, the first column is cut at its first ``'-'`` (so a value
    such as ``2012-01-01`` becomes ``'2012'``) and the second column is
    parsed as a float.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A ``(years, pops)`` tuple of two lists: the year strings and the
        float values, both in the reverse of the file's row order
        (presumably because the data file lists the newest record first).
        Both lists are empty when the file has no data rows.

    Raises:
        OSError: If the file cannot be opened.
        IndexError: If a non-blank row has fewer than two columns.
        ValueError: If the second column of a row is not a valid float.
    """
    years = []
    pops = []
    # newline='' is what the csv module asks for, so that it can handle
    # line endings (including ones embedded in quoted fields) itself.
    with open(filename, newline='') as f:
        reader = csv.reader(f)
        # Skip the header row; the default keeps an empty file from
        # leaking StopIteration to the caller.
        next(reader, None)
        for row in reader:
            if not row:
                # csv.reader yields an empty list for a blank line.
                continue
            years.append(row[0].split('-')[0])
            pops.append(float(row[1]))
    years.reverse()
    pops.reverse()

    return years, pops


def read_nums(filename):
    """Read a text file holding one number per line.

    Blank and whitespace-only lines (for example an empty line at the end
    of the file) are skipped instead of aborting the whole read.

    Args:
        filename: Path of the text file to read.

    Returns:
        A list of floats, one per non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a non-blank line is not a valid float.
    """
    with open(filename) as f:
        # float() already ignores surrounding whitespace, but it rejects a
        # line that contains nothing else, so filter those out first.
        nums = [float(line) for line in f if line.strip()]

    return nums


def mean(nums):
    """Return the arithmetic mean of ``nums``.

    The sum of the values is divided by their count, so an empty sequence
    raises ZeroDivisionError.
    """
    return sum(nums) / len(nums)


def median(nums):
    """Return the median of ``nums`` without modifying the input.

    Works on a sorted copy, so the caller's sequence keeps its order.

    Args:
        nums: The values to examine.

    Returns:
        The middle value when the number of values is odd, otherwise the
        mean of the two middle values.

    Raises:
        IndexError: If ``nums`` is empty.
    """
    # sorted() builds a new list; list.sort() would reorder the caller's data.
    ordered = sorted(nums)
    n = len(ordered)
    mid = n // 2

    if n % 2 == 0:
        # Even count: average the two values on either side of the centre.
        return (ordered[mid - 1] + ordered[mid]) / 2
    return ordered[mid]


def mode(nums):
    """Return a list of the most frequent value(s) in ``nums``.

    All values tied for the highest count are returned, in the order in
    which ``Counter.most_common()`` lists them.  An empty input raises
    IndexError.
    """
    ranked = Counter(nums).most_common()
    top_count = ranked[0][1]

    modes = []
    for value, count in ranked:
        if count != top_count:
            # most_common() is ordered by descending count, so once the
            # count drops no later entry can tie for first place.
            break
        modes.append(value)
    return modes


def differences(nums):
    """Return the deviation of every value in ``nums`` from their mean.

    The result is a new list with the same length and order as ``nums``.
    """
    center = mean(nums)
    return [value - center for value in nums]


def variance(nums):
    """Return the variance of ``nums``.

    This is the sum of the squared deviations from the mean divided by the
    number of values (the divisor is ``len(nums)``, not ``len(nums) - 1``).
    """
    total = sum(deviation ** 2 for deviation in differences(nums))
    return total / len(nums)


def standard_deviation(nums):
    """Return the standard deviation of ``nums``: the square root of its variance."""
    spread = variance(nums)
    return spread ** 0.5


if __name__ == '__main__':
    # Load the series from the CSV file.
    filename = 'USA_SP_POP_TOTL.csv'
    years, pops = read_csv(filename)

    # Change between each pair of consecutive entries, labelled "from-to".
    xlabels = [f'{start}-{end}' for start, end in zip(years, years[1:])]
    diff = [after - before for before, after in zip(pops, pops[1:])]

    # Plot the changes and save the figure as an SVG file.
    plt.plot(xlabels, diff)
    plt.xlabel('Year')
    plt.ylabel('Population')
    plt.xticks(range(len(xlabels)), rotation=20)
    plt.savefig('sample3.svg')

    # Print each statistic of the changes, using the function name as label.
    for func in (mean, median, mode, variance, standard_deviation):
        label = func.__name__.replace("_", " ")
        print(f'{label}: {func(diff)}')

入出力結果(Terminal, Jupyter(IPython))

$ ./sample3.py
mean: 2562366.153846154
median: 2476370.0
mode: [1945000.0, 1971000.0, 1994000.0, 2013000.0, 2033000.0, 2062000.0, 2099000.0, 2119000.0, 2128000.0, 2152000.0, 2156000.0, 2170000.0, 2198000.0, 2204000.0, 2209000.0, 2210000.0, 2235000.0, 2241000.0, 2257000.0, 2261591.0, 2320000.0, 2326224.0, 2346000.0, 2375000.0, 2414000.0, 2470000.0, 2482740.0, 2554696.0, 2609000.0, 2647000.0, 2656238.0, 2677563.0, 2697365.0, 2704000.0, 2711301.0, 2804000.0, 2806544.0, 2847000.0, 2851295.0, 2862759.0, 2863313.0, 3020000.0, 3116000.0, 3122411.0, 3152000.0, 3186000.0, 3197000.0, 3207000.0, 3263000.0, 3358000.0, 3405000.0, 3533000.0]
variance: 188985554755.28406
standard deviation: 434724.6884584358
$

0 コメント:

コメントを投稿