* 参数的先验信念：p∼Uniform(0,1)
* 似然函数：data∼Bernoulli(p)

import pymc3 as pm import numpy.random as npr import numpy as np import
matplotlib.pyplot as plt import matplotlib as mpl from collections import
Counter import seaborn as sns sns.set_style('white') sns.set_context('poster')
InlineBackend.figure_format = 'retina' import warnings
warnings.filterwarnings('ignore') from random import shuffle total = 30 n_heads
= 11 n_tails = total - n_heads tosses = [1] * n_heads + [0] * n_tails
shuffle(tosses)

def plot_coins(): fig = plt.figure() ax = fig.add_subplot(1,1,1)
ax.bar(list(Counter(tosses).keys()), list(Counter(tosses).values()))
ax.set_xticks([0, 1]) ax.set_xticklabels(['tails', 'heads']) ax.set_ylim(0, 20)
ax.set_yticks(np.arange(0, 21, 5)) return fig fig = plot_coins() plt.show()

建立模型：
with pm.Model() as coin_model: # Specify prior using Uniform object. p_prior =
pm.Uniform('p', 0, 1) # Specify likelihood using Bernoulli object. like =
pm.Bernoulli('likelihood', p=p_prior, observed=tosses) # "observed=data" is key
for likelihood.
MCMC Inference Button
with coin_model: step = pm.Metropolis() # focus on this, the Inference Button:
coin_trace = pm.sample(2000, step=step)

pm.traceplot(coin_trace) plt.show()

pm.plot_posterior(coin_trace[100:], color='#87ceeb',rope=[0.48,0.52],
point_estimate='mean', ref_val=0.5) plt.show()

* 测试X的浓度范围，测量流感活动
* 根据实验结果计算 IC50：导致病毒复制率减半的X浓度。

import numpy as np import pandas as pd chem_data = [(0.00080, 99), (0.00800,
91), (0.08000, 89), (0.40000, 89), (0.80000, 79), (1.60000, 61), (4.00000, 39),
(8.00000, 25), (80.00000, 4)] chem_df = pd.DataFrame(chem_data) chem_df.columns
= ['concentration', 'activity'] chem_df['concentration_log'] =
chem_df['concentration'].apply(lambda x:np.log10(x))

def plot_chemical_data(log=True): fig = plt.figure(figsize=(10,6)) ax =
y=chem_df['activity']) ax.set_xlabel('log10(concentration (mM))', fontsize=20)
else: ax.scatter(x=chem_df['concentration'], y=chem_df['activity'])
ax.set_xlabel('concentration (mM)', fontsize=20) ax.set_xticklabels([int(i) for
i in ax.get_xticks()], fontsize=18) ax.set_yticklabels([int(i) for i in
ax.get_yticks()], fontsize=18) plt.hlines(y=50, xmin=min(ax.get_xlim()),
xmax=max(ax.get_xlim()), linestyles='--',) return fig fig =
plot_chemical_data(log=True) plt.show()

with pm.Model() as ic50_model: beta = pm.HalfNormal('beta', sd=100**2)
ic50_log10 = pm.Flat('IC50_log10') # Flat prior # MATH WITH DISTRIBUTION
OBJECTS! measurements = beta / (1 + np.exp(chem_df['concentration_log'].values
- ic50_log10)) y_like = pm.Normal('y_like', mu=measurements,
observed=chem_df['activity']) ic50 = pm.Deterministic('IC50', np.power(10,
ic50_log10)) # MCMC Inference Button with ic50_model: step = pm.Metropolis()
ic50_trace = pm.sample(10000, step=step) pm.traceplot(ic50_trace[2000:],
varnames=['IC50_log10', 'IC50']) # live: sample from step 2000 onwards.
plt.show()

pm.plot_posterior(ic50_trace[4000:], varnames=['IC50'], color='#87ceeb',
point_estimate='mean') plt.show()

该化学物质的IC50在约 [2mM，2.4mM]（95％HPD）

drug = [ 99., 110., 107., 104., 省略] placebo = [ 95., 105., 103., 99., 省略] def
ECDF(data): x = np.sort(data) y = np.cumsum(x) / np.sum(x) return x, ydef
plot_drug(): fig = plt.figure() ax = fig.add_subplot(1,1,1) x_drug, y_drug =
ECDF(drug) ax.plot(x_drug, y_drug, label='drug, n={0}'.format(len(drug)))
x_placebo, y_placebo = ECDF(placebo) ax.plot(x_placebo, y_placebo,
label='placebo, n={0}'.format(len(placebo))) ax.legend() ax.set_xlabel('IQ
Score') ax.set_ylabel('Cumulative Frequency') ax.hlines(0.5, ax.get_xlim()[0],
ax.get_xlim()[1], linestyle='--') return fig fig = plot_drug() plt.show()

from scipy.stats import ttest_ind ttest_ind(drug, placebo) #
Ttest_indResult(statistic=2.2806701634329549, pvalue=0.025011500508647616)

* 随机将参与者分配给两个实验组：
* +drug vs. -drug
* 测量每个参与者的 IQ Scores

y_vals = np.concatenate([drug, placebo]) labels = ['drug'] * len(drug) +
['placebo'] * len(placebo) data = pd.DataFrame([y_vals, labels]).T data.columns
= ['IQ', 'treatment'] with pm.Model() as kruschke_model: # Linking Distribution
Objects together is done by # passing objects into other objects' parameters.
mu_drug = pm.Normal('mu_drug', mu=0, sd=100**2) mu_placebo =
pm.Normal('mu_placebo', mu=0, sd=100**2) sigma_drug =
pm.HalfCauchy('sigma_drug', beta=100) sigma_placebo =
pm.HalfCauchy('sigma_placebo', beta=100) nu = pm.Exponential('nu', lam=1/29) +
1 drug_like = pm.StudentT('drug', nu=nu, mu=mu_drug, sd=sigma_drug,
observed=drug) placebo_like = pm.StudentT('placebo', nu=nu, mu=mu_placebo,
sd=sigma_placebo, observed=placebo) diff_means = pm.Deterministic('diff_means',
mu_drug - mu_placebo) pooled_sd = pm.Deterministic('pooled_sd',
np.sqrt(np.power(sigma_drug, 2) + np.power(sigma_placebo, 2) / 2)) effect_size
= pm.Deterministic('effect_size', diff_means / pooled_sd) with kruschke_model:
kruschke_trace = pm.sample(10000, step=pm.Metropolis())

结果：
pm.traceplot(kruschke_trace[2000:], varnames=['mu_drug', 'mu_placebo'])
plt.show()

pm.plot_posterior(kruschke_trace[2000:], color='#87ceeb',varnames=['mu_drug',
'mu_placebo', 'diff_means']) plt.show()