9. 集団ゲノミクス
9.1 ゲノムワイド関連解析(GWAS)
GWAS実装:
import statsmodels.api as sm
from scipy import stats
class GWASAnalyzer:
"""GWAS解析パイプライン"""
def __init__(self, genotype_matrix, phenotype, covariates=None):
self.genotypes = genotype_matrix
self.phenotype = phenotype
self.covariates = covariates
def run_gwas(self):
"""
線形回帰によるGWAS
"""
results = []
for snp_idx in range(self.genotypes.shape[1]):
snp_data = self.genotypes[:, snp_idx]
# 線形モデルの構築
X = snp_data
if self.covariates is not None:
X = np.column_stack([X, self.covariates])
X = sm.add_constant(X)
# 回帰分析
model = sm.OLS(self.phenotype, X)
result = model.fit()
# P値とベータ値の抽出
p_value = result.pvalues[1] # SNPの効果
beta = result.params[1]
results.append({
'snp_idx': snp_idx,
'p_value': p_value,
'beta': beta
})
return pd.DataFrame(results)
def manhattan_plot(self, results):
"""マンハッタンプロットの作成"""
import matplotlib.pyplot as plt
# -log10(p-value)の計算
results['neglog10p'] = -np.log10(results['p_value'])
plt.figure(figsize=(12, 6))
plt.scatter(results.index, results['neglog10p'], alpha=0.5)
plt.axhline(y=-np.log10(5e-8), color='r', linestyle='--')
plt.xlabel('SNP Position')
plt.ylabel('-log10(P-value)')
plt.title('Manhattan Plot')
plt.show()
9.2 集団構造解析
PCAによる集団構造:
class PopulationStructure:
"""集団構造解析"""
def __init__(self, genotype_matrix):
self.genotypes = genotype_matrix
def perform_pca(self, n_components=10):
"""
遺伝的主成分分析
"""
from sklearn.decomposition import PCA
# アレル頻度の標準化
standardized = self.standardize_genotypes()
# PCA実行
pca = PCA(n_components=n_components)
pc_coords = pca.fit_transform(standardized)
return pc_coords, pca.explained_variance_ratio_
def standardize_genotypes(self):
"""Patterson et al. 2006の標準化"""
p = np.mean(self.genotypes, axis=0) / 2
std_geno = (self.genotypes - 2*p) / np.sqrt(2*p*(1-p))
return std_geno
9.3 選択圧の検出
自然選択の検出手法:
class SelectionDetection:
"""自然選択シグナルの検出"""
def calculate_fst(self, pop1_geno, pop2_geno):
"""
集団間の遺伝的分化(FST)
"""
# アレル頻度の計算
p1 = np.mean(pop1_geno, axis=0) / 2
p2 = np.mean(pop2_geno, axis=0) / 2
# 全体のアレル頻度
p_total = (p1 + p2) / 2
# FSTの計算(Weir & Cockerham)
num = (p1 - p2)**2
denom = p_total * (1 - p_total)
fst = num / (denom + 1e-10)
return fst
def integrated_haplotype_score(self, haplotypes, position):
"""
iHS (integrated Haplotype Score)の計算
"""
# Extended Haplotype Homozygosityの計算
# 実装省略(計算集約的)
pass
前へ: シングルセル・空間解析 | 目次 | 次へ: データベース技術 |