第9章: 集団ゲノミクス

9. 集団ゲノミクス

9.1 ゲノムワイド関連解析（GWAS）

GWAS実装:

import statsmodels.api as sm
from scipy import stats

class GWASAnalyzer:
    """GWAS解析パイプライン"""
    
    def __init__(self, genotype_matrix, phenotype, covariates=None):
        self.genotypes = genotype_matrix
        self.phenotype = phenotype
        self.covariates = covariates
        
    def run_gwas(self):
        """
        線形回帰によるGWAS
        """
        results = []
        
        for snp_idx in range(self.genotypes.shape[1]):
            snp_data = self.genotypes[:, snp_idx]
            
            # 線形モデルの構築
            X = snp_data
            if self.covariates is not None:
                X = np.column_stack([X, self.covariates])
            X = sm.add_constant(X)
            
            # 回帰分析
            model = sm.OLS(self.phenotype, X)
            result = model.fit()
            
            # P値とベータ値の抽出
            p_value = result.pvalues[1]  # SNPの効果
            beta = result.params[1]
            
            results.append({
                'snp_idx': snp_idx,
                'p_value': p_value,
                'beta': beta
            })
            
        return pd.DataFrame(results)
    
    def manhattan_plot(self, results):
        """マンハッタンプロットの作成"""
        import matplotlib.pyplot as plt
        
        # -log10(p-value)の計算
        results['neglog10p'] = -np.log10(results['p_value'])
        
        plt.figure(figsize=(12, 6))
        plt.scatter(results.index, results['neglog10p'], alpha=0.5)
        plt.axhline(y=-np.log10(5e-8), color='r', linestyle='--')
        plt.xlabel('SNP Position')
        plt.ylabel('-log10(P-value)')
        plt.title('Manhattan Plot')
        plt.show()

9.2 集団構造解析

PCAによる集団構造:

class PopulationStructure:
    """集団構造解析"""
    
    def __init__(self, genotype_matrix):
        self.genotypes = genotype_matrix
        
    def perform_pca(self, n_components=10):
        """
        遺伝的主成分分析
        """
        from sklearn.decomposition import PCA
        
        # アレル頻度の標準化
        standardized = self.standardize_genotypes()
        
        # PCA実行
        pca = PCA(n_components=n_components)
        pc_coords = pca.fit_transform(standardized)
        
        return pc_coords, pca.explained_variance_ratio_
    
    def standardize_genotypes(self):
        """Patterson et al. 2006の標準化"""
        p = np.mean(self.genotypes, axis=0) / 2
        
        std_geno = (self.genotypes - 2*p) / np.sqrt(2*p*(1-p))
        return std_geno

9.3 選択圧の検出

自然選択の検出手法:

class SelectionDetection:
    """自然選択シグナルの検出"""
    
    def calculate_fst(self, pop1_geno, pop2_geno):
        """
        集団間の遺伝的分化（FST）
        """
        # アレル頻度の計算
        p1 = np.mean(pop1_geno, axis=0) / 2
        p2 = np.mean(pop2_geno, axis=0) / 2
        
        # 全体のアレル頻度
        p_total = (p1 + p2) / 2
        
        # FSTの計算（Weir & Cockerham）
        num = (p1 - p2)**2
        denom = p_total * (1 - p_total)
        
        fst = num / (denom + 1e-10)
        return fst
    
    def integrated_haplotype_score(self, haplotypes, position):
        """
        iHS (integrated Haplotype Score)の計算
        """
        # Extended Haplotype Homozygosityの計算
        # 実装省略（計算集約的）
        pass

前へ: シングルセル・空間解析

次へ: データベース技術