Hi, I ran a fastq.gz sample file through NanoCLUST using the following command: `nextflow run NanoCLUST/main.nf -profile conda --reads barcode01_filt.fastq.gz --db db/16S_ribosomal_RNA --tax db/taxdb/`
Run Name : tiny_wescoff Config Profile : conda
I am getting an error in the read_clustering process:
Error executing process > 'read_clustering (1)'
Caused by: Process read_clustering (1) terminated with an error exit status (1)
Command executed [/userdata/Punit/Rashmita_data/nanoclust/NanoCLUST/templates/umap_hdbscan.py]:
#!/usr/bin/env python
import numpy as np import umap import matplotlib.pyplot as plt from sklearn import decomposition import random import pandas as pd import hdbscan
df = pd.read_csv("freqs.txt", delimiter=" ")
UMAP
motifs = [x for x in df.columns.values if x not in ["read", "length"]] X = df.loc[:,motifs] X_embedded = umap.UMAP(n_neighbors=15, min_dist=0.1, verbose=2).fit_transform(X)
df_umap = pd.DataFrame(X_embedded, columns=["D1", "D2"]) umap_out = pd.concat([df["read"], df["length"], df_umap], axis=1)
HDBSCAN
X = umap_out.loc[:,["D1", "D2"]] umap_out["bin_id"] = hdbscan.HDBSCAN(min_cluster_size=int(50), cluster_selection_epsilon=int(0.5)).fit_predict(X)
PLOT
plt.figure(figsize=(20,20)) plt.scatter(X_embedded[:, 0], X_embedded[:, 1], c=umap_out["bin_id"], cmap='Spectral', s=1) plt.xlabel("UMAP1", fontsize=18) plt.ylabel("UMAP2", fontsize=18) plt.gca().set_aspect('equal', 'datalim') plt.title("Projecting " + str(len(umap_out['bin_id'])) + " reads. " + str(len(umap_out['bin_id'].unique())) + " clusters generated by HDBSCAN", fontsize=18)
for cluster in np.sort(umap_out['bin_id'].unique()): read = umap_out.loc[umap_out['bin_id'] == cluster].iloc[0] plt.annotate(str(cluster), (read['D1'], read['D2']), weight='bold', size=14)
plt.savefig('hdbscan.output.png') umap_out.to_csv("hdbscan.output.tsv", sep=" ", index=False)
Command exit status: 1
Command output: (empty)
Command error: retval = self._compile_core(args, return_type) File "/userdata/Punit/Rashmita_data/nanoclust/work/conda/read_clustering-800e1e27475cbaa0538f834c4aacc420/lib/python3.8/site-packages/numba/core/dispatcher.py", line 106, in _compile_core cres = compiler.compile_extra(self.targetdescr.typing_context, File "/userdata/Punit/Rashmita_data/nanoclust/work/conda/read_clustering-800e1e27475cbaa0538f834c4aacc420/lib/python3.8/site-packages/numba/core/compiler.py", line 606, in compile_extra return pipeline.compile_extra(func) File "/userdata/Punit/Rashmita_data/nanoclust/work/conda/read_clustering-800e1e27475cbaa0538f834c4aacc420/lib/python3.8/site-packages/numba/core/compiler.py", line 353, in compile_extra return self._compile_bytecode() File "/userdata/Punit/Rashmita_data/nanoclust/work/conda/read_clustering-800e1e27475cbaa0538f834c4aacc420/lib/python3.8/site-packages/numba/core/compiler.py", line 415, in _compile_bytecode return self._compile_core() File "/userdata/Punit/Rashmita_data/nanoclust/work/conda/read_clustering-800e1e27475cbaa0538f834c4aacc420/lib/python3.8/site-packages/numba/core/compiler.py", line 395, in _compile_core raise e File "/userdata/Punit/Rashmita_data/nanoclust/work/conda/read_clustering-800e1e27475cbaa0538f834c4aacc420/lib/python3.8/site-packages/numba/core/compiler.py", line 386, in _compile_core pm.run(self.state) File "/userdata/Punit/Rashmita_data/nanoclust/work/conda/read_clustering-800e1e27475cbaa0538f834c4aacc420/lib/python3.8/site-packages/numba/core/compiler_machinery.py", line 339, in run raise patched_exception File "/userdata/Punit/Rashmita_data/nanoclust/work/conda/read_clustering-800e1e27475cbaa0538f834c4aacc420/lib/python3.8/site-packages/numba/core/compiler_machinery.py", line 330, in run self._runPass(idx, pass_inst, state) File "/userdata/Punit/Rashmita_data/nanoclust/work/conda/read_clustering-800e1e27475cbaa0538f834c4aacc420/lib/python3.8/site-packages/numba/core/compiler_lock.py", line 35, in 
_acquire_compile_lock return func(*args, **kwargs) File "/userdata/Punit/Rashmita_data/nanoclust/work/conda/read_clustering-800e1e27475cbaa0538f834c4aacc420/lib/python3.8/site-packages/numba/core/compiler_machinery.py", line 289, in _runPass mutated |= check(pss.run_pass, internal_state) File "/userdata/Punit/Rashmita_data/nanoclust/work/conda/read_clustering-800e1e27475cbaa0538f834c4aacc420/lib/python3.8/site-packages/numba/core/compiler_machinery.py", line 262, in check mangled = func(compiler_state) File "/userdata/Punit/Rashmita_data/nanoclust/work/conda/read_clustering-800e1e27475cbaa0538f834c4aacc420/lib/python3.8/site-packages/numba/core/typed_passes.pexecutor > local (5) [bf/efa57f] process > QC (1) [100%] 1 of 1 ✔ [7e/0435bb] process > fastqc (1) [100%] 1 of 1 ✔ [e0/371311] process > kmer_freqs (1) [100%] 1 of 1 ✔ [59/28559d] process > read_clustering (1) [100%] 1 of 1, failed: 1 ✘ [- ] process > split_by_cluster - [- ] process > read_correction - [- ] process > draft_selection - [- ] process > racon_pass - [- ] process > medaka_pass - [- ] process > consensus_classification - [- ] process > join_results - [- ] process > get_abundances - [- ] process > plot_abundances - [e3/c6f429] process > output_documentation [100%] 1 of 1 ✔ Error executing process > 'read_clustering (1)'
Caused by: Process read_clustering (1) terminated with an error exit status (1)
Command executed [/userdata/Punit/Rashmita_data/nanoclust/NanoCLUST/templates/umap_hdbscan.py]:
#!/usr/bin/env python
import numpy as np import umap import matplotlib.pyplot as plt from sklearn import decomposition import random import pandas as pd import hdbscan
df = pd.read_csv("freqs.txt", delimiter=" ")
UMAP
motifs = [x for x in df.columns.values if x not in ["read", "length"]] X = df.loc[:,motifs] X_embedded = umap.UMAP(n_neighbors=15, min_dist=0.1, verbose=2).fit_transform(X)
df_umap = pd.DataFrame(X_embedded, columns=["D1", "D2"]) umap_out = pd.concat([df["read"], df["length"], df_umap], axis=1)
HDBSCAN
X = umap_out.loc[:,["D1", "D2"]] umap_out["bin_id"] = hdbscan.HDBSCAN(min_cluster_size=int(50), cluster_selection_epsilon=int(0.5)).fit_predict(X)
PLOT
plt.figure(figsize=(20,20)) plt.scatter(X_embedded[:, 0], X_embedded[:, 1], c=umap_out["bin_id"], cmap='Spectral', s=1) plt.xlabel("UMAP1", fontsize=18) plt.ylabel("UMAP2", fontsize=18) plt.gca().set_aspect('equal', 'datalim') plt.title("Projecting " + str(len(umap_out['bin_id'])) + " reads. " + str(len(umap_out['bin_id'].unique())) + " clusters generated by HDBSCAN", fontsize=18)
for cluster in np.sort(umap_out['bin_id'].unique()): read = umap_out.loc[umap_out['bin_id'] == cluster].iloc[0] plt.annotate(str(cluster), (read['D1'], read['D2']), weight='bold', size=14)
plt.savefig('hdbscan.output.png') umap_out.to_csv("hdbscan.output.tsv", sep=" ", index=False)
Command exit status: 1
Command output: (empty)
Command error: retval = self._compile_core(args, return_type) File "/userdata/Punit/Rashmita_data/nanoclust/work/conda/read_clustering-800e1e27475cbaa0538f834c4aacc420/lib/python3.8/site-packages/numba/core/dispatcher.py", line 106, in _compile_core cres = compiler.compile_extra(self.targetdescr.typing_context, File "/userdata/Punit/Rashmita_data/nanoclust/work/conda/read_clustering-800e1e27475cbaa0538f834c4aacc420/lib/python3.8/site-packages/numba/core/compiler.py", line 606, in compile_extra return pipeline.compile_extra(func) File "/userdata/Punit/Rashmita_data/nanoclust/work/conda/read_clustering-800e1e27475cbaa0538f834c4aacc420/lib/python3.8/site-packages/numba/core/compiler.py", line 353, in compile_extra return self._compile_bytecode() File "/userdata/Punit/Rashmita_data/nanoclust/work/conda/read_clustering-800e1e27475cbaa0538f834c4aacc420/lib/python3.8/site-packages/numba/core/compiler.py", line 415, in _compile_bytecode return self._compile_core() File "/userdata/Punit/Rashmita_data/nanoclust/work/conda/read_clustering-800e1e27475cbaa0538f834c4aacc420/lib/python3.8/site-packages/numba/core/compiler.py", line 395, in _compile_core raise e File "/userdata/Punit/Rashmita_data/nanoclust/work/conda/read_clustering-800e1e27475cbaa0538f834c4aacc420/lib/python3.8/site-packages/numba/core/compiler.py", line 386, in _compile_core pm.run(self.state) File "/userdata/Punit/Rashmita_data/nanoclust/work/conda/read_clustering-800e1e27475cbaa0538f834c4aacc420/lib/python3.8/site-packages/numba/core/compiler_machinery.py", line 339, in run raise patched_exception File "/userdata/Punit/Rashmita_data/nanoclust/work/conda/read_clustering-800e1e27475cbaa0538f834c4aacc420/lib/python3.8/site-packages/numba/core/compiler_machinery.py", line 330, in run self._runPass(idx, pass_inst, state) File "/userdata/Punit/Rashmita_data/nanoclust/work/conda/read_clustering-800e1e27475cbaa0538f834c4aacc420/lib/python3.8/site-packages/numba/core/compiler_lock.py", line 35, in 
_acquire_compile_lock return func(*args, **kwargs) File "/userdata/Punit/Rashmita_data/nanoclust/work/conda/read_clustering-800e1e27475cbaa0538f834c4aacc420/lib/python3.8/site-packages/numba/core/compiler_machinery.py", line 289, in _runPass mutated |= check(pss.run_pass, internal_state) File "/userdata/Punit/Rashmita_data/nanoclust/work/conda/read_clustering-800e1e27475cbaa0538f834c4aacc420/lib/python3.8/site-packages/numba/core/compiler_machinery.py", line 262, in check mangled = func(compiler_state) File "/userdata/Punit/Rashmita_data/nanoclust/work/conda/read_clustering-800e1e27475cbaa0538f834c4aacc420/lib/python3.8/site-packages/numba/core/typed_passes.py", line 463, in run_pass NativeLowering().run_pass(state) File "/userdata/Punit/Rashmita_data/nanoclust/work/conda/read_clustering-800e1e27475cbaa0538f834c4aacc420/lib/python3.8/site-packages/numba/core/typed_passes.py", line 384, in run_pass lower.lower() File "/userdata/Punit/Rashmita_data/nanoclust/work/conda/read_clustering-800e1e27475cbaa0538f834c4aacc420/lib/python3.8/site-packages/numba/core/lowering.py", line 136, in lower self.lower_normal_function(self.fndesc) File "/userdata/Punit/Rashmita_data/nanoclust/work/conda/read_clustering-800e1e27475cbaa0538f834c4aacc420/lib/python3.8/site-packages/numba/core/lowering.py", line 190, in lower_normal_function entry_block_tail = self.lower_function_body() File "/userdata/Punit/Rashmita_data/nanoclust/work/conda/read_clustering-800e1e27475cbaa0538f834c4aacc420/lib/python3.8/site-packages/numba/core/lowering.py", line 216, in lower_function_body self.lower_block(block) File "/userdata/Punit/Rashmita_data/nanoclust/work/conda/read_clustering-800e1e27475cbaa0538f834c4aacc420/lib/python3.8/site-packages/numba/core/lowering.py", line 230, in lower_block self.lower_inst(inst) File "/userdata/Punit/Rashmita_data/nanoclust/work/conda/read_clustering-800e1e27475cbaa0538f834c4aacc420/lib/python3.8/contextlib.py", line 131, in exit self.gen.throw(type, value, traceback) 
File "/userdata/Punit/Rashmita_data/nanoclust/work/conda/read_clustering-800e1e27475cbaa0538f834c4aacc420/lib/python3.8/site-packages/numba/core/errors.py", line 751, in new_error_context raise newerr.with_traceback(tb) numba.core.errors.LoweringError: Failed in nopython mode pipeline (step: nopython mode backend) Storing i64 to ptr of i32 ('dim'). FE type int32
File "../../conda/read_clustering-800e1e27475cbaa0538f834c4aacc420/lib/python3.8/site-packages/umap/layouts.py", line 52: def rdist(x, y):
result = 0.0 dim = x.shape[0] ^
During: lowering "dim = static_getitem(value=$8load_attr.2, index=0, index_var=$const10.3, fn=)" at /userdata/Punit/Rashmita_data/nanoclust/work/conda/read_clustering-800e1e27475cbaa0538f834c4aacc420/lib/python3.8/site-packages/umap/layouts.py (52)
Work dir: /userdata/Punit/Rashmita_data/nanoclust/work/59/28559dcb43230a9ddfc92eef9ab981
Tip: when you have fixed the problem you can continue the execution adding the option -resume to the run command line
Can you help me resolve this issue? Thanks.
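Your post is one of those where proper formatting is more than eye candy: please put the command and the error log in code blocks so they stay readable. The error (a numba LoweringError, "Storing i64 to ptr of i32", raised while compiling `rdist` in umap/layouts.py) suggests a numba problem: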
https://github.com/numba/numba/issues/5415
https://github.com/numba/numba/issues/5076